In [3]:
import sys
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import pickle
import numpy as np
import training
import config
import matplotlib.pyplot as plt
import os
import MyTransformer

# Model-type names selectable through `model_idx`, listed in the order the
# index refers to them.
models = [
    'gpt-nano',
    'gpt-micro',
    'gpt-mini',
    'gopher-44m',
    'gpt2',
]

In [2]:
# Learning-rate sweep: for one (model type, seed) pair, train a predictor for
# each learning-rate exponent and save the trained model plus its loss curves
# under ./NNs. Configurations whose predictor file already exists are skipped,
# so the cell can be re-run after an interruption without redoing finished work.

# seed  = int(sys.argv[1])
# model_idx = int(sys.argv[2])
seed = 0
model_idx = 0

# Create the output directory up front so that torch.save / savefig cannot
# fail on a missing folder after a complete training run.
os.makedirs('./NNs', exist_ok=True)

# `lr` is the base-10 exponent of the learning rate; the optimizer below uses
# 10**lr, i.e. 1e-3, 1e-4, 1e-5 and 1e-6.
for lr in range(-3, -7, -1):
    exp_setup = f'{models[model_idx]}_lr_{lr}_seed_{seed}'
    print(f'The experiment setup is {exp_setup}.')

    # A saved predictor means this configuration was already trained.
    if os.path.exists(f'./NNs/predictor_{exp_setup}'):
        continue

    # NOTE(review): torch.load unpickles the file; only load trusted datasets.
    data = torch.load('./data/dataset.ds')

    # Fixed: the original bound `data['X_train'].to` (the method object, never
    # called) instead of the tensor itself, which breaks TensorDataset and
    # the `.shape` lookup below.
    X_train = data['X_train']
    Y_train = data['Y_train']
    X_valid = data['X_valid']
    Y_valid = data['Y_valid']
    X_test  = data['X_test']
    Y_test  = data['Y_test']

    train_data = TensorDataset(X_train, Y_train)
    valid_data = TensorDataset(X_valid, Y_valid)
    test_data  = TensorDataset(X_test, Y_test)

    # Training is mini-batched; validation and test are each served as a
    # single full-size batch.
    train_loader = DataLoader(train_data, batch_size=256)
    valid_loader = DataLoader(valid_data, batch_size=len(valid_data))
    test_loader  = DataLoader(test_data, batch_size=len(test_data))

    # Project helper; presumably seeds the RNGs so that model initialisation
    # is reproducible for a given `seed` -- confirm in config.py.
    config.SetSeed(seed)

    model_config = MyTransformer.GPT.get_default_config()
    # Fixed: the model type was hard-coded to 'gpt-nano', so any other
    # `model_idx` would have trained a nano model while the files were named
    # after models[model_idx].
    # NOTE(review): assumes MyTransformer.GPT accepts every name in `models`.
    model_config.model_type = models[model_idx]
    # Assumes X_train is at least 2-D, with dim 1 used as the block size.
    model_config.block_size = X_train.shape[1]
    model = MyTransformer.GPT(model_config)

    lossfunction = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=10**lr)

    model, train_loss, valid_loss = training.train_nn(model, train_loader, valid_loader, lossfunction, optimizer, UUID=exp_setup)
    torch.save(model, f'./NNs/predictor_{exp_setup}')

    # Save the loss curves next to the model; the figure is closed right away
    # so the sweep does not accumulate open figures.
    plt.figure()
    plt.plot(train_loss, label='train')
    plt.plot(valid_loss, label='valid')
    plt.legend()  # the labels above are only rendered once a legend is drawn
    plt.savefig(f'./NNs/train_curve_{exp_setup}.pdf', format='pdf', bbox_inches='tight')
    plt.close()


The experiment setup is model_0_lr_-4_seed_0.
number of parameters: 0.10M
The ID for this training is -1269400444507779122_1693257366.


KeyboardInterrupt: 