In [1]:
import torch
import sys
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import numpy as np
import training
import config
import matplotlib.pyplot as plt
import os

# Model identifiers; they are interpolated into the checkpoint file names below.
models = [
    'gpt-nano',
    'gpt-micro',
    'gpt-mini',
    'gopher-44m',
    'gpt2',
]

In [2]:
# Read the stored dataset and pull out the validation / test tensors.
data = torch.load(f'./data/dataset.ds')
X_valid, Y_valid = data['X_valid'], data['Y_valid']
X_test, Y_test = data['X_test'], data['Y_test']

# Pair inputs with targets, then batch each split (32 samples per batch,
# default DataLoader ordering, i.e. no shuffling).
valid_data = TensorDataset(X_valid, Y_valid)
test_data = TensorDataset(X_test, Y_test)
valid_loader, test_loader = (
    DataLoader(split, batch_size=32) for split in (valid_data, test_data)
)

# Sweep dimensions. Only the first N_MODELS entries of `models` are evaluated.
N_SEEDS = 10
N_MODELS = 4
LR_TAGS = [-2, -3, -4]  # lr identifiers exactly as they appear in the checkpoint file names


def mean_loss(model, loader, loss_fn):
    """Return the sample-weighted mean of `loss_fn` over every batch in `loader`.

    Each batch loss is weighted by its batch size so that a smaller final
    batch does not bias the average. Returns NaN when the loader yields no
    samples.
    """
    total_loss = 0.0
    total_samples = 0
    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for inputs, targets in loader:
            batch_size = inputs.size(0)
            total_loss += loss_fn(model(inputs), targets).item() * batch_size
            total_samples += batch_size
    if total_samples == 0:
        return float('nan')
    return total_loss / total_samples


# results[seed, model, lr, split] with split 0 = validation, 1 = test.
# Pre-filled with NaN so combinations without a checkpoint on disk stay NaN.
results = torch.full((N_SEEDS, N_MODELS, len(LR_TAGS), 2), float('nan'))

loss_fn = torch.nn.MSELoss(reduction='mean')

for seed in range(N_SEEDS):
    for model_idx in range(N_MODELS):
        for l, lr in enumerate(LR_TAGS):

            modelname = f'predictor_{models[model_idx]}_lr_{lr}_seed_{seed}'
            checkpoint_path = f'./NNs/{modelname}'
            if not os.path.isfile(checkpoint_path):
                continue  # missing checkpoint: entry keeps its NaN placeholder

            # NOTE(review): torch.load unpickles arbitrary objects; only load trusted
            # checkpoints. Recent PyTorch releases may need weights_only=False for
            # full-model pickles - confirm against the installed version.
            model = torch.load(checkpoint_path, map_location=torch.device('cpu'))
            # Evaluation mode, in case the checkpoint was saved while training.
            model.eval()

            config.SetSeed(seed)

            results[seed, model_idx, l, 0] = mean_loss(model, valid_loader, loss_fn)
            results[seed, model_idx, l, 1] = mean_loss(model, test_loader, loss_fn)

            print(results[seed, model_idx, l, 0], results[seed, model_idx, l, 1])

tensor(6.2663e-06) tensor(5.4309e-06)
tensor(1.3682e-06) tensor(1.1255e-06)
tensor(4.5144e-06) tensor(4.6042e-06)
tensor(0.0024) tensor(0.0026)
tensor(4.6192e-06) tensor(4.1223e-06)
tensor(2.3643e-06) tensor(1.8001e-06)
tensor(0.0002) tensor(0.0002)
tensor(2.3394e-05) tensor(2.2360e-05)
tensor(1.7461e-06) tensor(1.4930e-06)
tensor(0.0007) tensor(0.0008)
tensor(6.0357e-05) tensor(5.8595e-05)
tensor(6.9260e-07) tensor(6.1065e-07)
tensor(1.7946e-05) tensor(1.8259e-05)
tensor(1.7935e-06) tensor(1.5176e-06)
tensor(1.3790e-06) tensor(1.1861e-06)
tensor(0.0008) tensor(0.0009)
tensor(1.2566e-05) tensor(1.1974e-05)
tensor(2.5719e-06) tensor(2.2689e-06)
tensor(0.0004) tensor(0.0004)
tensor(4.9375e-06) tensor(4.1719e-06)
tensor(5.9776e-06) tensor(4.9898e-06)
tensor(0.0002) tensor(0.0002)
tensor(8.5867e-06) tensor(7.8225e-06)
tensor(1.6196e-05) tensor(1.4571e-05)
tensor(1.1018e-05) tensor(1.0753e-05)
tensor(3.8513e-06) tensor(3.5512e-06)
tensor(3.4587e-06) tensor(3.0770e-06)
tensor(0.0001) tensor(

In [4]:
# Average over the seed axis (axis 0); NaN entries are ignored by nanmean.
seed_mean_np = np.nanmean(results.detach().numpy(), axis=0)
results_mean_seed = torch.tensor(seed_mean_np)  # [model, lr, valid/test]
results_mean_seed.shape

torch.Size([4, 3, 2])

In [5]:
# Per model: smallest seed-averaged loss over the lr axis,
# taken independently for the valid and test columns.
per_model = results_mean_seed.reshape(4, -1, 2)
results_layer = per_model.min(dim=1).values
results_layer

tensor([[2.2048e-06, 2.0696e-06],
        [2.8142e-06, 2.4549e-06],
        [9.8968e-06, 9.3305e-06],
        [2.0879e-05, 2.0558e-05]])

In [6]:
# Per lr: smallest seed-averaged loss over the model axis,
# taken independently for the valid and test columns.
per_lr = results_mean_seed.transpose(0, 1).reshape(3, -1, 2)
results_layer = per_lr.min(dim=1).values
results_layer

tensor([[1.2589e-05, 1.2080e-05],
        [4.9374e-06, 4.4583e-06],
        [2.2048e-06, 2.0696e-06]])

In [7]:
# All seeds for models[0] at lr index 2 (lr tag -4); columns are [valid, test].
results[:, 0, 2]

tensor([[4.5144e-06, 4.6042e-06],
        [1.3790e-06, 1.1861e-06],
        [3.4587e-06, 3.0770e-06],
        [1.1707e-06, 1.1348e-06],
        [2.0738e-06, 2.0574e-06],
        [1.3088e-06, 1.1401e-06],
        [3.5839e-06, 3.4851e-06],
        [1.0539e-06, 9.7044e-07],
        [1.3416e-06, 1.2946e-06],
        [2.1633e-06, 1.7468e-06]])

In [8]:
# Re-load and re-evaluate the checkpoint chosen as the final model:
# models[0], lr tag -4, seed 7 (the entry results[7, 0, 2, :]).
FINAL_MODEL_IDX = 0
FINAL_LR = -4
FINAL_SEED = 7

modelname = f'predictor_{models[FINAL_MODEL_IDX]}_lr_{FINAL_LR}_seed_{FINAL_SEED}'
# NOTE(review): torch.load unpickles arbitrary objects; only load trusted
# checkpoints. Recent PyTorch releases may need weights_only=False for
# full-model pickles - confirm against the installed version.
model = torch.load(f'./NNs/{modelname}',map_location=torch.device('cpu'))
# Evaluation mode, in case the checkpoint was saved while training.
model.eval()

# Seed with this checkpoint's own seed, not whatever `seed` an earlier loop left behind.
config.SetSeed(FINAL_SEED)

loss_fn = torch.nn.MSELoss(reduction='mean')

# Losses go into their own tensor ([valid, test]) so the `results` table from
# the sweep is not overwritten at some unrelated index.
final_losses = torch.zeros(2)
with torch.no_grad():  # pure evaluation: no autograd graph is needed
    for split_idx, loader in enumerate((valid_loader, test_loader)):
        total_loss = 0.0
        total_samples = 0
        for inputs, targets in loader:
            batch_size = inputs.size(0)
            # Weight each batch loss by its size to get a per-sample mean.
            total_loss += loss_fn(model(inputs), targets).item() * batch_size
            total_samples += batch_size
        final_losses[split_idx] = total_loss / total_samples

print(final_losses[0], final_losses[1])


tensor(1.0539e-06) tensor(9.7044e-07)


In [10]:
# Write the selected model to both locations under the same file name.
for export_path in (f'./NNs/final_SG.model', f'../utils/final_SG.model'):
    torch.save(model, export_path)