In [34]:
import torch
import sys
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import numpy as np
import training
import config
import matplotlib.pyplot as plt
import os

In [15]:
# Load every saved result matrix ./data/SG_results_{seed}_{model_idx}.matrix and
# collect them into a single tensor whose first two axes are [seed, model].
# The trailing axes come from the files themselves (presumably [lr, valid/test]
# -- TODO confirm against the script that wrote them).
N_MODELS = 4   # model_idx runs over 0..3
N_SEEDS = 10   # seed runs over 0..9

results_all = torch.stack([
    torch.stack([
        # NOTE(review): torch.load unpickles its input; only load trusted files.
        torch.load(f'./data/SG_results_{seed}_{model_idx}.matrix')
        for seed in range(N_SEEDS)
    ])
    for model_idx in range(N_MODELS)
])  # stacked model-major: [model, seed, ...]

# Swap the first two axes so that the seed axis comes first: [seed, model, ...].
results_all = results_all.permute(1, 0, 2, 3)

print(results_all.shape)

torch.Size([10, 4, 5, 2])


In [17]:
# Average over axis 0 (the seed axis) while ignoring NaN entries.
# Resulting axes: [model, lr, valid/test]
results_np = results_all.detach().numpy()
results_mean_seed = torch.tensor(np.nanmean(results_np, axis=0))
results_mean_seed.shape

torch.Size([4, 5, 2])

In [24]:
# Element-wise minimum over axis 0 of results_all; only the values are kept,
# the arg-min indices are discarded.
# NOTE(review): `re` is also the name of the stdlib regex module -- harmless
# here because that module is not imported in this notebook.
re = results_all.min(dim=0).values
re.shape

torch.Size([4, 5, 2])

In [25]:
# Slice index 0 of the last axis of `re`
# (presumably the validation loss, per the [model, lr, valid/test] axis note -- confirm).
re[..., 0]

tensor([[0.0004, 0.0002, 0.0003, 0.0006, 0.0011],
        [0.0072, 0.0002, 0.0002, 0.0003, 0.0004],
        [0.0117, 0.0003, 0.0001, 0.0002, 0.0003],
        [0.0158, 0.0002, 0.0002, 0.0002, 0.0002]])

In [28]:
# Slice index 1 of the last axis of `re`
# (presumably the test loss, per the [model, lr, valid/test] axis note -- confirm).
re[..., 1]

tensor([[2.8877e-04, 1.6335e-04, 2.2238e-04, 3.6757e-04, 7.5891e-04],
        [5.9935e-03, 1.5501e-04, 1.2907e-04, 1.7465e-04, 2.6438e-04],
        [1.0005e-02, 1.8885e-04, 8.8399e-05, 1.2209e-04, 2.3763e-04],
        [1.4080e-02, 1.3621e-04, 1.3986e-04, 1.1737e-04, 1.4413e-04]])

In [29]:
# Per-seed values (axis 0) for index 2 on axis 1, index 2 on axis 2,
# and index 1 on the last axis.
results_all[:, 2, 2][:, 1]

tensor([1.2501e-04, 2.9641e-04, 1.2081e-04, 1.8410e-04, 1.9595e-04, 1.6885e-04,
        1.3583e-04, 8.8399e-05, 9.2381e-05, 1.1652e-04])

In [35]:
# Read the stored dataset and pull out the validation / test splits by key.
data = torch.load(f'./data/dataset.ds')
X_valid, Y_valid = data['X_valid'], data['Y_valid']
X_test, Y_test = data['X_test'], data['Y_test']

# Pair inputs with targets, then batch each split in chunks of 32 (default order, no shuffling requested).
valid_data, test_data = TensorDataset(X_valid, Y_valid), TensorDataset(X_test, Y_test)
valid_loader, test_loader = (
    DataLoader(valid_data, batch_size=32),
    DataLoader(test_data, batch_size=32),
)

In [38]:
# Re-evaluate one saved predictor checkpoint on the validation and test loaders
# and print its sample-weighted mean MSE on each split.
models = ['gpt-nano', 'gpt-micro', 'gpt-mini', 'gopher-44m', 'gpt2']

# Which checkpoint to load; these three values are spliced into the file name.
SELECTED_MODEL_IDX = 2   # -> models[2] == 'gpt-mini'
SELECTED_LR_TAG = -4     # presumably a log10 learning-rate exponent -- TODO confirm
SELECTED_SEED = 7

modelname = f'predictor_{models[SELECTED_MODEL_IDX]}_lr_{SELECTED_LR_TAG}_seed_{SELECTED_SEED}'
# NOTE(review): torch.load unpickles arbitrary objects; only load trusted checkpoints.
model = torch.load(f'./NNs/{modelname}',map_location=torch.device('cpu'))

loss_fn = torch.nn.MSELoss(reduction='mean')


def _weighted_mean_loss(model, loader, loss_fn):
    """Return the mean of `loss_fn` over all samples yielded by `loader`.

    Each batch loss is weighted by the batch size (``x.size(0)``) so that a
    smaller final batch does not skew the average.

    Args:
        model: callable mapping an input batch to predictions.
        loader: iterable of ``(inputs, targets)`` batches.
        loss_fn: callable ``(predictions, targets) -> scalar tensor``.

    Returns:
        float: summed weighted batch losses divided by the sample count.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    total_loss = 0.0
    total_samples = 0
    # Pure evaluation: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        for inputs, targets in loader:
            batch_size = inputs.size(0)
            total_loss += loss_fn(model(inputs), targets).item() * batch_size
            total_samples += batch_size
    if total_samples == 0:
        raise ValueError('loader yielded no samples; cannot compute a mean loss')
    return total_loss / total_samples


# NOTE(review): model.eval() is not called, so the model runs in whatever mode
# the checkpoint was saved in. If the network contains dropout / batch-norm
# layers, confirm that this is the intended evaluation mode.
weighted_mean_loss_valid = _weighted_mean_loss(model, valid_loader, loss_fn)
weighted_mean_loss_test = _weighted_mean_loss(model, test_loader, loss_fn)


print(weighted_mean_loss_valid, weighted_mean_loss_test)

0.00013096270156286969 8.839867858720633e-05


In [42]:
# Persist the selected model object to both destinations, in this order.
for target_path in (f'./NNs/final_SG.model', f'../utils/final_SG.model'):
    torch.save(model, target_path)

In [39]:
# Random standard-normal input of shape (1, 100, 1); no seed is set, so the
# values differ from run to run.
test = torch.randn(1, 100, 1)

In [41]:
# Run the loaded predictor once on the random `test` input as a quick check.
# No torch.no_grad() is active here, so the returned tensor carries a grad_fn.
model(test)

tensor([0.1029, 0.1029, 0.1027, 0.1031, 0.1033, 0.1027, 0.1004, 0.1096, 0.1035,
        0.1081, 0.1051, 0.1038, 0.1035, 0.1064, 0.1033, 0.1059, 0.1047, 0.1050,
        0.0988, 0.1048, 0.1051, 0.1113, 0.1043, 0.0966, 0.1386, 0.1131, 0.1050,
        0.1045, 0.1137, 0.2691, 0.1594, 0.1202, 0.1011, 0.1044, 0.7218, 0.6571,
        0.1410, 0.1654, 0.1285, 0.1072, 0.1075, 0.1060, 0.1065, 0.1025, 0.1062,
        0.1066, 0.1029, 0.1055, 0.1052, 0.1011, 0.1002, 0.1038, 0.1056, 0.1041,
        0.0902, 0.1032, 0.0990, 0.1034, 0.1003, 0.1059, 0.1066, 0.0999, 0.1057,
        0.0991, 0.1031, 0.0942, 0.0975, 0.1043, 0.1042, 0.1006, 0.1132, 0.1053,
        0.1068, 0.1025, 0.1068, 0.0953, 0.1048, 0.1060, 0.1121, 0.1175, 0.1017,
        0.1239, 0.1748, 0.1759, 0.1689, 0.3815, 0.1138, 0.1028, 0.2237, 0.2780,
        0.5996, 0.5484, 0.1388, 0.1270, 0.1159, 0.1114, 0.1082, 0.1092, 0.1109,
        0.1018], grad_fn=<SqueezeBackward0>)