In [69]:
from preprocessing import *
from sklearn.model_selection import KFold
import argparse
from model import *
from train import test
import torch.optim as optim
import pandas as pd

from MatrixVectorizer import *


In [72]:
# Locations of the flattened low-/high-resolution training matrices.
lr_data_path = '../data/lr_train.csv'
hr_data_path = '../data/hr_train.csv'


def _read_clean_csv(csv_path):
    """Read a CSV into a NumPy array, then zero out negative and NaN entries in place."""
    values = pd.read_csv(csv_path, delimiter=',').to_numpy()
    # Negatives are zeroed first; NaN compares False against 0, so NaNs are
    # handled by the nan_to_num pass that follows.
    values[values < 0] = 0
    np.nan_to_num(values, copy=False)
    return values


lr_train_data = _read_clean_csv(lr_data_path)
hr_train_data = _read_clean_csv(hr_data_path)

# Rebuild one square matrix per CSV row (160 nodes for LR, 268 nodes for HR).
lr_train_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 160) for flat_row in lr_train_data]
)
hr_train_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 268) for flat_row in hr_train_data]
)

# The first 80% of the subjects are used for training, the rest are held out.
num_samples = len(hr_train_data_vectorized)
split = int(num_samples * 0.8)

[[0.33881717 0.20248584 0.03983874 ... 0.42020538 0.25529165 0.18337431]
 [0.63832894 0.11630679 0.89276219 ... 0.73026434 0.44675317 0.48986114]
 [0.62471606 0.02028873 0.2971744  ... 0.52366473 0.60326226 0.43792296]
 ...
 [0.29367774 0.54314711 0.17428709 ... 0.22557486 0.71399684 0.36689618]
 [0.60139502 0.77059429 0.60485866 ... 0.68879933 0.74031453 0.41991915]
 [0.50114943 0.27204258 0.3557178  ... 0.43947877 0.60193006 0.31666221]]


In [73]:

# Subjects before `split` are used for training / cross-validation ...
subjects_adj = lr_train_data_vectorized[:split]
subjects_labels = hr_train_data_vectorized[:split]

# ... and the remaining subjects are kept aside as a held-out set.
held_out_subjects_adj = lr_train_data_vectorized[split:]
held_out_subjects_labels = hr_train_data_vectorized[split:]

In [74]:
# Tunable settings, gathered in one place.
num_splt = 3        # NOTE(review): presumably the number of CV folds; not referenced by `args`
epochs = 200        # passes over the training subjects in train()
lr = 5e-05          # learning rate handed to the optimizer via args.lr
lmbda = 35          # weight of the criterion(net_outs, start_gcn_outs) term in train()
lr_dim = 160
hr_dim = 320        # equals 268 + 2 * padding; presumably the padded HR size (confirm in model.py)
hidden_dim = 320
padding = 26        # passed to unpad() / pad_HR_adj()
dropout = 0.3       # exposed to the model as args.p

# Bundle the settings into the namespace object consumed by GSRNet and train().
args = argparse.Namespace(
    epochs=epochs,
    lr=lr,
    lmbda=lmbda,
    lr_dim=lr_dim,
    hr_dim=hr_dim,
    hidden_dim=hidden_dim,
    padding=padding,
    p=dropout,
)


In [75]:
# Shuffled K-fold splitter with a fixed seed; the fold count comes from the
# `num_splt` setting instead of a second hard-coded literal that could drift.
cv = KFold(n_splits=num_splt, random_state=42, shuffle=True)

In [76]:
# `ks` is passed to every GSRNet constructed in this notebook (here and for the final model).
# NOTE(review): presumably these are per-level pooling ratios — confirm against model.py.
ks = [0.9, 0.7, 0.6, 0.5]
# Model instance for the experiment cells; the submission uses a separate `final_model`.
model = GSRNet(ks, args)

In [78]:
# L1 (mean absolute error) is used for every term of the training objective.
criterion = nn.L1Loss()

def train(model, optimizer, subjects_adj, subjects_labels, args):
  """Train `model` one subject at a time and return the per-epoch mean loss.

  Args:
    model: network called as `model(lr)`; it must return the 4-tuple
      `(model_outputs, net_outs, start_gcn_outs, layer_outs)` and expose
      `model.layer.weights`.
    optimizer: optimizer already bound to the model's parameters.
    subjects_adj: iterable of low-resolution matrices as NumPy arrays
      (assumed square adjacency matrices — confirm against the data loader).
    subjects_labels: iterable of matching high-resolution target matrices
      as NumPy arrays, paired with `subjects_adj` by position.
    args: namespace providing `epochs`, `padding` and `lmbda`.

  Returns:
    A list with one entry per epoch holding the mean training loss over all
    subjects in that epoch. (Earlier versions built this list but returned
    None; callers that ignore the return value are unaffected.)
  """
  all_epochs_loss = []

  for epoch in range(args.epochs):
    epoch_loss = []
    epoch_error = []

    model.train()
    for lr_adj, hr_adj in zip(subjects_adj, subjects_labels):
      lr_adj = torch.from_numpy(lr_adj).type(torch.FloatTensor)
      hr_adj = torch.from_numpy(hr_adj).type(torch.FloatTensor)

      model_outputs, net_outs, start_gcn_outs, layer_outs = model(lr_adj)
      # Strip the padding so the prediction is comparable with the raw target.
      model_outputs = unpad(model_outputs, args.padding)

      # Eigenvectors of the padded target are the reference for the layer weights.
      padded_hr = pad_HR_adj(hr_adj, args.padding)
      eig_val_hr, U_hr = torch.linalg.eigh(padded_hr, UPLO='U')

      # Reconstruction error: reported on its own and also the last loss term,
      # so it is computed once and reused.
      error = criterion(model_outputs, hr_adj)
      loss = args.lmbda * criterion(net_outs, start_gcn_outs) + criterion(model.layer.weights, U_hr) + error

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      epoch_loss.append(loss.item())
      epoch_error.append(error.item())

    # The model is left in eval mode once the epoch's updates are done.
    model.eval()
    print("Epoch: ",epoch+1, "Loss: ", np.mean(epoch_loss), "Error: ", np.mean(epoch_error))
    all_epochs_loss.append(np.mean(epoch_loss))

  return all_epochs_loss

In [None]:
# # print(model)
# optimizer = optim.Adam(model.parameters(), lr=args.lr)
# # optimizer = optim.SGD(model.parameters(), lr=args.lr)

# train(model, optimizer, subjects_adj, subjects_labels, args)

# print('Held out test score:')
# test(model, held_out_subjects_adj, held_out_subjects_labels, args)
# print('------------------------------')

In [None]:
# # print(model)
# optimizer = optim.Adam(model.parameters(), lr=args.lr)
# # optimizer = optim.SGD(model.parameters(), lr=args.lr)

# for train_index, test_index in cv.split(subjects_adj):
#     subjects_adj_train = subjects_adj[train_index]  # Get training data 
#     subjects_adj_test = subjects_adj[test_index]   # Get testing data 
#     subjects_ground_truth_train = subjects_labels[train_index]
#     subjects_ground_truth_test = subjects_labels[test_index]

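#     # NOTE: train() as currently defined takes only (model, optimizer, subjects_adj, subjects_labels, args);
#     # drop the two test-split arguments below (or extend train()) before re-enabling this cell.
#     train(model, optimizer, subjects_adj_train, subjects_ground_truth_train, args, subjects_adj_test, subjects_ground_truth_test)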
    
#     print('Held out test score:')
#     test(model, held_out_subjects_adj, held_out_subjects_labels, args)
#     print('------------------------------')

# Final Model & Kaggle Submission

In [79]:
#final train
# Seed NumPy and PyTorch before the model is built so that a Restart & Run All
# reproduces the same run (whatever random initialisation GSRNet performs and
# any stochastic layers it uses during training draw from these generators).
np.random.seed(42)
torch.manual_seed(42)

final_model = GSRNet(ks, args)
optimizer = optim.Adam(final_model.parameters(), lr=args.lr)

# Fit on the full training set (no held-out split) for the submission model.
# The trailing semicolon keeps the cell output limited to the printed training log.
train(final_model, optimizer, lr_train_data_vectorized, hr_train_data_vectorized, args);

Epoch:  1 Loss:  0.6943827694404625 Error:  0.24107033380134377
Epoch:  2 Loss:  0.42194930557719246 Error:  0.2097839743434312
Epoch:  3 Loss:  0.39719308725374186 Error:  0.19953125242344633
Epoch:  4 Loss:  0.3804442973550922 Error:  0.19597093252364747
Epoch:  5 Loss:  0.36628957708438714 Error:  0.19365328202347556
Epoch:  6 Loss:  0.3536843803828348 Error:  0.19148478268863198
Epoch:  7 Loss:  0.34289177467009263 Error:  0.18970103044352846
Epoch:  8 Loss:  0.3340673610835732 Error:  0.18852050798738787
Epoch:  9 Loss:  0.32568974409274715 Error:  0.18675150084281397
Epoch:  10 Loss:  0.31787273912372704 Error:  0.1846015876281761
Epoch:  11 Loss:  0.3112945330000209 Error:  0.183010894113672
Epoch:  12 Loss:  0.305807844250502 Error:  0.18195410688480218
Epoch:  13 Loss:  0.30113224230126706 Error:  0.18125191107838454
Epoch:  14 Loss:  0.2970472222673679 Error:  0.18076733290078398
Epoch:  15 Loss:  0.2934673557024516 Error:  0.18043872598045602
Epoch:  16 Loss:  0.289685543604

In [80]:
#Generate submission

# Read the flattened low-resolution test matrices.
test_lr_data_path = '../data/lr_test.csv'
lr_test_data = pd.read_csv(test_lr_data_path, delimiter=',').to_numpy()
print(lr_test_data.shape)

# Same cleaning as for the training data: zero the negatives, then the NaNs, in place.
np.putmask(lr_test_data, lr_test_data < 0, 0)
np.nan_to_num(lr_test_data, copy=False)

# Rebuild one 160 x 160 matrix per row of lr_test_data.
lr_test_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 160) for flat_row in lr_test_data]
)
print(lr_test_data_vectorized.shape)

(112, 12720)
(112, 160, 160)


In [81]:
# Predict a high-resolution matrix for every test subject and flatten the
# results into the single vector expected by the submission file.
final_model.eval()
preds = []
# Inference only: no autograd graph is needed, so disable gradient tracking.
with torch.no_grad():
  # The loop variable is deliberately not called `lr`, so the learning-rate
  # setting of the same name from the config cell is not overwritten.
  for lr_matrix in lr_test_data_vectorized:
    lr_tensor = torch.from_numpy(lr_matrix).type(torch.FloatTensor)

    model_outputs, _, _, _ = final_model(lr_tensor)
    # Strip the padding before vectorizing the prediction.
    model_outputs = unpad(model_outputs, args.padding)
    preds.append(MatrixVectorizer.vectorize(model_outputs.detach().numpy()))

print(len(preds), preds[0].shape)
# Concatenate the per-subject vectors end to end, in subject order.
r = np.hstack(preds)
print(r.shape)
meltedDF = r.flatten()

112 (35778,)
(4007136,)


In [85]:
# One row per predicted value, with a running ID that starts at 1.
n = len(meltedDF)
df = pd.DataFrame({
    'ID': np.arange(1, n + 1),
    'Predicted': meltedDF,
})
df.to_csv('submission.csv', index=False)