In [100]:
from preprocessing import *
from sklearn.model_selection import KFold
import argparse
from model import *
from train import test
import torch.optim as optim
import pandas as pd

from MatrixVectorizer import *
import networkx as nx
from typing import Union


In [12]:
# Training data: each CSV row is one subject's vectorised connectome.
lr_data_path = '../data/lr_train.csv'
hr_data_path = '../data/hr_train.csv'

# --- low-resolution graphs ---------------------------------------------------
lr_train_data = pd.read_csv(lr_data_path, delimiter=',').to_numpy()
# Clamp negative entries to zero first, then let nan_to_num replace the
# remaining non-finite values (NaN never satisfies `< 0`). Both steps are in place.
lr_train_data[lr_train_data < 0] = 0
np.nan_to_num(lr_train_data, copy=False)
# Rebuild one matrix per row; 160 is passed as anti_vectorize's size argument.
lr_train_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 160) for flat_row in lr_train_data]
)

# --- high-resolution graphs (same treatment, size argument 268) --------------
hr_train_data = pd.read_csv(hr_data_path, delimiter=',').to_numpy()
hr_train_data[hr_train_data < 0] = 0
np.nan_to_num(hr_train_data, copy=False)
hr_train_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 268) for flat_row in hr_train_data]
)

# Index separating the first 80% of subjects from the remainder.
num_samples = hr_train_data_vectorized.shape[0]
split = int(num_samples * 0.8)

In [13]:

# Subjects before `split` form the training / cross-validation pool ...
subjects_adj = lr_train_data_vectorized[:split]
subjects_labels = hr_train_data_vectorized[:split]

# ... and the subjects from `split` onwards are kept aside as a held-out set.
held_out_subjects_adj = lr_train_data_vectorized[split:]
held_out_subjects_labels = hr_train_data_vectorized[split:]

In [148]:
# Cross-validation and optimisation settings.
num_splt = 3
epochs = 10
lr = 0.00005
lmbda = 25

# Model dimensions. NOTE(review): hr_dim equals 268 + 2 * padding, which
# presumably is the padded high-resolution size -- confirm against the model code.
lr_dim = 160
hr_dim = 320
hidden_dim = 320
padding = 26
dropout = 0.1

# Bundle the settings into the namespace that the model and train() read.
# The dropout probability is exposed under the attribute name `p`.
args = argparse.Namespace(
    epochs=epochs,
    lr=lr,
    lmbda=lmbda,
    lr_dim=lr_dim,
    hr_dim=hr_dim,
    hidden_dim=hidden_dim,
    padding=padding,
    p=dropout,
)


In [82]:
# Shuffled K-fold splitter with a fixed seed. The fold count comes from the
# `num_splt` setting in the configuration cell instead of a second hard-coded 3,
# so the two cannot drift apart.
cv = KFold(n_splits=num_splt, random_state=42, shuffle=True)

In [83]:
# `ks` is passed to GSRNet as its first argument (also reused for the final model).
# NOTE(review): presumably the per-level pooling ratios of the network -- confirm in model.py.
ks = [0.7, 0.5]
# Model instance configured from the shared `args` namespace. In the code visible
# here it is referenced only by the commented-out training / CV cells.
model = GSRNet(ks, args)

In [153]:
class TopologicalMeasures:
    """Node-level topological measures of a graph given as an adjacency matrix.

    The matrix is converted to a ``networkx.Graph`` once in the constructor;
    ``compute_measures`` then evaluates six networkx measures on that graph.
    """

    def __init__(self,graph:Union[np.ndarray,torch.Tensor]):
        """Build the networkx graph from an adjacency matrix.

        Args:
            graph: adjacency matrix as a numpy array or a torch tensor. A tensor
                is moved to CPU and detached before conversion, so nothing
                computed from this object is connected to the autograd graph.

        Raises:
            TypeError: if ``graph`` is neither a numpy array nor a torch tensor.
                (Previously such input left ``self.graph`` unset and only failed
                later with an AttributeError inside ``compute_measures``.)
        """
        if isinstance(graph,torch.Tensor):
            graph = graph.cpu().detach().numpy()
        elif not isinstance(graph,np.ndarray):
            raise TypeError(
                f"graph must be a numpy.ndarray or torch.Tensor, got {type(graph).__name__}"
            )
        self.graph = nx.Graph(graph)

    def compute_measures(self):
        """Return a dict mapping measure name to a FloatTensor of per-node values.

        Keys, in insertion order: 'degree', 'clustering', 'closeness',
        'betweenness', 'pagerank', 'eigenvector'. Values follow the iteration
        order of the dicts returned by networkx.

        NOTE(review): every networkx call uses its default arguments, so how edge
        weights are treated depends on each function's defaults -- confirm that
        is the intended definition of each measure.
        """
        measures = {}
        measures['degree'] = torch.FloatTensor(list(dict(self.graph.degree()).values()))
        measures['clustering'] = torch.FloatTensor(list(nx.clustering(self.graph).values()))
        measures['closeness'] = torch.FloatTensor(list(nx.closeness_centrality(self.graph).values()))
        measures['betweenness'] = torch.FloatTensor(list(nx.betweenness_centrality(self.graph).values()))
        measures['pagerank'] = torch.FloatTensor(list(nx.pagerank(self.graph).values()))
        measures['eigenvector'] = torch.FloatTensor(list(nx.eigenvector_centrality(self.graph).values()))
        return measures

    # @staticmethod
def compute_topological_MAE_loss(graph1:Union[np.ndarray,torch.Tensor],graph2:Union[np.ndarray,torch.Tensor]):
    """Average L1 distance between the topological measures of two graphs.

    Each graph is wrapped in ``TopologicalMeasures`` and its measures computed.
    For every measure of ``graph1`` the L1 loss against the same-named measure
    of ``graph2`` is taken, and the mean of those per-measure losses is returned.

    Args:
        graph1: adjacency matrix of the first graph (numpy array or torch tensor).
        graph2: adjacency matrix of the second graph (numpy array or torch tensor).

    Returns:
        The per-measure L1 losses averaged over the number of measures.
    """
    first_measures = TopologicalMeasures(graph1).compute_measures()
    second_measures = TopologicalMeasures(graph2).compute_measures()

    # One L1 term per measure, keyed by the measures of the first graph.
    per_measure_mae = [
        F.l1_loss(first_measures[name], second_measures[name])
        for name in first_measures
    ]
    return sum(per_measure_mae) / len(per_measure_mae)


In [154]:
# L1 (mean absolute error) criterion shared by every loss term in train().
criterion = nn.L1Loss()

def train(model, optimizer, subjects_adj,subjects_labels, args):
  """Train `model` for `args.epochs` epochs, one subject per optimisation step.

  For each (low-res, high-res) pair the loss is the sum of:
    * args.lmbda * L1(net_outs, start_gcn_outs)
    * L1(model.layer.weights, eigenvectors of the padded high-res matrix)
    * L1(unpadded model output, high-res matrix)
    * the topological MAE between the high-res matrix and the model output.

  The topological term is computed through networkx on a detached copy of the
  output, so it carries no gradient: it changes the reported loss value but not
  the parameter update.

  Args:
    model: network whose forward pass returns four values
      (model_outputs, net_outs, start_gcn_outs, layer_outs) and which exposes
      `model.layer.weights`.
    optimizer: torch optimizer over the model's parameters.
    subjects_adj: iterable of low-resolution adjacency matrices (numpy arrays).
    subjects_labels: iterable of matching high-resolution matrices (numpy arrays).
    args: namespace providing `epochs`, `padding` and `lmbda`.

  Returns:
    List with the mean training loss of each epoch. Earlier versions collected
    this list but returned None; callers that ignore the result are unaffected.
  """
  all_epochs_loss = []
  no_epochs = args.epochs

  for epoch in range(no_epochs):
    epoch_loss = []
    epoch_error = []
    epoch_topo = []

    model.train()
    for lr_adj, hr_adj in zip(subjects_adj, subjects_labels):
      lr_adj = torch.from_numpy(lr_adj).type(torch.FloatTensor)
      hr_adj = torch.from_numpy(hr_adj).type(torch.FloatTensor)

      model_outputs, net_outs, start_gcn_outs, _ = model(lr_adj)
      # Strip the padding so the prediction is comparable with the raw target.
      model_outputs = unpad(model_outputs, args.padding)

      # Eigenvectors of the padded target are the reference for the layer weights.
      padded_hr = pad_HR_adj(hr_adj, args.padding)
      _, U_hr = torch.linalg.eigh(padded_hr, UPLO='U')

      # Reconstruction error: computed once, used both as a loss term and as
      # the reported "Error" metric.
      error = criterion(model_outputs, hr_adj)
      loss = (args.lmbda * criterion(net_outs, start_gcn_outs)
              + criterion(model.layer.weights, U_hr)
              + error)

      # Gradient-free topological term (see docstring); kept in the total so
      # the reported loss stays comparable with earlier runs.
      topo = compute_topological_MAE_loss(hr_adj, model_outputs)
      loss = loss + topo

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      epoch_loss.append(loss.item())
      epoch_error.append(error.item())
      epoch_topo.append(topo.item())

    model.eval()
    print("Epoch: ",epoch+1, "Loss: ", np.mean(epoch_loss), "Error: ", np.mean(epoch_error), "Topo: ", np.mean(epoch_topo))
    all_epochs_loss.append(np.mean(epoch_loss))

  return all_epochs_loss

In [18]:
# # print(model)
# optimizer = optim.Adam(model.parameters(), lr=args.lr)
# # optimizer = optim.SGD(model.parameters(), lr=args.lr)

# train(model, optimizer, subjects_adj, subjects_labels, args)

# print('Held out test score:')
# test(model, held_out_subjects_adj, held_out_subjects_labels, args)
# print('------------------------------')

In [19]:
# # print(model)
# optimizer = optim.Adam(model.parameters(), lr=args.lr)
# # optimizer = optim.SGD(model.parameters(), lr=args.lr)

# for train_index, test_index in cv.split(subjects_adj):
#     subjects_adj_train = subjects_adj[train_index]  # Get training data 
#     subjects_adj_test = subjects_adj[test_index]   # Get testing data 
#     subjects_ground_truth_train = subjects_labels[train_index]
#     subjects_ground_truth_test = subjects_labels[test_index]

#     # NOTE: train() takes five arguments; the per-fold test arrays are no longer passed to it.
#     train(model, optimizer, subjects_adj_train, subjects_ground_truth_train, args)
    
#     print('Held out test score:')
#     test(model, held_out_subjects_adj, held_out_subjects_labels, args)
#     print('------------------------------')

# Final Model & Kaggle Submission

In [155]:
#final train
# Seed both RNGs before the model is built so the final fit (weight
# initialisation, dropout) is reproducible across kernel restarts. 42 matches
# the seed already used for the K-fold splitter. GSRNet's initialisers are not
# visible from this notebook, hence numpy and torch are both seeded.
np.random.seed(42)
torch.manual_seed(42)

final_model = GSRNet(ks, args)
optimizer = optim.Adam(final_model.parameters(), lr=args.lr)

# Fit on the complete training set (no held-out split) for the submission.
# The trailing ';' keeps the cell from echoing any return value.
train(final_model, optimizer, lr_train_data_vectorized, hr_train_data_vectorized, args);

tensor([257., 249., 257., 253., 256., 253., 242., 228., 219., 243., 229., 236.,
        221., 250., 242., 239., 242., 238., 232., 228., 236., 246., 219., 240.,
        208., 206., 247., 231., 238., 240., 238., 245., 189., 230., 230., 233.,
        244., 190., 177., 230., 192., 249., 248., 218., 213., 230., 221., 239.,
        257., 240., 250., 249., 246., 238., 232., 222., 243., 254., 256., 254.,
        248., 228., 247., 250., 246., 258., 257., 249., 236., 240., 231., 214.,
        216., 237., 255., 253., 250., 255., 243., 249., 227., 232., 232., 246.,
        201., 207., 161., 203., 245., 207., 239., 235., 108., 250., 237., 256.,
        254., 255., 246., 140., 204., 203., 245., 209., 190., 231., 157., 128.,
        177., 215., 231., 186., 206., 224., 121., 206., 172., 239., 241.,  39.,
        209., 137., 249., 245., 255., 214., 122., 250., 157., 221., 180., 240.,
        216., 256., 251., 246., 250., 253., 258., 248., 257., 239., 214., 231.,
        216., 234., 225., 208., 209., 23

KeyboardInterrupt: 

In [None]:
# Generate submission -- step 1: load the low-resolution test connectomes.

test_lr_data_path = '../data/lr_test.csv'

# One vectorised connectome per CSV row.
lr_test_data = pd.read_csv(test_lr_data_path, delimiter=',').to_numpy()
print(lr_test_data.shape)

# Same in-place cleaning as the training data: negatives -> 0, then
# non-finite values replaced by nan_to_num.
lr_test_data[lr_test_data < 0] = 0
np.nan_to_num(lr_test_data, copy=False)

# Rebuild one matrix per row of lr_test_data (size argument 160).
lr_test_data_vectorized = np.array(
    [MatrixVectorizer.anti_vectorize(flat_row, 160) for flat_row in lr_test_data]
)
print(lr_test_data_vectorized.shape)

(112, 12720)
(112, 160, 160)


In [None]:
# Predict a high-resolution graph for every test subject and flatten the
# predictions into one long vector for the submission file.
final_model.eval()
preds = []
# Inference only: no_grad avoids building autograd graphs for each forward pass.
with torch.no_grad():
  # The loop variable is deliberately not called `lr`: at module level that
  # would overwrite the learning-rate constant defined in the configuration cell.
  for lr_adj in lr_test_data_vectorized:
    lr_tensor = torch.from_numpy(lr_adj).type(torch.FloatTensor)

    model_outputs, _, _, _ = final_model(lr_tensor)
    model_outputs = unpad(model_outputs, args.padding)
    preds.append(MatrixVectorizer.vectorize(model_outputs.detach().numpy()))

print(len(preds), preds[0].shape)
# Concatenate the per-subject vectors end to end, in test-set order.
r = np.hstack(preds)
print(r.shape)
meltedDF = r.flatten()

112 (35778,)
(4007136,)


In [None]:
# Submission table: one row per predicted value, with IDs running from 1 to n.
n = meltedDF.shape[0]
df = pd.DataFrame(
    {
        'ID': np.arange(1, n + 1),
        'Predicted': meltedDF,
    }
)
# Written without the DataFrame index, leaving just the two columns above.
df.to_csv('submission.csv', index=False)