In [1]:
import argparse
import torch.optim as optim
from torch.utils.data import Dataset
import pandas as pd


import networkx as nx
from typing import Union

import random
import pickle

from MatrixVectorizer import *
from dataloaders import NoisyDataset
from model import *
from preprocessing import *
from train import *

In [2]:
# Set a fixed random seed for reproducibility across multiple libraries
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

device = torch.device("cpu")

In [3]:
# load csvs as numpy
lr_data_path = './data/lr_train.csv'
hr_data_path = './data/hr_train.csv'

lr_train_data = pd.read_csv(lr_data_path, delimiter=',').to_numpy()
hr_train_data = pd.read_csv(hr_data_path, delimiter=',').to_numpy()
lr_train_data[lr_train_data < 0] = 0
np.nan_to_num(lr_train_data, copy=False)

hr_train_data[hr_train_data < 0] = 0
np.nan_to_num(hr_train_data, copy=False)

# map the anti-vectorize function to each row of the lr_train_data
lr_train_data_vectorized = torch.tensor([MatrixVectorizer.anti_vectorize(row, 160) for row in lr_train_data],
                                        dtype=torch.float32)
hr_train_data_vectorized = torch.tensor([MatrixVectorizer.anti_vectorize(row, 268) for row in hr_train_data],
                                        dtype=torch.float32)


  lr_train_data_vectorized = torch.tensor([MatrixVectorizer.anti_vectorize(row, 160) for row in lr_train_data],


In [4]:
train_data = NoisyDataset(lr_train_data_vectorized, hr_train_data_vectorized, noise_level=0.5)
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True)

In [5]:
num_splt = 3
epochs = 150
lr = 0.00005 # try [0.0001, 0.0005, 0.00001, 0.00005]
lmbda = 17 # should be around 15-20
lamdba_topo = 0.0005 # should be around 0.0001-0.001
lr_dim = 160
hr_dim = 320
hidden_dim = 320 # try smaller and larger - [160-512]
padding = 26
dropout = 0.2 # try [0., 0.1, 0.2, 0.3]


args = argparse.Namespace()
args.epochs = epochs
args.lr = lr
args.lmbda = lmbda
args.lamdba_topo = lamdba_topo
args.lr_dim = lr_dim
args.hr_dim = hr_dim
args.hidden_dim = hidden_dim
args.padding = padding
args.p = dropout


# Final Model & Kaggle Submission

In [6]:
#final train
ks = [0.9, 0.7, 0.6, 0.5]
criterion = nn.L1Loss()
final_model = GSRNet(ks, args)
optimizer = optim.Adam(final_model.parameters(), lr=args.lr)

final_model.to(device)

train(final_model, train_data_loader, optimizer, criterion, args)

Epoch:  1 Loss:  0.4559370149038509 Error:  0.20930187297081518 Topo:  36.53095437809379
Epoch:  2 Loss:  0.31040223326511723 Error:  0.1824158550557976 Topo:  20.50272953010605
Epoch:  3 Loss:  0.3028820294462992 Error:  0.17900242527088006 Topo:  19.702250640549344
Epoch:  4 Loss:  0.29685558762379033 Error:  0.17723631698214365 Topo:  19.075980215015527
Epoch:  5 Loss:  0.29153364468477444 Error:  0.17600707700866425 Topo:  18.935270829115087
Epoch:  6 Loss:  0.2865848348526184 Error:  0.17504006677758907 Topo:  18.830346689966625
Epoch:  7 Loss:  0.2825273882129235 Error:  0.17482053064657543 Topo:  19.06470959486362
Epoch:  8 Loss:  0.27701796894658826 Error:  0.17331782643666524 Topo:  18.635903695386325
Epoch:  9 Loss:  0.27315063899505637 Error:  0.17260170134598624 Topo:  18.844210367716716
Epoch:  10 Loss:  0.2695612660246695 Error:  0.17209978838880619 Topo:  18.84680119246066
Epoch:  11 Loss:  0.2653402993065154 Error:  0.1708580909553402 Topo:  18.706497318016556
Epoch:  1

In [7]:
import pickle
filename = 'final-model.sav'
pickle.dump(final_model, open(filename, 'wb'))

In [8]:
generate_submission_csv(final_model, args)