In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Batch-size constant.
# NOTE(review): the DataLoader built later in this notebook passes its own
# literal batch size, so this value is not what the loader uses.
batch_size = 64

# Prefer the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
class SiameseNet(nn.Module):
    """Siamese network that scores a pair of 324-dimensional feature vectors.

    Both inputs go through the same ``body`` encoder. The element-wise absolute
    difference of the two encodings is passed to ``classifier``, which ends in
    a sigmoid and emits one value per pair.
    """

    def __init__(self):
        super().__init__()
        # Shared encoder applied to each side of the pair: 324 -> 512 -> 512.
        encoder_layers = [
            nn.Linear(324, 512),
            nn.GELU(),
            nn.Linear(512, 512),
        ]
        self.body = nn.Sequential(*encoder_layers)
        # Head applied to the encoding difference: 512 -> 512 -> 1 -> sigmoid.
        head_layers = [
            nn.SiLU(),
            nn.Linear(512, 512),
            nn.ELU(),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, input1, input2):
        """Return the classifier output for the pair ``(input1, input2)``."""
        left, right = self.body(input1), self.body(input2)
        # Absolute difference makes the score independent of argument order.
        distance = (left - right).abs()
        return self.classifier(distance)
    
model = SiameseNet()
# Remap the saved tensors onto the device chosen earlier so that a checkpoint
# written on a GPU machine still loads when this notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load('model_v5_39.pth', map_location=device))

<All keys matched successfully>

In [4]:
# Move the network's parameters to the selected device (Module.to returns the module itself).
model = model.to(device)

In [5]:
class LCTTestDataset(Dataset):
    """Unlabeled pairs of variants for inference.

    Args:
        data: path to a CSV file that contains a ``variantid`` column.
        pairs: path to a parquet file that contains ``variantid1`` and
            ``variantid2`` columns.

    Item ``i`` is a tuple ``(row1, row2)`` of NumPy arrays: for each id of the
    i-th pair, the first CSV row with that ``variantid``, minus the CSV's
    first column.
    """

    def __init__(self, data, pairs):
        self.data = pd.read_csv(data)
        self.pairs = pd.read_parquet(pairs)
        self._build_row_lookup()

    def _build_row_lookup(self):
        """Map every variantid to the position of its first row in ``self.data``."""
        self._row_of = {}
        for position, variant_id in enumerate(self.data['variantid'].tolist()):
            # setdefault keeps the first occurrence, which matches the former
            # "filter the frame, then take row 0" behaviour for duplicated ids.
            self._row_of.setdefault(variant_id, position)

    def __len__(self):
        return len(self.pairs)

    def _features(self, variant_id):
        """Return the feature row for ``variant_id``.

        Raises:
            IndexError: if the id is absent from the data (same exception type
                the former positional lookup on an empty selection produced).
        """
        try:
            position = self._row_of[variant_id]
        except KeyError:
            raise IndexError(f"variantid {variant_id!r} not found in data") from None
        # Dictionary lookup replaces a full boolean-mask scan of the table.
        return self.data.iloc[position, 1:].values

    def __getitem__(self, index):
        # Positional access: the loader passes 0..len-1, which only coincides
        # with index labels when the pairs frame has a default RangeIndex.
        # Columns are read individually so the ids keep their own dtype.
        first_id = self.pairs['variantid1'].iloc[index]
        second_id = self.pairs['variantid2'].iloc[index]
        return self._features(first_id), self._features(second_id)

In [6]:
# Input files: the feature table (CSV) and the pairs to score (parquet).
data_path, pairs_path = 'dataset.csv', 'test_pairs_wo_target.parquet'

In [7]:
# Inference only: freeze the weights and put the layers in eval mode.
for param in model.parameters():
    param.requires_grad = False

model.eval()
testdataset = LCTTestDataset(data=data_path, pairs=pairs_path)    # get dataset
# shuffle=False keeps the predictions in the same order as the rows of the pairs file.
# NOTE(review): the `batch_size` constant defined earlier is not used here; the
# literal 128 is kept unchanged.
testloader = DataLoader(testdataset, batch_size=128, shuffle=False)    # make batches

batch_preds = []
with torch.no_grad():    # no autograd bookkeeping during prediction
    for features1, features2 in testloader:
        inputs1, inputs2 = features1.to(device).float(), features2.to(device).float()
        # Move each batch of scores to the CPU right away so device memory
        # does not grow with the size of the test set.
        batch_preds.append(model(inputs1, inputs2).cpu())

# Concatenate once at the end instead of re-allocating a growing tensor on
# every batch. The result lives on the CPU with one row per pair.
test_pred = torch.cat(batch_preds, dim=0) if batch_preds else torch.Tensor()

In [8]:
# Read the pairs from the same path the dataset was built from, so the rows
# and the predictions refer to the same file.
test = pd.read_parquet(pairs_path)
# The model emits one column per pair; flatten it to a 1-D array so the
# DataFrame column receives plain scalar scores.
scores = test_pred.detach().cpu().numpy().reshape(-1)
assert len(scores) == len(test), "number of predictions does not match number of pairs"
test['target'] = scores

In [9]:
# Save the pairs together with their predicted target, omitting the DataFrame index.
test.to_csv(path_or_buf='sub16.csv', index=False)