In [None]:
import pandas as pd
import numpy as np
from string import ascii_lowercase
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, Subset

# Build model

In [None]:
class Net(nn.Module):
    """Fully connected regressor: 35 inputs -> 100 -> 100 -> 1 output.

    The attribute names ``fc1``, ``fc2`` and ``fc3`` become the parameter keys
    of the module's ``state_dict``, so they have to stay as they are for the
    saved fold checkpoints to load.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in input-to-output order.
        self.fc1 = nn.Linear(in_features=35, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=1)
        # A single in-place ReLU module is shared by both hidden layers.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply both ReLU-activated hidden layers, then the linear output layer."""
        hidden = self.fc1(x)
        hidden = self.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        return self.fc3(hidden)

In [None]:
# Map lowercase labels ('a'..'z', 'aa', 'ab', ...) to consecutive integer codes.
def iter_all_strings():
    """Yield every lowercase string, shortest first: 'a'..'z', then 'aa', 'ab', ...

    The generator never terminates on its own; callers must bound it
    (for example with ``itertools.islice``).
    """
    length = 1
    while True:
        for letters in itertools.product(ascii_lowercase, repeat=length):
            yield "".join(letters)
        length += 1


# The first 55 labels ('a' .. 'bc') receive the codes 0 .. 54.
categorical_dict = {
    label: code
    for code, label in enumerate(itertools.islice(iter_all_strings(), 55))
}

In [None]:
class ASMETestDataset(Dataset):
    """Rows of a test CSV file served as ``(features, ID)`` pairs.

    The first CSV column is treated as the row ID and every remaining column
    as a feature. The first ``num_categorical`` feature columns hold
    categorical labels, which are translated to numbers through
    ``category_map`` before the row is converted to a float32 tensor.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
        category_map: Mapping from categorical label to numeric code.
            Defaults to the notebook-level ``categorical_dict``.
        num_categorical: Number of leading feature columns to translate
            (default 8).
    """

    def __init__(self, csv_file, category_map=None, num_categorical=8):
        super().__init__()
        self.df = pd.read_csv(csv_file)
        # The global default is looked up only when no mapping is supplied.
        self.category_map = categorical_dict if category_map is None else category_map
        self.num_categorical = num_categorical

    def __getitem__(self, idx):
        """Return ``(features, ID)`` for the row at position ``idx``.

        Returns:
            A 1-D float32 tensor with one value per feature column, and the
            value of the first CSV column for that row.

        Raises:
            IndexError: if ``idx`` is outside the table.
            KeyError: if a categorical label is missing from the mapping.
        """
        # Positional access throughout: it does not depend on the frame's
        # index labels, avoids the deprecated integer-key lookup on a
        # string-labelled Series, and raises IndexError past the end so that
        # plain iteration over the dataset terminates properly.
        item = self.df.iloc[idx]
        ID = item.iloc[0]
        x = item.iloc[1:].to_list()
        for i in range(self.num_categorical):
            x[i] = self.category_map[x[i]]
        return torch.from_numpy(np.array(x, dtype=np.float32)), ID

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.df)

In [None]:
dataset = ASMETestDataset('./Dataset_Siemens/FinalHackathonData/test/test.csv')

# Checkpoints that take part in the ensemble. Fold 1 is excluded; the average
# below divides by the number of folds actually used, so the two cannot drift
# apart if this list changes.
folds = [fold for fold in range(5) if fold != 1]

# Decode every test row once and reuse the result for all folds instead of
# re-reading and re-encoding each row for every lookup.
samples = [dataset[row] for row in range(len(dataset))]
ids = [sample_id for _, sample_id in samples]

# Running sum of the per-fold predictions, one slot per test row.
y_sum = np.zeros(len(samples), dtype=np.float64)

for fold in folds:
    net = Net()
    # map_location='cpu' lets checkpoints saved from a GPU load on a CPU-only
    # machine; the freshly built Net lives on the CPU anyway.
    net.load_state_dict(
        torch.load('./model/model_fold{}.pth'.format(fold), map_location='cpu')
    )
    net.eval()

    with torch.no_grad():
        # One forward pass per row (batch of size 1), as a plain Python float.
        fold_y = [net(features.unsqueeze(0)).item() for features, _ in samples]

    y_sum += np.asarray(fold_y, dtype=np.float64)

# IDs are taken directly from the dataset; only the predictions are averaged.
predictions = pd.DataFrame({'ID': ids, 'y': y_sum / len(folds)})

In [None]:
predictions['ID'] = predictions['ID'].astype(int)
# DataFrame.round leaves object-dtype columns untouched, so 'y' is cast to
# float first to make sure the displayed values really are rounded.
# NOTE(review): the rounded frame is only displayed, not assigned back, so
# `predictions` keeps full precision — confirm that is what the submission
# file should contain.
predictions.astype({'y': float}).round({'y': 2})

In [None]:
# Write the ID / y table to the submission file, without the index column.
submission_path = 'task1_submission_longhorn.csv'
predictions.to_csv(submission_path, index=False)