# Notebook for Inference

This notebook is used to run inference on the trained model. 

It is used to generate the predictions on the test set and to generate the submission file for the competition.

> Note: We assume that this notebook runs on the Google Colab platform. The next cell mounts Google Drive at `/content/drive`; the libraries we need are imported in the cell after it.

In [None]:
from google.colab import drive

# Mount point inside the Colab VM where Google Drive becomes visible.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive
cp: missing destination file operand after '/content/models/model.py'
Try 'cp --help' for more information.


In [None]:
import csv
import torch
import pandas as pd
from models.model import LSTMwithAttn
from dataset import SunlightDataset
from torch.utils.data import DataLoader

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# Rebuild the network with the same hyper-parameters the checkpoint was trained with;
# load_state_dict below requires the parameter names and shapes to match.
model = LSTMwithAttn(input_dim=13, hidden_dim=64, output_dim=1, num_layers=2, device=device)

# Load the model
# map_location remaps tensors that were saved from a GPU onto the device selected
# earlier, so the checkpoint also loads on a CPU-only runtime.
model.load_state_dict(torch.load('models/model.pt', map_location=device))
model.to(device)

# Alternative input files, kept for reference:
# testdata = pd.read_csv('dataset/Tdata10.csv')
# testdata = pd.read_csv('dataset/data17.csv').drop('amount', axis=1)
testdata = pd.read_csv('dataset/Tdata17.csv')

features_test = testdata
# SunlightDataset is constructed with a labels frame; at inference time an all-zero
# placeholder 'label' column (one entry per test row) is passed instead.
labels_test = pd.DataFrame({'label': [0] * len(testdata)})
test_dataset = SunlightDataset(features_test, labels_test)
# shuffle=False keeps the batches in dataset order, which the prediction loop relies on
# when it maps each window back to test rows.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [None]:
# Row count of the test frame; used to size the per-row prediction buckets.
num_test = testdata.shape[0]
num_test

24

In [None]:
# Number of samples the dataset yields. Compare with num_test: the dataset may expose
# fewer samples than there are rows (presumably overlapping windows — TODO confirm in dataset.py).
len(test_dataset)

23

In [None]:
# Peek at the first batch and show the shape of its feature tensor.
first_features, _first_labels = next(iter(test_loader))
first_features.shape

torch.Size([1, 2, 13])

In [None]:
# One independent, empty bucket per test row; each bucket collects every
# prediction later made for that row.
y_pred = [list() for _row in range(num_test)]
len(y_pred)

24

In [None]:
# Number of batches per pass over the loader; with batch_size=1 this equals len(test_dataset).
len(test_loader)

23

In [None]:
# test phase
# Switch the model to evaluation mode before predicting.
model.eval()

# Index of the test row at which the current window starts. The loader is not
# shuffled, so windows arrive in dataset order and window k is assumed to start
# at row k (the same alignment as indexing by the batch counter with batch_size=1).
window_start = 0

with torch.no_grad():
    # The loader's second element is the dummy label tensor; it is not needed here.
    for features, _ in test_loader:
        features = features.to(device)

        # Drop the trailing output dimension, leaving (assumed) one prediction per
        # sequence position for each window, then clamp negative predictions to zero.
        outputs = torch.clamp(model(features).squeeze(-1), min=0)

        # One host transfer per batch instead of one .item() call per element.
        for window in outputs.tolist():
            for seq_idx, value in enumerate(window):
                # Overlapping windows predict the same row several times; keep every
                # prediction so they can be averaged afterwards.
                y_pred[window_start + seq_idx].append(value)
            window_start += 1


In [None]:
# Collapse each row's bucket of predictions into its arithmetic mean.
# NOTE: this rebinds `y_pred` from a list of lists to a list of floats, so the cells
# that create and fill the buckets must be re-run before executing this cell again.
y_pred = [sum(bucket) / len(bucket) for bucket in y_pred]

In [None]:
# Wrap the averaged predictions in a single-column frame and write it out without the index.
y_pred_pd = pd.DataFrame(data=y_pred)
# Alternative output file:
# y_pred_pd.to_csv('dataset/sun.csv', index=False)
y_pred_pd.to_csv(path_or_buf='dataset/afternoon.csv', index=False)

In [None]:
# Display the final predictions: one averaged, non-negative value per test row.
y_pred

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.09375838935375214,
 8.090917348861694,
 30.550765991210938,
 42.401145935058594,
 50.96958351135254,
 54.080318450927734,
 50.69963836669922,
 47.708961486816406,
 35.20708656311035,
 8.04421067237854,
 0.09835322201251984,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]