In [9]:
#Importing all the different python modules needed

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
from ffModel import FeedForward

In [10]:
# Load the training CSV, split it into regression targets and feature columns,
# convert both to float32 tensors, and wrap them in a DataLoader.
# Note: the `testValues*` names below hold the *training targets* (the 'target'
# column of train.csv); the names are kept because later cells may refer to them.
trainData = pd.read_csv('train.csv')

# Targets: 'target' column -> ndarray -> float32 tensor.
testValues = trainData['target']
testValuesNum = testValues.to_numpy()
testValuesD = torch.tensor(testValuesNum, dtype=torch.float32)

# Features: every column except 'target', then with the 'id' column removed too.
columns = trainData.columns[trainData.columns != 'target']
trainValues = trainData[columns].drop(columns='id')
trainValuesNum = trainValues.to_numpy()
trainValuesD = torch.tensor(trainValuesNum, dtype=torch.float32)

# Batches of 25 (features, target) pairs, served in file order (no shuffling).
trainingData = DataLoader(
    TensorDataset(trainValuesD, testValuesD),
    batch_size=25,
    shuffle=False,
)

# Last expression of the cell: display the feature table.
trainValues

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
0,-1.067,-1.114,-0.616,0.376,1.090,0.467,-0.422,0.460,-0.443,-0.338,...,0.220,-0.339,0.254,-0.179,0.352,0.125,0.347,0.436,0.958,-0.824
1,-0.831,0.271,1.716,1.096,1.731,-0.197,1.904,-0.265,0.557,1.202,...,-0.765,-0.735,-1.158,2.554,0.856,-1.506,0.462,-0.029,-1.932,-0.343
2,0.099,1.390,-0.732,-1.065,0.005,-0.081,-1.450,0.317,-0.624,-0.017,...,-1.311,0.799,-1.001,1.544,0.575,-0.309,-0.339,-0.148,-0.646,0.725
3,-0.989,-0.916,-1.343,0.145,0.543,0.636,1.127,0.189,-0.118,-0.638,...,-1.370,1.093,0.596,-0.589,-0.649,-0.163,-0.958,-1.081,0.805,3.401
4,0.811,-1.509,0.522,-0.360,-0.220,-0.959,0.334,-0.566,-0.656,-0.499,...,-0.178,0.718,-1.017,1.249,-0.596,-0.445,1.751,1.442,-0.393,-0.643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,-0.068,-0.184,-1.153,0.610,0.414,1.557,-0.234,0.950,0.896,1.416,...,1.492,1.430,-0.333,-0.200,-1.073,0.797,1.980,1.191,1.032,-0.402
246,-0.234,-1.373,-2.050,-0.408,-0.255,0.784,0.986,-0.891,-0.268,-0.569,...,-0.996,0.678,1.395,0.714,0.215,-0.537,-1.267,-1.021,0.747,0.128
247,-2.327,-1.834,-0.762,0.660,-0.858,-2.764,-0.539,-0.065,0.549,1.474,...,-1.237,-0.620,0.670,-2.010,0.438,1.972,-0.379,0.676,-1.220,-0.855
248,-0.451,-0.204,-0.762,0.261,0.022,-1.487,-1.122,0.141,0.369,-0.173,...,0.729,0.411,2.366,-0.021,0.160,0.045,0.208,-2.117,-0.546,-0.093


In [11]:
# Set up the model, optimizer, and loss function used for training.
# This is univariate regression: the network predicts a single numeric target
# per row and is scored with mean-squared error.
# Start from the default hyperparameters, assess how well the model does, and
# adjust from there.
# NOTE(review): the original note here said "8 input values", but the feature
# table displayed by the data-loading cell shows columns 0-299 -- confirm that
# FeedForward's input size matches the number of feature columns.

# Use the GPU when one is available; otherwise fall back to the CPU instead of
# raising at `.to('cuda')` on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = FeedForward().to(device)
optimizer = optim.Adam(model.parameters())  # Adam with its default settings
loss = nn.MSELoss()

In [12]:
# Modular training loop: one call == one pass (epoch) over the dataloader.
def trainLoop(dataloader, model, lossf, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(x, y)`` tensor batches. Must expose a sized
            ``.dataset`` attribute (used only for the progress printout).
        model: Module to train; invoked as ``model(x)``.
        lossf: Callable ``lossf(prediction, y)`` returning a scalar loss tensor.
        optimizer: Optimizer whose ``step()`` / ``zero_grad()`` update ``model``.

    Each batch is moved to the device that holds the model's parameters, so the
    loop works for a model on CPU or GPU. Targets are reshaped to a column
    (``view(-1, 1)``) before the loss is computed. Progress is printed on every
    fifth batch (batch index 0, 5, 10, ...).

    Returns:
        None. The model is updated in place.
    """
    size = len(dataloader.dataset)

    # Follow the model's device rather than assuming 'cuda'. A model without
    # parameters gives no device hint, so batches are then left where they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    model.train()
    seen = 0  # running count of samples processed, for the progress line
    for batch, (x, y) in enumerate(dataloader):
        if device is not None:
            x = x.to(device)
            y = y.to(device)
        y = y.view(-1, 1)  # column vector, to line up with the prediction
        seen += len(x)

        # Forward pass through NN
        prediction = model(x)
        loss = lossf(prediction, y)

        # Backwards pass through NN
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients so the next batch starts clean

        if batch % 5 == 0:
            loss_value = loss.item()
            # Counting samples directly avoids relying on dataloader.batch_size,
            # which is None when a custom batch sampler is used.
            current = seen
            print(f"Batch: {batch}, Batch Size: {len(x)}")
            print(f"Loss: {loss_value:>7f} [{current}/{size}]")

In [13]:
def test_loop(dataloader, model, lossf):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    total_loss = 0

    with torch.no_grad():
        for X, y in dataloader:
            X = X.to('cuda')
            y = y.to('cuda')
            y = y.view(-1, 1)
            pred = model(X)
            total_loss += lossf(pred, y).item() * X.size(0)

    avg_loss = total_loss / size
    print(f"Test Error: \n Avg loss: {avg_loss:>8f} \n")

In [16]:
# Train for 10 epochs. After each training pass the same loader is run through
# test_loop, so the reported "Test Error" is the loss on the training data.
epochCount = 10
for epoch in range(epochCount):
    banner = f'Epoch {epoch + 1}\n ------------------------------------------------'
    print(banner)
    trainLoop(trainingData, model, loss, optimizer)
    test_loop(trainingData, model, loss)

print('Done!')

Epoch 1
 ------------------------------------------------
Batch: 0, Batch Size: 25
Loss: 0.020498 [25/250]
Batch: 5, Batch Size: 25
Loss: 0.033251 [150/250]
Test Error: 
 Avg loss: 0.023665 

Epoch 2
 ------------------------------------------------
Batch: 0, Batch Size: 25
Loss: 0.005515 [25/250]
Batch: 5, Batch Size: 25
Loss: 0.011402 [150/250]
Test Error: 
 Avg loss: 0.009457 

Epoch 3
 ------------------------------------------------
Batch: 0, Batch Size: 25
Loss: 0.004931 [25/250]
Batch: 5, Batch Size: 25
Loss: 0.003363 [150/250]
Test Error: 
 Avg loss: 0.003322 

Done!


In [15]:
# Build the Kaggle submission: predict a target for every row of test.csv and
# write (id, target) pairs to submission.csv.
testData = pd.read_csv('test.csv')
savedID = testData.loc[:, 'id']

# Features are every column except 'id', as a float32 tensor.
testDataD = testData.drop(columns='id')
testDataD = testDataD.to_numpy()
testDataD = torch.tensor(testDataD, dtype=torch.float32)

# Predict in batches: DataLoader's default batch size is 1, which would run one
# forward pass per row. Order is preserved (shuffle=False) so predictions line
# up with savedID.
testValuesFinal = DataLoader(TensorDataset(testDataD), batch_size=256, shuffle=False)

# Send inputs to whichever device the model's parameters live on instead of
# assuming 'cuda'.
modelDevice = next(model.parameters()).device

model.eval()
predictions = []
with torch.no_grad():
    for item in testValuesFinal:
        # TensorDataset yields 1-tuples here; take the feature tensor.
        item = item[0].to(modelDevice)
        # Call the module itself (not .forward) so registered hooks still run.
        prediction = model(item)
        predictions.append(prediction.cpu().numpy())

# Stack the per-batch outputs into a single (n_rows, 1) column.
submission = np.concatenate(predictions).reshape(-1, 1)
submission = pd.DataFrame(submission, columns=['target'])
final = pd.concat([savedID, submission], axis=1)
final.to_csv('submission.csv', index=False)