In [1]:
%pip install torch tensorboardX tensorboard pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from tensorboardX import SummaryWriter

In [3]:

class CustomDataSet(Dataset):
    def __init__(self, x,y):
        self.data = x
        self.label = y
        
    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        data = self.data[index]
        label = self.label[index]

        return data, label

In [4]:
class Net(nn.Module):
  def __init__(self,input_shape,layerWidth=512):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_shape,layerWidth)
    self.fc2 = nn.Linear(layerWidth,layerWidth)
    self.fc3 = nn.Linear(layerWidth,layerWidth)
    self.fc4 = nn.Linear(layerWidth,layerWidth)
    self.fc5 = nn.Linear(layerWidth,1)
  def forward(self,x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = torch.relu(self.fc3(x))
    x = torch.relu(self.fc4(x))
    x = torch.sigmoid(self.fc5(x))
    return x


In [5]:
csv_file_path = "./dataset/train_denoised.csv"
df = pd.read_csv(csv_file_path)
y = df["Transported"]
df = df.drop("Transported", axis=1)
x = df.values

print(x.shape)


(8693, 23)


In [6]:
# Lets create an object from our custom dataset class
train_data_object = CustomDataSet(x,y)

# Now lets use Data loader to load the data in batches
train_loader = torch.utils.data.DataLoader(
        train_data_object,
        batch_size=1024,
        shuffle=False
    )

In [7]:
# train_features, train_labels = next(iter(train_loader))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
# # print(train_features,train_labels)
# testModel = Net(20)
# train_features = train_features.to(torch.float32)
# output = testModel(train_features)
# output = torch.round(output)
# print(output)
# print(train_labels)
# print(output.eq(train_labels.data.view_as(output)).cpu().sum())

In [8]:
model = Net(x.shape[1])
optimizer = optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.BCELoss()


In [9]:
def train(model,epochs = 10, model_path = "./model.pth",log_path = "./log_pytorch/"):
    tb = SummaryWriter(log_path)

    # put the model into training mode
    model.train()
    for epoch in range(1, epochs + 1):
        correct = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            
            data = data.to(torch.float32)
            # print("data",data)
            target = target.to(torch.float32)
            target = target.unsqueeze(1)
            #calculate output
            output = model(data)
            # print(output)
            #calculate loss
            loss = loss_fn(output,target)
        
            #accuracy
            predicted = model(torch.tensor(x,dtype=torch.float32))
            acc = (predicted.reshape(-1).detach().numpy().round() == y).mean()
            #backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % 100 == 0:
                # losses.append(loss)
                # accur.append(acc)
                print("epoch {}\tloss : {}\tAcc: {}".format(epoch,loss,acc))

        tb.add_scalar("epoch loss", loss.item(), epoch)
        tb.add_scalar("epoch accuracy", acc, epoch)
        for name, weight in model.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad',weight.grad, epoch)
    # save the model to a .pth file
    print('Saving NN to %s' % model_path)
    torch.save(model.state_dict(), model_path)
    # add graph to tensorboard
    tb.add_graph(model, (data,))

In [10]:
def test(model, model_path="./model.pth",testDataPath = "./dataset/test_preprocessed"):

    df = pd.read_csv(testDataPath)
    y = df["PassengerId"]
    df = df.drop("PassengerId", axis=1)
    x = df.values

    model.load_state_dict(torch.load(model_path))
    
    # put model into test mode
    model.eval()
    with torch.no_grad():
    
        output = model(torch.tensor(x,dtype=torch.float32))
        pred = output.reshape(-1).detach().numpy().round()
        label = y.values
        
        result = {"PassengerId" : label,
                  "Transported" : pred}
        
        return result

In [11]:
modelPath = "./model_clean.pth"
train(model,epochs=500,model_path=modelPath,log_path="./log_noiseSet/")


epoch 1	loss : 0.6934706568717957	Acc: 0.5036236051995858
epoch 2	loss : 0.6934455633163452	Acc: 0.5036236051995858
epoch 3	loss : 0.6934204697608948	Acc: 0.5036236051995858
epoch 4	loss : 0.6933956146240234	Acc: 0.5036236051995858
epoch 5	loss : 0.693371057510376	Acc: 0.5036236051995858
epoch 6	loss : 0.6933466196060181	Acc: 0.5036236051995858
epoch 7	loss : 0.6933227181434631	Acc: 0.5036236051995858
epoch 8	loss : 0.6932989358901978	Acc: 0.5036236051995858
epoch 9	loss : 0.6932752132415771	Acc: 0.5036236051995858
epoch 10	loss : 0.6932516098022461	Acc: 0.5036236051995858
epoch 11	loss : 0.6932278871536255	Acc: 0.5036236051995858
epoch 12	loss : 0.6932045817375183	Acc: 0.5036236051995858
epoch 13	loss : 0.6931816339492798	Acc: 0.5036236051995858
epoch 14	loss : 0.6931588053703308	Acc: 0.5036236051995858
epoch 15	loss : 0.6931361556053162	Acc: 0.5036236051995858
epoch 16	loss : 0.6931135058403015	Acc: 0.5036236051995858
epoch 17	loss : 0.6930915117263794	Acc: 0.5036236051995858
epoch 1

In [12]:
testModel = Net(x.shape[1])
mapped = test(testModel,model_path=modelPath,testDataPath = "./dataset/test_denoised.csv")
df = pd.DataFrame.from_dict(mapped)
df["Transported"] = df["Transported"].astype(bool)
df.to_csv("./dataset/result.csv",index=False)