In [1]:
%pip install torch tensorboardX tensorboard pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from tensorboardX import SummaryWriter

In [3]:

class CustomDataSet(Dataset):
    def __init__(self, x,y):
        self.data = x
        self.label = y
        
    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, index):
        data = self.data[index]
        label = self.label[index]

        return data, label

In [4]:
class Net(nn.Module):
  def __init__(self,input_shape, modelWidth = 1024):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_shape,modelWidth)
    self.fc2 = nn.Linear(modelWidth,modelWidth)
    self.fc3 = nn.Linear(modelWidth,1)
  def forward(self,x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = torch.sigmoid(self.fc3(x))
    return x


In [5]:
csv_file_path = "./dataset/train_preprocessed.csv"
df = pd.read_csv(csv_file_path)
y = df["Survived"].values
df = df.drop("Survived", axis=1)
x = df.values

print(x.shape)


(891, 10)


In [6]:
# Lets create an object from our custom dataset class
train_data_object = CustomDataSet(x,y)

# Now lets use Data loader to load the data in batches
train_loader = torch.utils.data.DataLoader(
        train_data_object,
        batch_size=128,
        shuffle=False
    )

In [7]:
# train_features, train_labels = next(iter(train_loader))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
# # print(train_features,train_labels)
# testModel = Net(20)
# train_features = train_features.to(torch.float32)
# output = testModel(train_features)
# output = torch.round(output)
# print(output)
# print(train_labels)
# print(output.eq(train_labels.data.view_as(output)).cpu().sum())

In [8]:
model = Net(x.shape[1])
optimizer = optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.BCELoss()


In [9]:
def train(model,epochs = 10, model_path = "./model.pth",log_path = "./log_pytorch/"):
    tb = SummaryWriter(log_path)

    # put the model into training mode
    model.train()
    for epoch in range(1, epochs + 1):
        correct = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            
            data = data.to(torch.float32)
            # print("data",data)
            target = target.to(torch.float32)
            target = target.unsqueeze(1)
            #calculate output
            output = model(data)
            # print(output)
            #calculate loss
            loss = loss_fn(output,target)
        
            #accuracy
            predicted = model(torch.tensor(x,dtype=torch.float32))
            acc = (predicted.reshape(-1).detach().numpy().round() == y).mean()
            #backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % 100 == 0:
                # losses.append(loss)
                # accur.append(acc)
                print("epoch {}\tloss : {}\tAcc: {}".format(epoch,loss,acc))

        tb.add_scalar("epoch loss", loss.item(), epoch)
        tb.add_scalar("epoch accuracy", acc, epoch)
        for name, weight in model.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad',weight.grad, epoch)
    # save the model to a .pth file
    print('Saving NN to %s' % model_path)
    torch.save(model.state_dict(), model_path)
    # add graph to tensorboard
    tb.add_graph(model, (data,))

In [10]:
def test(model, model_path="./model.pth",testDataPath = "./dataset/test_preprocessed.csv"):

    df = pd.read_csv(testDataPath)
    yt = df["PassengerId"]
    df = df.drop("PassengerId", axis=1)
    xt = df.values

    model.load_state_dict(torch.load(model_path))
    
    # put model into test mode
    model.eval()
    with torch.no_grad():
    
        output = model(torch.tensor(xt,dtype=torch.float32))
        pred = output.reshape(-1).detach().numpy().round()
        label = yt.values
        
        result = {"PassengerId" : label,
                  "Survived" : pred}
        
        return result

In [11]:

train(model,epochs=100,model_path="./model.pth",log_path="./log_wide/")


epoch 1	loss : 0.6848648190498352	Acc: 0.6161616161616161
epoch 2	loss : 0.5985071659088135	Acc: 0.6576879910213244
epoch 3	loss : 0.5608227252960205	Acc: 0.755331088664422
epoch 4	loss : 0.5341880321502686	Acc: 0.7822671156004489
epoch 5	loss : 0.5174116492271423	Acc: 0.7811447811447811
epoch 6	loss : 0.5076558589935303	Acc: 0.7890011223344556
epoch 7	loss : 0.5022929906845093	Acc: 0.8047138047138047
epoch 8	loss : 0.4990037977695465	Acc: 0.8069584736251403
epoch 9	loss : 0.49664175510406494	Acc: 0.8069584736251403
epoch 10	loss : 0.49499496817588806	Acc: 0.8069584736251403
epoch 11	loss : 0.4935717284679413	Acc: 0.8069584736251403
epoch 12	loss : 0.49226856231689453	Acc: 0.8069584736251403
epoch 13	loss : 0.4911797046661377	Acc: 0.8069584736251403
epoch 14	loss : 0.4901178479194641	Acc: 0.8069584736251403
epoch 15	loss : 0.4890228807926178	Acc: 0.8080808080808081
epoch 16	loss : 0.48799237608909607	Acc: 0.8080808080808081
epoch 17	loss : 0.4869036078453064	Acc: 0.8080808080808081
epo

In [12]:
testModel = Net(x.shape[1])
print(x.shape[1])
mapped = test(testModel,model_path="./model.pth",testDataPath = "./dataset/test_preprocessed.csv")
df = pd.DataFrame.from_dict(mapped)
df["Survived"] = df["Survived"].astype(int)
df.to_csv("./dataset/result.csv",index=False)

10
