In [1]:
#!pip install torch

In [2]:
import torch
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.svm import SVR
import torch.nn as nn 
import torch.optim

In [3]:
#load the preapred data
train = pd.read_csv("clean_data.csv")
test = pd.read_csv("clean_test.csv")

In [4]:
X = train.drop('Transported', axis =1)
y = train['Transported']

In [5]:
#Standardizing features 
sc = StandardScaler()
X = sc.fit_transform(X)

In [6]:
#split data in train/test sets using K-Folds cross-validator

kf = KFold(n_splits=5)

for train_index, val_index in kf.split(X):
    print("TRAIN:", train_index, "VALIDATION:", val_index)
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

TRAIN: [1739 1740 1741 ... 8690 8691 8692] VALIDATION: [   0    1    2 ... 1736 1737 1738]
TRAIN: [   0    1    2 ... 8690 8691 8692] VALIDATION: [1739 1740 1741 ... 3475 3476 3477]
TRAIN: [   0    1    2 ... 8690 8691 8692] VALIDATION: [3478 3479 3480 ... 5214 5215 5216]
TRAIN: [   0    1    2 ... 8690 8691 8692] VALIDATION: [5217 5218 5219 ... 6952 6953 6954]
TRAIN: [   0    1    2 ... 6952 6953 6954] VALIDATION: [6955 6956 6957 ... 8690 8691 8692]


In [7]:
#Creates a Tensor from a numpy.ndarray.

y_train = y_train.to_numpy()
y_val = y_val.to_numpy()

X_train=torch.from_numpy(X_train.astype(np.float32))
X_val=torch.from_numpy(X_val.astype(np.float32))

y_train=torch.from_numpy(y_train.astype(np.float32))
y_val=torch.from_numpy(y_val.astype(np.float32))

In [8]:
n_input_features = X.shape[1]

In [9]:
# model implementation
            
class Model(nn.Module):
    def __init__(self, n_input_features):
        super(Model, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x)).squeeze(-1)

        return y_pred

In [10]:
model = Model(n_input_features)

criterion = nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [11]:
# Training loop
for epoch in range(1000):
    y_pred = model(X_train)

    # Compute and print loss
    loss = criterion(y_pred, y_train)
    print(f'Epoch {epoch + 1}/1000 | Loss: {loss.item():.4f}')

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch 1/1000 | Loss: 0.6504
Epoch 2/1000 | Loss: 0.6496
Epoch 3/1000 | Loss: 0.6488
Epoch 4/1000 | Loss: 0.6480
Epoch 5/1000 | Loss: 0.6472
Epoch 6/1000 | Loss: 0.6464
Epoch 7/1000 | Loss: 0.6456
Epoch 8/1000 | Loss: 0.6448
Epoch 9/1000 | Loss: 0.6440
Epoch 10/1000 | Loss: 0.6432
Epoch 11/1000 | Loss: 0.6424
Epoch 12/1000 | Loss: 0.6416
Epoch 13/1000 | Loss: 0.6409
Epoch 14/1000 | Loss: 0.6401
Epoch 15/1000 | Loss: 0.6393
Epoch 16/1000 | Loss: 0.6386
Epoch 17/1000 | Loss: 0.6378
Epoch 18/1000 | Loss: 0.6371
Epoch 19/1000 | Loss: 0.6363
Epoch 20/1000 | Loss: 0.6356
Epoch 21/1000 | Loss: 0.6349
Epoch 22/1000 | Loss: 0.6341
Epoch 23/1000 | Loss: 0.6334
Epoch 24/1000 | Loss: 0.6327
Epoch 25/1000 | Loss: 0.6320
Epoch 26/1000 | Loss: 0.6313
Epoch 27/1000 | Loss: 0.6305
Epoch 28/1000 | Loss: 0.6298
Epoch 29/1000 | Loss: 0.6291
Epoch 30/1000 | Loss: 0.6285
Epoch 31/1000 | Loss: 0.6278
Epoch 32/1000 | Loss: 0.6271
Epoch 33/1000 | Loss: 0.6264
Epoch 34/1000 | Loss: 0.6257
Epoch 35/1000 | Loss: 0

Epoch 307/1000 | Loss: 0.5312
Epoch 308/1000 | Loss: 0.5311
Epoch 309/1000 | Loss: 0.5309
Epoch 310/1000 | Loss: 0.5308
Epoch 311/1000 | Loss: 0.5306
Epoch 312/1000 | Loss: 0.5304
Epoch 313/1000 | Loss: 0.5303
Epoch 314/1000 | Loss: 0.5301
Epoch 315/1000 | Loss: 0.5300
Epoch 316/1000 | Loss: 0.5298
Epoch 317/1000 | Loss: 0.5296
Epoch 318/1000 | Loss: 0.5295
Epoch 319/1000 | Loss: 0.5293
Epoch 320/1000 | Loss: 0.5292
Epoch 321/1000 | Loss: 0.5290
Epoch 322/1000 | Loss: 0.5289
Epoch 323/1000 | Loss: 0.5287
Epoch 324/1000 | Loss: 0.5286
Epoch 325/1000 | Loss: 0.5284
Epoch 326/1000 | Loss: 0.5282
Epoch 327/1000 | Loss: 0.5281
Epoch 328/1000 | Loss: 0.5279
Epoch 329/1000 | Loss: 0.5278
Epoch 330/1000 | Loss: 0.5276
Epoch 331/1000 | Loss: 0.5275
Epoch 332/1000 | Loss: 0.5273
Epoch 333/1000 | Loss: 0.5272
Epoch 334/1000 | Loss: 0.5271
Epoch 335/1000 | Loss: 0.5269
Epoch 336/1000 | Loss: 0.5268
Epoch 337/1000 | Loss: 0.5266
Epoch 338/1000 | Loss: 0.5265
Epoch 339/1000 | Loss: 0.5263
Epoch 340/

Epoch 641/1000 | Loss: 0.4989
Epoch 642/1000 | Loss: 0.4989
Epoch 643/1000 | Loss: 0.4988
Epoch 644/1000 | Loss: 0.4988
Epoch 645/1000 | Loss: 0.4987
Epoch 646/1000 | Loss: 0.4986
Epoch 647/1000 | Loss: 0.4986
Epoch 648/1000 | Loss: 0.4985
Epoch 649/1000 | Loss: 0.4985
Epoch 650/1000 | Loss: 0.4984
Epoch 651/1000 | Loss: 0.4984
Epoch 652/1000 | Loss: 0.4983
Epoch 653/1000 | Loss: 0.4982
Epoch 654/1000 | Loss: 0.4982
Epoch 655/1000 | Loss: 0.4981
Epoch 656/1000 | Loss: 0.4981
Epoch 657/1000 | Loss: 0.4980
Epoch 658/1000 | Loss: 0.4980
Epoch 659/1000 | Loss: 0.4979
Epoch 660/1000 | Loss: 0.4979
Epoch 661/1000 | Loss: 0.4978
Epoch 662/1000 | Loss: 0.4978
Epoch 663/1000 | Loss: 0.4977
Epoch 664/1000 | Loss: 0.4976
Epoch 665/1000 | Loss: 0.4976
Epoch 666/1000 | Loss: 0.4975
Epoch 667/1000 | Loss: 0.4975
Epoch 668/1000 | Loss: 0.4974
Epoch 669/1000 | Loss: 0.4974
Epoch 670/1000 | Loss: 0.4973
Epoch 671/1000 | Loss: 0.4973
Epoch 672/1000 | Loss: 0.4972
Epoch 673/1000 | Loss: 0.4972
Epoch 674/

Epoch 976/1000 | Loss: 0.4852
Epoch 977/1000 | Loss: 0.4852
Epoch 978/1000 | Loss: 0.4852
Epoch 979/1000 | Loss: 0.4851
Epoch 980/1000 | Loss: 0.4851
Epoch 981/1000 | Loss: 0.4851
Epoch 982/1000 | Loss: 0.4851
Epoch 983/1000 | Loss: 0.4850
Epoch 984/1000 | Loss: 0.4850
Epoch 985/1000 | Loss: 0.4850
Epoch 986/1000 | Loss: 0.4849
Epoch 987/1000 | Loss: 0.4849
Epoch 988/1000 | Loss: 0.4849
Epoch 989/1000 | Loss: 0.4849
Epoch 990/1000 | Loss: 0.4848
Epoch 991/1000 | Loss: 0.4848
Epoch 992/1000 | Loss: 0.4848
Epoch 993/1000 | Loss: 0.4847
Epoch 994/1000 | Loss: 0.4847
Epoch 995/1000 | Loss: 0.4847
Epoch 996/1000 | Loss: 0.4847
Epoch 997/1000 | Loss: 0.4846
Epoch 998/1000 | Loss: 0.4846
Epoch 999/1000 | Loss: 0.4846
Epoch 1000/1000 | Loss: 0.4845


In [12]:
with torch.no_grad():
 y_pred=model(X_val)
 y_pred_class=y_pred.round()
 accuracy=(y_pred_class.eq(y_val).sum())/float(y_val.shape[0])
 print(accuracy.item())

0.7779056429862976


In [13]:
test = test.to_numpy()
test=torch.from_numpy(test.astype(np.float32))

with torch.no_grad():
 predictions=model(test)
 predictions_class=predictions.round()

In [14]:
predictions = torch.round(predictions)
predictions = torch.gt(predictions, 0)

predictions

tensor([False, False, False,  ...,  True,  True,  True])

In [15]:
predictions.numpy()

array([False, False, False, ...,  True,  True,  True])

In [16]:
test_unprepared =pd.read_csv("test.csv")
passenger_id = test_unprepared['PassengerId']

submission = pd.DataFrame({'PassengerId':passenger_id,'Transported':predictions})
submission.head()

Unnamed: 0,PassengerId,Transported
0,0013_01,False
1,0018_01,False
2,0019_01,False
3,0021_01,True
4,0023_01,True
