<a href="https://colab.research.google.com/github/SelamZem/House_Prices_Prediction/blob/main/titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn as nn

In [2]:
def wrangle(file):
  df = pd.read_csv(file).set_index("PassengerId")

  #fill the null values with mean value for(number columns) and mode values for (object columns)
  for col in df.columns:
    if df[col].dtype == 'object':
      df[col] = df[col].fillna(df[col].mode()[0])
    else :
      df[col] = df[col].fillna(df[col].mean())

  # drop Name and Cabin
  df = df.drop(['Name', 'Cabin'], axis=1)

  # change the object columns to dummies
  for col in df.columns:
    if df[col].dtype == 'object':
      df = pd.get_dummies(df, columns=[col], drop_first=True)

  # change bool values to float
  for col in df.columns:
    if df[col].dtype == 'bool':
      df[col] = df[col].astype(float)



  return df

In [3]:
data = wrangle('/content/train.csv')

  df[col] = df[col].fillna(df[col].mode()[0])


In [4]:
data.head(5)

Unnamed: 0_level_0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_Europa,HomePlanet_Mars,Destination_PSO J318.5-22,Destination_TRAPPIST-1e
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0001_01,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
0002_01,0.0,24.0,0.0,109.0,9.0,25.0,549.0,44.0,1.0,0.0,0.0,0.0,1.0
0003_01,0.0,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,0.0,1.0,0.0,0.0,1.0
0003_02,0.0,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,0.0,1.0,0.0,0.0,1.0
0004_01,0.0,16.0,0.0,303.0,70.0,151.0,565.0,2.0,1.0,0.0,0.0,0.0,1.0


In [5]:
#checking the percentage of null values
(data.isnull().sum()/data.shape[0])*100

Unnamed: 0,0
CryoSleep,0.0
Age,0.0
VIP,0.0
RoomService,0.0
FoodCourt,0.0
ShoppingMall,0.0
Spa,0.0
VRDeck,0.0
Transported,0.0
HomePlanet_Europa,0.0


In [6]:
#checking object columns
for col in data.columns:
  if data[col].dtype == 'object':
    print(col, data[col].nunique())

In [7]:
data.describe()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,HomePlanet_Europa,HomePlanet_Mars,Destination_PSO J318.5-22,Destination_TRAPPIST-1e
count,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0,8693.0
mean,0.349362,28.82793,0.022892,224.687617,458.077203,173.729169,311.138778,304.854791,0.503624,0.24514,0.202347,0.091568,0.701369
std,0.476796,14.339054,0.149568,659.739364,1594.434978,597.41744,1124.675871,1133.259049,0.500016,0.430195,0.401772,0.288432,0.457684
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
75%,1.0,37.0,0.0,78.0,118.0,45.0,89.0,71.0,1.0,0.0,0.0,0.0,1.0
max,1.0,79.0,1.0,14327.0,29813.0,23492.0,22408.0,24133.0,1.0,1.0,1.0,1.0,1.0


In [8]:
# Vertical split
X = data.drop('Transported', axis=1)
y = data['Transported']

In [9]:
#Turn data into tensor
X = torch.tensor(X.values).float()
y = torch.tensor(y.values).float()
X[:5], y[:5]

(tensor([[0.0000e+00, 3.9000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00],
         [0.0000e+00, 2.4000e+01, 0.0000e+00, 1.0900e+02, 9.0000e+00, 2.5000e+01,
          5.4900e+02, 4.4000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00],
         [0.0000e+00, 5.8000e+01, 1.0000e+00, 4.3000e+01, 3.5760e+03, 0.0000e+00,
          6.7150e+03, 4.9000e+01, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00],
         [0.0000e+00, 3.3000e+01, 0.0000e+00, 0.0000e+00, 1.2830e+03, 3.7100e+02,
          3.3290e+03, 1.9300e+02, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00],
         [0.0000e+00, 1.6000e+01, 0.0000e+00, 3.0300e+02, 7.0000e+01, 1.5100e+02,
          5.6500e+02, 2.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00]]),
 tensor([0., 1., 0., 0., 1.]))

In [10]:
# Horizontal split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([6954, 12]),
 torch.Size([1739, 12]),
 torch.Size([6954]),
 torch.Size([1739]))

In [11]:
device= 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [12]:
X_train

tensor([[ 0., 28.,  0.,  ...,  0.,  0.,  1.],
        [ 0., 17.,  0.,  ...,  0.,  0.,  1.],
        [ 1., 28.,  0.,  ...,  0.,  0.,  0.],
        ...,
        [ 0., 22.,  0.,  ...,  0.,  1.,  0.],
        [ 0., 34.,  0.,  ...,  1.,  0.,  1.],
        [ 0., 28.,  0.,  ...,  0.,  0.,  0.]])

In [33]:
class Tranported(nn.Module):
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential(
        nn.Linear(in_features=12, out_features=100),
        nn.ReLU(),
        nn.Linear(in_features=100, out_features=50),
        nn.ReLU(),
        nn.Linear(in_features=50, out_features=50),
        nn.Linear(in_features=50, out_features=1),

        nn.Sigmoid()
    )

  def forward(self, x):
    return self.layers(x)



In [34]:
model_0 = Tranported().to(device)
model_0

Tranported(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=50, bias=True)
    (3): ReLU()
    (4): Linear(in_features=50, out_features=50, bias=True)
    (5): Linear(in_features=50, out_features=1, bias=True)
    (6): Sigmoid()
  )
)

In [35]:
untrained_preds = model_0(X_test.to(device))
abc = torch.round(untrained_preds[:10])
print(abc[:10])

tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]], device='cuda:0', grad_fn=<SliceBackward0>)


In [36]:
#Loss function and optimizer
loss_fn = nn.BCELoss()
Optimizer = torch.optim.Adam(params=model_0.parameters(), lr=0.01)

In [37]:
#calculate accuracy
def accuracy_fn(y_true, y_pred):
  correct = torch.eq(y_true, y_pred).sum().item()
  acc = (correct/len(y_pred))*100
  return acc

In [38]:
# Train model
with torch.inference_mode():
  untrained_preds = model_0(X_test.to(device))
untrained_preds[:10]

tensor([[1.5040e-28],
        [7.3268e-10],
        [6.3506e-01],
        [3.1763e-10],
        [6.4002e-01],
        [5.1954e-25],
        [3.2021e-10],
        [4.2870e-13],
        [6.4393e-01],
        [5.7602e-06]], device='cuda:0')

In [39]:
torch.cuda.manual_seed(42)
torch.manual_seed(42)

epochs = 500

X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
  #Train the model
  model_0.train()

  #Forward pass
  y_logits = model_0(X_train).squeeze()
  y_pred = torch.round(torch.sigmoid(y_logits))

  #Calculate loss
  loss = loss_fn(y_logits, y_train)
  acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

  #zero grad optimizer
  Optimizer.zero_grad()

  #loss backward
  loss.backward()

  #optimizer step
  Optimizer.step()

  #Test the model
  model_0.eval()
  with torch.inference_mode():
    test_logits = model_0(X_test).squeeze()
    test_pred = torch.round(torch.sigmoid(test_logits))

    #calculate loss and accuracy
    test_loss = loss_fn(test_logits, y_test)
    test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

  #print
  if(epoch%50 == 0):
    print(f"Epoch: {epoch} | Loss: {loss:.5f} | Acc: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test Acc: {test_acc:.2f}%")


Epoch: 0 | Loss: 9.41345 | Acc: 63.91% | Test Loss: 41.44468 | Test Acc: 50.49%
Epoch: 50 | Loss: 41.78288 | Acc: 50.33% | Test Loss: 41.58727 | Test Acc: 50.49%
Epoch: 100 | Loss: 41.75499 | Acc: 50.33% | Test Loss: 41.55802 | Test Acc: 50.49%
Epoch: 150 | Loss: 41.75239 | Acc: 50.33% | Test Loss: 41.55627 | Test Acc: 50.49%
Epoch: 200 | Loss: 41.75109 | Acc: 50.33% | Test Loss: 41.55580 | Test Acc: 50.49%
Epoch: 250 | Loss: 41.75082 | Acc: 50.33% | Test Loss: 41.55688 | Test Acc: 50.49%
Epoch: 300 | Loss: 41.74997 | Acc: 50.33% | Test Loss: 41.55416 | Test Acc: 50.49%
Epoch: 350 | Loss: 41.74966 | Acc: 50.33% | Test Loss: 41.55426 | Test Acc: 50.49%
Epoch: 400 | Loss: 41.74966 | Acc: 50.33% | Test Loss: 41.55647 | Test Acc: 50.49%
Epoch: 450 | Loss: 41.74931 | Acc: 50.33% | Test Loss: 41.55425 | Test Acc: 50.49%
