In [77]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder , OneHotEncoder , StandardScaler
from sklearn.impute import SimpleImputer 

In [78]:
df = pd.read_csv("Dataset/Titanic/train.csv")
df.sample(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
417,418,1,2,"Silven, Miss. Lyyli Karoliina",female,18.0,0,2,250652,13.0,,S
874,875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0,,C
14,15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S
105,106,0,3,"Mionoff, Mr. Stoytcho",male,28.0,0,0,349207,7.8958,,S
671,672,0,1,"Davidson, Mr. Thornton",male,31.0,1,0,F.C. 12750,52.0,B71,S


In [79]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [80]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [81]:
# Preprocss the datset
df = df.drop(columns= ['Name' , 'Cabin' ,"PassengerId" , "Ticket"])

# Split the dataset
y = df['Survived']
X = df.drop(columns= ['Survived'])
X_train , x_test , y_train , y_test = train_test_split(X, y , test_size= 0.2 , random_state= 42)
X_train.shape , x_test.shape

# impuation for Age
Si = SimpleImputer(strategy= "mean")
X_train['Age'] = Si.fit_transform(X_train[['Age']])
x_test['Age'] = Si.transform(x_test[["Age"]])

# Impuation for Embarked
X_train = X_train.fillna({'Embarked': 'missing'})  # Replace with 'missing' instead of dropping
x_test = x_test.fillna({'Embarked': 'missing'})

# OHE
object_columns = X_train.select_dtypes(include=['object']).columns.tolist()
ohe = OneHotEncoder(sparse_output= False)

X_train_ohe_encoder = ohe.fit_transform(X_train[object_columns])
x_test_ohe_encoder = ohe.transform(x_test[object_columns])

X_train_ohe_df = pd.DataFrame(X_train_ohe_encoder , columns= ohe.get_feature_names_out(object_columns), index= X_train.index)
x_test_ohe_df = pd.DataFrame(x_test_ohe_encoder , columns= ohe.get_feature_names_out(object_columns) , index= x_test.index)

X_train = pd.concat([X_train , X_train_ohe_df] , axis= 1)
x_test = pd.concat([x_test , x_test_ohe_df] , axis= 1)

X_train = X_train.drop(object_columns , axis= 1)
x_test = x_test.drop(object_columns , axis= 1)

# Scale Values
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
x_test = scaler.transform(x_test)


In [82]:
# Label Encoder
LE = LabelEncoder()
y_train = LE.fit_transform(y_train)
y_test = LE.transform(y_test)
X_train.shape , y_train.shape

((712, 11), (712,))

In [83]:
# Converting into tensor
import numpy as np 

X_train = np.array(X_train)
x_test = np.array(x_test)
X_train = torch.from_numpy(X_train)
x_test = torch.from_numpy(x_test)
y_test = torch.from_numpy(y_test)
y_train = torch.from_numpy(y_train)

In [84]:
# Creating a Neural Network in Pytorch
import math
class NeuralNetwork():
    
    def __init__(self , Traning_set):
        self.weight = torch.rand(Traning_set.shape[1] , 1 , dtype= torch.float64 , requires_grad= True)
        self.bias = torch.zeros(1 , dtype= torch.float64 , requires_grad= True)
        
    def forward(self , Traning_set):
        z = torch.matmul(Traning_set , self.weight) + self.bias
        y_pred = torch.sigmoid(z) 
        return y_pred
    
    def Loss(self , y_pred , y_true):
        eplion = 1e-7
        y_pred = torch.clamp(y_pred , eplion , 1-eplion)
        
        loss = -(y_true * torch.log(y_pred) + (1-y_true) * torch.log(1-y_pred)).mean()
        return loss
        

    
        

In [85]:
epoch = 20
learning_rate = 0.1

In [86]:
# Traning Pipeline

model = NeuralNetwork(X_train)
model.bias.shape
model.weight.shape # is euqual = (X_train.shape[1] , 1)  


for ep in range(epoch):
    # Forward pass
    y_pred = model.forward(X_train)
    
    # Loss calculate
    loss = model.Loss(y_pred , y_train)
    
    # backward pass
    loss.backward()
    
    # updated weights and bias
    with torch.no_grad():
        model.weight -= model.weight.grad*learning_rate
        model.bias -= model.bias.grad*learning_rate
    
    # Loss in each epoch 
    model.weight.grad.zero_()
    model.weight.grad.zero_()
    print(f"For epoch {ep+1} loss is {loss}")
    # pass
    

For epoch 1 loss is 0.9859585830982732
For epoch 2 loss is 0.9723396298570769
For epoch 3 loss is 0.958385652157675
For epoch 4 loss is 0.9441560770840487
For epoch 5 loss is 0.9297675519945662
For epoch 6 loss is 0.9153866220693017
For epoch 7 loss is 0.901217843180029
For epoch 8 loss is 0.887488114765411
For epoch 9 loss is 0.8744284408336233


For epoch 10 loss is 0.8622546752221512
For epoch 11 loss is 0.8511490175835349
For epoch 12 loss is 0.8412440419822713
For epoch 13 loss is 0.8326108274531983
For epoch 14 loss is 0.8252523285645896
For epoch 15 loss is 0.8191025331803449
For epoch 16 loss is 0.8140313045428794
For epoch 17 loss is 0.8098542113538892
For epoch 18 loss is 0.8063462108112838
For epoch 19 loss is 0.8032578187444498
For epoch 20 loss is 0.8003323765743451


In [93]:
# Evaluate the model

with torch.no_grad():
    y_pred = model.forward(x_test)
    y_pred = (y_pred > 0.6).float()  # threshold = 0.6
    
    acc = (y_pred == y_test).float().mean() # to calculate accuracy
    
    print(f"Accuracy: {acc}")
    

Accuracy: 0.5759495496749878
