## Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

## Dataset Preparation

In [2]:
train_data = pd.read_csv('titanic datset/train.csv')
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


#### Extract titles from the names colum

In [3]:
dataset_title = [i.split(',')[1].split('.')[0].strip() for i in train_data['Name']]
train_data['Title'] = pd.Series(dataset_title)
train_data['Title'].value_counts()
train_data['Title'] = train_data['Title'].replace(['Lady', 'the Countess', 'Countess', 'Capt', 'Col', 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona', 'Ms', 'Mme', 'Mlle'], 'Rare')
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,Mr
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,Mrs
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,Miss
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,Mrs
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,Mr


In [4]:
null_values = train_data.isnull().sum()
null_values

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
Title            0
dtype: int64

#### Drop unrealted features & replace or drop null values

In [5]:
train_data = train_data.drop(['PassengerId', 'Cabin', 'Name', 'Ticket'], axis=1)
train_data = train_data.dropna(subset=['Embarked'])
train_data['Age'] = train_data['Age'].fillna(train_data['Age'].mean())

null_values = train_data.isnull().sum()
null_values, train_data.shape

(Survived    0
 Pclass      0
 Sex         0
 Age         0
 SibSp       0
 Parch       0
 Fare        0
 Embarked    0
 Title       0
 dtype: int64,
 (889, 9))

In [6]:
train_data.dtypes

Survived      int64
Pclass        int64
Sex          object
Age         float64
SibSp         int64
Parch         int64
Fare        float64
Embarked     object
Title        object
dtype: object

#### OneHot Encoding categorical features

In [7]:
label_encoder = LabelEncoder()
for column in train_data.select_dtypes(include=['object']).columns:
    train_data[column] = label_encoder.fit_transform(train_data[column]).astype(np.int64)
train_data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,0,3,1,22.0,1,0,7.25,2,2
1,1,1,0,38.0,1,0,71.2833,0,3
2,1,3,0,26.0,0,0,7.925,2,1
3,1,1,0,35.0,1,0,53.1,2,3
4,0,3,1,35.0,0,0,8.05,2,2


In [8]:
train_data.dtypes

Survived      int64
Pclass        int64
Sex           int64
Age         float64
SibSp         int64
Parch         int64
Fare        float64
Embarked      int64
Title         int64
dtype: object

In [9]:
train_data.shape

(889, 9)

#### Split dataset into train and validation sets

In [10]:
y = train_data['Survived'].values  # dependent variable
X = train_data.drop(['Survived'], axis=1).values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
X.shape 

(889, 8)

Scale the features

In [12]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

In [13]:
X_train.shape

(711, 8)

#### Data Loading for Neural Network

In [14]:
class TitanicDataset(Dataset):
    def __init__(self, features, targets):
        self.features = torch.FloatTensor(features)
        self.targets = torch.FloatTensor(targets)
    
    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

In [15]:
train_data_boj = TitanicDataset(X_train, y_train)
train_loader = DataLoader(train_data_boj, batch_size=32, shuffle=True)

In [16]:
for inputs, labels in train_loader:
    print(inputs[0].shape)
    print(labels.shape)
    break

torch.Size([8])
torch.Size([32])


## Define & Training Neural Network Model

#### Define ANN

In [17]:
class TitanicModel(nn.Module):
    def __init__(self, input_size):
        super(TitanicModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        
    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = torch.sigmoid(self.layer3(x))
        return x
    
    
model = TitanicModel(8) # number of features = 8

#### Training Loop

In [18]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())
model.train()

num_epochs =130
for epoch in range(num_epochs):
    for features, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Early stopping condition
    if loss.item() < 0.1 and epoch > 50:
        print(f'Early stopping at epoch {epoch+1} because loss {loss.item():.4f} < 0.1')
        break

Epoch [10/130], Loss: 0.1549
Epoch [20/130], Loss: 0.4150
Epoch [30/130], Loss: 0.3035
Epoch [40/130], Loss: 0.4093
Epoch [50/130], Loss: 0.1926
Early stopping at epoch 53 because loss 0.0861 < 0.1


#### Evaluation 

In [20]:
model.eval()
with torch.no_grad():
    X_val_tensor = torch.FloatTensor(X_val)
    val_predictions = model(X_val_tensor)
    val_predictions = (val_predictions > 0.5).float()
    accuracy = (val_predictions.squeeze() == torch.FloatTensor(y_val)).float().mean()
    print(f'Validation Accuracy: {accuracy.item():.4f}')

Validation Accuracy: 0.8090
