# Import Dependencies/Data and Instantiate Objects

In [6]:
%pip install torch
%pip install mltable

import os
import numpy as np
import pandas as pd
import torch
from torch import utils, nn, optim
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
# import torchtext.functional as F
from azure.ai.ml.entities import DataImport
from azure.ai.ml.data_transfer import Database
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import mltable
import math

# authenticate and create client
credential = DefaultAzureCredential()

# ml_client = MLClient.from_config(credential=DefaultAzureCredential())
ml_client = MLClient(
    credential=credential,
    subscription_id="1dc97be4-3550-41c0-b2a9-cfdd85ea7713",
    resource_group_name="ResourceGroup-AI-ML",
    workspace_name="Thomas-AIServices-1"
)

# import data set and convert to Pandas table
ml_data_asset = ml_client.data.get("TitanicDataAsset", version="1")
# path_to_data_asset = { 'file': ml_data_asset.Path }
# ml_table = mltable.from_delimited_files(paths=[path_to_data_asset])
# pd_data_frame = ml_table.to_pandas_dataframe()

# priont top 5 results to validate data ingestion
pd_data_frame = pd.read_csv(ml_data_asset.path)
pd_data_frame.head()

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# Split Train and Test Data

In [8]:
msk = np.random.rand(len(pd_data_frame)) < .8

train_data = pd_data_frame[msk]
test_data = pd_data_frame[~msk]

train_data.to_csv('./data/training_data.csv', index=False)
test_data.to_csv('./data/testing_data.csv', index=False)

train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


# Instantiate Dataset Class for Easy Manipulation
Cleans the data by:
removing all columns but the relevant features
removing rows with null features
replacing binary or categorical characteristics with numeric representations

In [12]:
class TitanicDataset(Dataset):
    def __init__(self, data_file):
        # load data from CSV
        data_frame = pd.read_csv(data_file)

        # data preperation
        data_frame = data_frame[['Survived', 'Sex', 'Age', 'Pclass']]
        data_frame = data_frame.dropna() # drops rows with None or NaN values
        
        data_frame['Sex'] = data_frame['Sex'].replace(['male', 'female'], [1,0])
        data_frame['Survived'] = data_frame['Survived'].replace([True, False], [1,0])

        # Turned out not to be needed, but may be useful
        #data_frame['Age'] = data_frame['Age'] / data_frame['Age'].abs().max()
        #data_frame['PClass'] = data_frame['PClass'] / data_frame['PClass'].abs.max()

        self.data_frame = data_frame

    def __len__(self):
        return len(self.data_frame)

    def __getItem__(self, idx):
        features = torch.tensor(self.data_frame.iloc[idx, [1,2,3]].values, dtype=torch.float)
        label = torch.tensor(self.data_frame.iloc[idx, 0], dtype=torch.float)
        return features, label

# Instantiate dataset and Test Output

In [24]:
dataset =  TitanicDataset('./data/training_data.csv')
dataset.data_frame.head()

Unnamed: 0,Survived,Sex,Age,Pclass
0,0,1,22.0,3
1,1,0,38.0,1
2,1,0,26.0,3
3,1,0,35.0,1
4,0,1,54.0,1


# Create a Model Class

In [27]:
class BinaryClassifier(nn.Module):
    def __init__(self):
        super(BinaryClassifier, self).__init__()
        self.layer1 = nn.Linear(3, 16)
        self.layer2 = nn.Linear(16, 16)
        self.layer3 = nn.Linear(16, 1)
        self.signmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = torch.sigmoid(self.layer3(x))
        return x

# Training

In [28]:
def train_model(data_frame, model, total_epochs=50):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001) # try SGD too

    for epoch in range(total_epochs):
        for _, row in data_frame.iterrows():
            # Convert row to a tensor and extra dimension
            x = torch.Tensor(row[['Sex', 'Age', 'Pclass']].values).view(1, -1)#.unsqueeze(0)
            y = torch.Tensor([row['Survived']]).unsqueeze(0) # change this to view to make sure you're understanding what is happening

            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch + 1}/{total_epochs} Loss: {loss.item()}')

model = BinaryClassifier()
train_model(dataset.data_frame, model)

Epoch 0/50 Loss: 0.5587952733039856
Epoch 1/50 Loss: 0.5156165957450867
Epoch 2/50 Loss: 0.4973379671573639
Epoch 3/50 Loss: 0.43688634037971497
Epoch 4/50 Loss: 0.3568241596221924
Epoch 5/50 Loss: 0.2867208421230316
Epoch 6/50 Loss: 0.23886951804161072
Epoch 7/50 Loss: 0.20660677552223206
Epoch 8/50 Loss: 0.18851670622825623
Epoch 9/50 Loss: 0.1754479706287384
Epoch 10/50 Loss: 0.16513466835021973
Epoch 11/50 Loss: 0.15754011273384094
Epoch 12/50 Loss: 0.15125973522663116
Epoch 13/50 Loss: 0.14581888914108276
Epoch 14/50 Loss: 0.1413525640964508
Epoch 15/50 Loss: 0.1373540163040161
Epoch 16/50 Loss: 0.13385212421417236
Epoch 17/50 Loss: 0.13030681014060974
Epoch 18/50 Loss: 0.1270875632762909
Epoch 19/50 Loss: 0.12408927828073502
Epoch 20/50 Loss: 0.12141433358192444
Epoch 21/50 Loss: 0.11919459700584412
Epoch 22/50 Loss: 0.10458424687385559
Epoch 23/50 Loss: 0.109219029545784
Epoch 24/50 Loss: 0.10687670856714249
Epoch 25/50 Loss: 0.10394314676523209
Epoch 26/50 Loss: 0.1028720363974

# Calculate Accuracy with Test Data

In [32]:
testing_data = TitanicDataset('./data/testing_data.csv')
correct_preddictions = 0

for _, row in testing_data.data_frame.iterrows():
    x = torch.Tensor(row[['Sex', 'Age', 'Pclass']].values).view(1,-1)
    output = model(x)
    prediction = round(output.item())
    if prediction == row['Survived']:
        correct_preddictions += 1

print(f'Accuracy: {correct_preddictions / len(testing_data)}')

Accuracy: 0.8518518518518519


# Save the Model

In [34]:
torch.save(model.state_dict(), './models/Adam_BCELoss_lr_001.pth')

# Load the Model

In [35]:
model = BinaryClassifier()
model.load_state_dict(torch.load('./models/Adam_BCELoss_lr_001.pth'))

<All keys matched successfully>

# Make Predictions

In [None]:
sex = 0
age = 10.0
p_class = 3

while True:
    # Convert features to a PyTorch Tensor
    feature_tensor = torch.Tensor([sex, age, p_class])

    # make prediction
    output = model(feature_tensor)

    # round the number to the nearest boolean
    percent_probability = math.ceil(output.item() * 100)
    prediction = round(output.item())

    if (prediction > .5):
        print('\nSurvived')
        print(f'Confidence: {percent_probability}')
    else:
        print('\nDied')
        print(f'Confidence: {100 - percent_probability}')

    sex = int(input('Enter sex(0 = female, 1 = male): '))
    age = float(input('Enter age: '))
    p_class = int(input('Enter passanger class (values = 1, 2, 3): '))
    