# Intro



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Import data from xlsx

In [4]:
import pandas as pd
df = pd.read_excel('/content/drive/MyDrive/deep_learning_training/experiments/titanic/titanic.xlsx', sheet_name='all_rows')
df = df.dropna(subset=['Age', 'Embarked'])
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.2500,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.9250,S
3,1,1,female,35.0,1,0,53.1000,S
4,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...
885,0,3,female,39.0,0,5,29.1250,Q
886,0,2,male,27.0,0,0,13.0000,S
887,1,1,female,19.0,0,0,30.0000,S
889,1,1,male,26.0,0,0,30.0000,C


## Normalize / adapt data

In [27]:
import torch
import numpy as np
expected_output = torch.tensor(np.array(df.Survived))
expected_output.shape

torch.Size([712])

In [105]:
input_dict = []

input_dict.append(np.array(df.Survived))

def normalize(x):
  return (x - x.min()) / (x.max() - x.min())

class_np = np.array(df.Pclass)
input_dict.append(np.where(class_np == 1, 1, 0))
input_dict.append(np.where(class_np == 2, 1, 0))

input_dict.append(np.where(np.array(df.Sex) == 'male', 1, 0))

input_dict.append(normalize(np.array(df.Age)))

input_dict.append(normalize(1 + np.array(df.SibSp) + np.array(df.Parch)))

input_dict.append(normalize(np.log10(np.array(df.Fare + 1))))

input_dict.append(np.where(np.array(df.Embarked) == 'S',1,0))

input_dict.append(np.where(np.array(df.Embarked) == 'C',1,0))

input_dict.append(np.ones(len(df.Age)))

## Run linear gradient descent

In [145]:
torch.set_printoptions(precision=2, sci_mode=False)

input_matrix = torch.tensor(input_dict)

train_set_size = 500
train_set = input_matrix[:, :train_set_size]
validation_set = input_matrix[:, train_set_size:]

expected_output_train_set = expected_output[:train_set_size]
expected_output_validation_set = expected_output[train_set_size:]


np.random.seed(42)
input_weights = torch.tensor(np.random.rand(train_set.shape[0]) - .5)
input_weights.requires_grad_()

learning_rate = .1

def forward(set, weights):
  return torch.mv(set.T, weights)

def mse(pred, act):
  return torch.mean(torch.pow(torch.sub(pred,act), 2))

for epoch in range(16):
  generated_output = forward(train_set, input_weights)
  loss = mse(expected_output_train_set, generated_output)
  loss.backward()
  with torch.no_grad():
    input_weights -= input_weights.grad * learning_rate
    input_weights.grad.zero_()

  print(f'Loss at epoch {epoch}: {loss:.3f}')

generated_output = forward(validation_set, input_weights)
correct = (expected_output_validation_set == torch.round(generated_output)).sum() / len(generated_output)
print(f'Validation set correct: {correct * 100}%')


Loss at epoch 0: 0.354
Loss at epoch 1: 0.302
Loss at epoch 2: 0.262
Loss at epoch 3: 0.230
Loss at epoch 4: 0.203
Loss at epoch 5: 0.179
Loss at epoch 6: 0.159
Loss at epoch 7: 0.142
Loss at epoch 8: 0.128
Loss at epoch 9: 0.115
Loss at epoch 10: 0.104
Loss at epoch 11: 0.095
Loss at epoch 12: 0.087
Loss at epoch 13: 0.080
Loss at epoch 14: 0.074
Loss at epoch 15: 0.068
Validation set correct: 97.16980743408203%


## Run 2 layers neural net

## Use pytorch (?) to optimize matrix multiplication