# Titanic - Machine Learning from Disaster

A first attempt at the Kaggle Titanic challenge, written by a beginner to ML competitions.

## Setting up the environment

In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

import matplotlib.pyplot as plt

# Notebook-wide pandas display settings: show at most 10 rows of any frame
# and render floats with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)

# Apply the jupyterthemes 'onedork' plot style with a 16x9 default figure size.
from jupyterthemes import jtplot
jtplot.style("onedork", figsize=(16, 9))

# Make float32 the default float type used by Keras.
tf.keras.backend.set_floatx("float32")

print("Imported the modules.")

Imported the modules.


### Checking if a GPU is available

In [3]:
# list_physical_devices('GPU') returns the GPUs TensorFlow can see;
# an empty list is falsy, which selects the CPU message.
print(
    'GPU Available'
    if tf.config.list_physical_devices('GPU')
    else 'No GPU found. Working on CPU'
)

GPU Available


## Loading the data

In [6]:
# Read the labelled (training) and test CSVs, in that order, from paths
# relative to the current working directory.
labelled_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

### Labelled dataset

In [7]:
# Preview the labelled frame; the view is truncated because
# pd.options.display.max_rows is set to 10 in the setup cell.
labelled_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.3,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.1,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C


### Test dataset

In [8]:
# Preview the test frame; unlike the labelled frame it has no 'Survived' column.
test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.7,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.7,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.3,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.1,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.1,,S


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
labelled_df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.4,2.3,29.7,0.5,0.4,32.2
std,257.4,0.5,0.8,14.5,1.1,0.8,49.7
min,1.0,0.0,1.0,0.4,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.1,0.0,0.0,7.9
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.5
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3


### Trying out different combinations of features

In [10]:
# Survival outcomes split by sex. A single .loc[rows, column] lookup replaces
# the chained `.loc[rows][column]` indexing.
men = labelled_df.loc[labelled_df.Sex == 'male', 'Survived']
women = labelled_df.loc[labelled_df.Sex == 'female', 'Survived']

# 'Survived' is coded 0/1, so its mean is the fraction of survivors.
# Series.mean() is vectorised and yields NaN (rather than raising
# ZeroDivisionError) if a group happens to be empty.
rate_men = men.mean()
rate_women = women.mean()

print('Male survival rate   : {:.2f}%'.format(rate_men * 100))
print('Female survival rate : {:.2f}%'.format(rate_women * 100))

Male survival rate   : 18.89%
Female survival rate : 74.20%
