### Libraries Used
* Pandas, Numpy - Data Loading / Transformation / Analysis
* Sklearn - Preprocessing / Train-test splitting / Model evaluation (accuracy metric)

In [1]:
# Import all dependencies required for the problem.
from __future__ import print_function
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [2]:
# Pin NumPy's global random number generator to a fixed seed so that any
# stochastic step in this notebook gives the same result on every run.
RANDOM_SEED = 42
np.random.seed(seed=RANDOM_SEED)

In [4]:
# Read the Titanic spreadsheet with pandas and keep only the passengers
# whose 'Age' value is present (rows with a missing age are dropped).
df = (
    pd.read_excel('../../data/titanic_dataset.xlsx')
    .dropna(subset=['Age'])
)

In [5]:
# Display the first rows of the loaded frame as a quick check of its columns.
df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,1
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,1
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


In [7]:
# Separate the predictors from the label:
#   x - independent variables: every column up to and including 'Embarked'
#       (the passenger details used for prediction)
#   y - dependent variable: 'Survived' (whether the passenger survived)
x = df.loc[:, :'Embarked']
y = df.loc[:, 'Survived']

In [8]:
# Preview the first rows of the feature frame `x` (the columns up to 'Embarked',
# i.e. without the 'Survived' label).
x.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [9]:
# Our Smart Classifier tries to predict if a passenger survived or not based on his / her details. 
def smart_classifier(row):
    """Gender-only baseline: predict survival from the passenger's 'Sex' field.

    Parameters
    ----------
    row : mapping-like
        A single passenger record supporting ``row['Sex']``
        (e.g. one row of the passenger DataFrame).

    Returns
    -------
    int
        0 (did not survive) when ``row['Sex'] == 'male'``; 1 (survived) for
        any other value.
    """
    return 0 if row['Sex'] == 'male' else 1

### Data Dictionary

    Variable	Definition	Key
    survival	Survival	0 = No, 1 = Yes
    pclass	Ticket class	1 = 1st, 2 = 2nd, 3 = 3rd
    sex	Sex	
    Age	Age in years	
    sibsp	# of siblings / spouses aboard the Titanic	
    parch	# of parents / children aboard the Titanic	
    ticket	Ticket number	
    fare	Passenger fare	
    cabin	Cabin number	
    embarked	Port of Embarkation	C = Cherbourg, Q = Queenstown, S = Southampton

### Variable Notes

    pclass: A proxy for socio-economic status (SES)
    1st = Upper
    2nd = Middle
    3rd = Lower

    age: Age is fractional if less than 1. If the age is estimated, it is in the form of xx.5

    sibsp: The dataset defines family relations in this way...
    Sibling = brother, sister, stepbrother, stepsister
    Spouse = husband, wife (mistresses and fiancés were ignored)

    parch: The dataset defines family relations in this way...
    Parent = mother, father
    Child = daughter, son, stepdaughter, stepson
    Some children travelled only with a nanny, therefore parch=0 for them.

In [10]:
from sklearn.metrics import accuracy_score
def print_accuracy(x, y_true=None):
    """Score ``smart_classifier`` on the passengers in ``x``.

    Despite its name, this function does not print anything: it returns the
    score, which the notebook then displays as the cell output.

    Parameters
    ----------
    x : pandas.DataFrame
        One row per passenger; every row is passed to ``smart_classifier``.
    y_true : array-like, optional
        Ground-truth labels in the same row order as ``x``. When omitted, the
        notebook-level ``y`` is used, which keeps existing ``print_accuracy(x)``
        calls working. Passing it explicitly avoids depending on whichever
        cell last rebound ``y``.

    Returns
    -------
    float
        Accuracy of the predictions, expressed as a percentage.
    """
    if y_true is None:
        # Backward-compatible fallback to the labels defined at notebook level.
        y_true = y
    predictions = [smart_classifier(record) for _, record in x.iterrows()]
    return accuracy_score(y_true, predictions) * 100.0

In [11]:
# Rebuild the feature/label pair from the rows that have a known 'Age'.
# NOTE: `df` is already filtered with dropna(subset=['Age']) when it is loaded,
# so this filter removes nothing further here.
filtered_df = df.dropna(subset=['Age'])
x, y = filtered_df.loc[:, :'Embarked'], filtered_df['Survived']

In [12]:
# Accuracy (in percent) of the gender-only baseline on the Age-filtered passengers.
print_accuracy(x)

78.01120448179272