In [1]:
# Importing dependencies
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from splinter import Browser
from bs4 import BeautifulSoup as soup

In [2]:
# Displaying test csv as a dataframe
test_df = pd.read_csv('Resources/test.csv')
test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [3]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pclass       418 non-null    int64  
 2   Name         418 non-null    object 
 3   Sex          418 non-null    object 
 4   Age          332 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    object 
 8   Fare         417 non-null    float64
 9   Cabin        91 non-null     object 
 10  Embarked     418 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 36.1+ KB


In [4]:
# Viewing how many nulls by 3rd class in Cabin column
test_df.loc[test_df['Pclass'] == 3]['Cabin'].value_counts(dropna=False)

Cabin
NaN      214
F G63      1
G6         1
F E46      1
F E57      1
Name: count, dtype: int64

In [5]:
# Dropping Cabin column to be replaced with Deck column
test_df_dropped = test_df.drop(['Cabin'], axis=1)
test_df_dropped.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,S


In [6]:
deck_list = []

for class_number in test_df_dropped['Pclass']:
    if class_number == 1:
        deck_list.append('A,B,C,D,E')
    else:
        deck_list.append('D,E,F,G')

test_df_dropped['Deck'] = deck_list
test_df_dropped

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Deck
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q,"D,E,F,G"
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,S,"D,E,F,G"
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,Q,"D,E,F,G"
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S,"D,E,F,G"
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,S,"D,E,F,G"
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,S,"D,E,F,G"
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C,"A,B,C,D,E"
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,S,"D,E,F,G"
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,S,"D,E,F,G"


In [7]:
list_of_names_test_csv = list(test_df_dropped[test_df_dropped.isnull().any(axis=1)]['Name'])
list_of_names_test_csv

['Ilieff, Mr. Ylio',
 'Flegenheim, Mrs. Alfred (Antoinette)',
 'Samaan, Mr. Elias',
 'Johnston, Mrs. Andrew G (Elizabeth Lily" Watson)"',
 'Roth, Miss. Sarah A',
 'Hee, Mr. Ling',
 'Franklin, Mr. Thomas Parham',
 'Shaughnessy, Mr. Patrick',
 'Mangiavacchi, Mr. Serafino Emilio',
 'Davison, Mr. Thomas Henry',
 'Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)',
 'Miles, Mr. Frank',
 'Demetri, Mr. Marinko',
 'Lamb, Mr. John Joseph',
 'Khalil, Mr. Betros',
 "O'Donoghue, Ms. Bridget",
 'Pedersen, Mr. Olaf',
 'Guest, Mr. Robert',
 'Foley, Mr. William',
 'Ryan, Mr. Edward',
 'Willer, Mr. Aaron (Abi Weller")"',
 'Shine, Miss. Ellen Natalia',
 'Thomas, Mr. John',
 'Kiernan, Mr. John',
 'Kennedy, Mr. John',
 'McCoy, Miss. Alicia',
 'Lefebre, Mrs. Frank (Frances)',
 'Thomas, Mr. Charles P',
 'Hilliard, Mr. Herbert Henry',
 'Crafton, Mr. John Bertram',
 'Matinoff, Mr. Nicola',
 'Storey, Mr. Thomas',
 'Smyth, Miss. Julia',
 'Pearce, Mr. Ernest',
 'Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevie

In [8]:
first_30 = list_of_names_test_csv[:29]
first_30

['Ilieff, Mr. Ylio',
 'Flegenheim, Mrs. Alfred (Antoinette)',
 'Samaan, Mr. Elias',
 'Johnston, Mrs. Andrew G (Elizabeth Lily" Watson)"',
 'Roth, Miss. Sarah A',
 'Hee, Mr. Ling',
 'Franklin, Mr. Thomas Parham',
 'Shaughnessy, Mr. Patrick',
 'Mangiavacchi, Mr. Serafino Emilio',
 'Davison, Mr. Thomas Henry',
 'Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)',
 'Miles, Mr. Frank',
 'Demetri, Mr. Marinko',
 'Lamb, Mr. John Joseph',
 'Khalil, Mr. Betros',
 "O'Donoghue, Ms. Bridget",
 'Pedersen, Mr. Olaf',
 'Guest, Mr. Robert',
 'Foley, Mr. William',
 'Ryan, Mr. Edward',
 'Willer, Mr. Aaron (Abi Weller")"',
 'Shine, Miss. Ellen Natalia',
 'Thomas, Mr. John',
 'Kiernan, Mr. John',
 'Kennedy, Mr. John',
 'McCoy, Miss. Alicia',
 'Lefebre, Mrs. Frank (Frances)',
 'Thomas, Mr. Charles P',
 'Hilliard, Mr. Herbert Henry']

In [9]:
first_30[28]

'Hilliard, Mr. Herbert Henry'

In [51]:
browser = Browser('chrome')
url = 'https://titanicfacts.net/titanic-passenger-list/'
browser.visit(url)
html = browser.html

In [61]:
all_classes_df = pd.read_html(html)
all_classes_df[0].to_csv('Resources/1st_class')
all_classes_df[1].to_csv('Resources/2nd_class')
all_classes_df[2].to_csv('Resources/3rd_class')

In [62]:
pd.read_csv('Resources/1st_class')

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4
0,0,Surname,First Names,Age,Boarded,Survivor (S) or Victim (†)
1,1,Allen,Miss Elisabeth Walton,29,Southampton,S
2,2,Allison,Mr Hudson Joshua Creighton,30,Southampton,†
3,3,Allison,Mrs Bessie Waldo,25,Southampton,†
4,4,Allison,Miss Helen Loraine,2,Southampton,†
...,...,...,...,...,...,...
320,320,Williams,Mr Richard Norris II,21,Cherbourg,S
321,321,Wilson,Miss Helen Alice (Maid to Mrs Frederic Oakley ...,31,Cherbourg,S
322,322,Woolner,Mr Hugh,45,Southampton,S
323,323,Wright,Mr George,62,Southampton,†


In [None]:
# Displaying train csv as a dataframe
train_df = pd.read_csv('Resources/train.csv')
train_df

In [None]:
train_df.info()

In [None]:
# Displaying gender submission csv as a dataframe
gender_submission_df = pd.read_csv('Resources/gender_submission.csv')
gender_submission_df

In [None]:
gender_submission_df.info()