In [1]:
# Imports
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read in the Titanic Dataset as a dataframe
titanicDatabaseCSVPath = "Resources/titanic.csv"
titanicDF = pd.read_csv(titanicDatabaseCSVPath)

In [3]:
# Show the newly created dataframe
titanicDF.head(15)

Unnamed: 0,name,gender,age,class,embarked,country,ticketno,fare,sibsp,parch,survived
0,"Abbing, Mr. Anthony",male,42.0,3rd,S,United States,5547.0,7.11,0.0,0.0,no
1,"Abbott, Mr. Eugene Joseph",male,13.0,3rd,S,United States,2673.0,20.05,0.0,2.0,no
2,"Abbott, Mr. Rossmore Edward",male,16.0,3rd,S,United States,2673.0,20.05,1.0,1.0,no
3,"Abbott, Mrs. Rhoda Mary 'Rosa'",female,39.0,3rd,S,England,2673.0,20.05,1.0,1.0,yes
4,"Abelseth, Miss. Karen Marie",female,16.0,3rd,S,Norway,348125.0,7.13,0.0,0.0,yes
5,"Abelseth, Mr. Olaus Jørgensen",male,25.0,3rd,S,United States,348122.0,7.13,0.0,0.0,yes
6,"Abelson, Mr. Samuel",male,30.0,2nd,C,France,3381.0,24.0,1.0,0.0,no
7,"Abelson, Mrs. Hannah",female,28.0,2nd,C,France,3381.0,24.0,1.0,0.0,yes
8,"Abī-Al-Munà, Mr. Nāsīf Qāsim",male,27.0,3rd,C,Lebanon,2699.0,18.1509,0.0,0.0,yes
9,"Abrahamsson, Mr. Abraham August Johannes",male,20.0,3rd,S,Finland,3101284.0,7.1806,0.0,0.0,yes


In [4]:
# Change the 'C','S', 'Q's in the 'embarked' column to show the full location name
titanicDF['embarked'] = titanicDF['embarked'].map({'S': 'Southampton, England', 
                                                   'Q': 'Queenstown, Ireland', 
                                                   'C': 'Cherbourg, France'})

In [5]:
# Show the change
titanicDF.head()

Unnamed: 0,name,gender,age,class,embarked,country,ticketno,fare,sibsp,parch,survived
0,"Abbing, Mr. Anthony",male,42.0,3rd,"Southampton, England",United States,5547.0,7.11,0.0,0.0,no
1,"Abbott, Mr. Eugene Joseph",male,13.0,3rd,"Southampton, England",United States,2673.0,20.05,0.0,2.0,no
2,"Abbott, Mr. Rossmore Edward",male,16.0,3rd,"Southampton, England",United States,2673.0,20.05,1.0,1.0,no
3,"Abbott, Mrs. Rhoda Mary 'Rosa'",female,39.0,3rd,"Southampton, England",England,2673.0,20.05,1.0,1.0,yes
4,"Abelseth, Miss. Karen Marie",female,16.0,3rd,"Southampton, England",Norway,348125.0,7.13,0.0,0.0,yes


In [6]:
# Split the 'Name' column by comma into two new columns: 'Last Name' and 'First Name'
titanicDF[['last name', 'first name']] = titanicDF['name'].str.split(',', n=1, expand=True)

# Strip any leading/trailing whitespace from the names
titanicDF['last name'] = titanicDF['last name'].str.strip()
titanicDF['first name'] = titanicDF['first name'].str.strip()

# Drop the name column
titanicDF = titanicDF.drop(columns=['name'])

titanicDF.head(20)

Unnamed: 0,gender,age,class,embarked,country,ticketno,fare,sibsp,parch,survived,last name,first name
0,male,42.0,3rd,"Southampton, England",United States,5547.0,7.11,0.0,0.0,no,Abbing,Mr. Anthony
1,male,13.0,3rd,"Southampton, England",United States,2673.0,20.05,0.0,2.0,no,Abbott,Mr. Eugene Joseph
2,male,16.0,3rd,"Southampton, England",United States,2673.0,20.05,1.0,1.0,no,Abbott,Mr. Rossmore Edward
3,female,39.0,3rd,"Southampton, England",England,2673.0,20.05,1.0,1.0,yes,Abbott,Mrs. Rhoda Mary 'Rosa'
4,female,16.0,3rd,"Southampton, England",Norway,348125.0,7.13,0.0,0.0,yes,Abelseth,Miss. Karen Marie
5,male,25.0,3rd,"Southampton, England",United States,348122.0,7.13,0.0,0.0,yes,Abelseth,Mr. Olaus Jørgensen
6,male,30.0,2nd,"Cherbourg, France",France,3381.0,24.0,1.0,0.0,no,Abelson,Mr. Samuel
7,female,28.0,2nd,"Cherbourg, France",France,3381.0,24.0,1.0,0.0,yes,Abelson,Mrs. Hannah
8,male,27.0,3rd,"Cherbourg, France",Lebanon,2699.0,18.1509,0.0,0.0,yes,Abī-Al-Munà,Mr. Nāsīf Qāsim
9,male,20.0,3rd,"Southampton, England",Finland,3101284.0,7.1806,0.0,0.0,yes,Abrahamsson,Mr. Abraham August Johannes


In [7]:
# Export the titanicDF as a .json file
titanicDF.to_json('titanicDF.json', orient='records')