In [4]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import urllib.request

In [5]:
# Fetch the Titanic CSV from GitHub and save it under `filename`
# (a relative path, so it lands in the current working directory).
filename = "Titanic-Dataset.csv"
url = "https://github.com/JohnEric-Creator/FreeDataScienceAcquisition/raw/main/Titanic-Dataset.csv"
# urlretrieve returns a (local_path, headers) tuple; as the last expression
# of the cell it is what the notebook displays as output.
urllib.request.urlretrieve(url=url, filename=filename)

('Titanic-Dataset.csv', <http.client.HTTPMessage at 0x1f95403dbe0>)

In [6]:
# Parse the downloaded CSV file into a DataFrame.
titanic_df = pd.read_csv(filepath_or_buffer=filename)

In [7]:
# Preview the raw data: a banner line followed by the first five rows.
# A single print with sep="\n" emits the same text as two separate prints.
print(
    "\nFirst few rows of the Titanic dataset, straight from the archives:\n",
    titanic_df.head(n=5),
    sep="\n",
)


First few rows of the Titanic dataset, straight from the archives:

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C1

In [8]:
# Report how many missing entries each column contains.
# isna() is the alias of isnull(); summing the boolean mask counts per column.
print(
    "\nInvestigating the data for any missing values, like Sherlock Holmes:\n",
    titanic_df.isna().sum(),
    sep="\n",
)


Investigating the data for any missing values, like Sherlock Holmes:

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


In [9]:
# Fill in missing Age values with the mean age, rounded to two decimals.
mean_age = round(titanic_df['Age'].mean(), 2)
# Assign the filled column back rather than calling fillna(inplace=True) on
# the selection `titanic_df['Age']`: that chained in-place form raises a
# FutureWarning on pandas 2.x and, with Copy-on-Write enabled, modifies only
# a temporary Series, leaving the DataFrame's missing values untouched.
titanic_df['Age'] = titanic_df['Age'].fillna(mean_age)

In [10]:
# Drop the Cabin column.
# errors="ignore" makes this cell safe to re-run: once Cabin has been removed
# a second execution is a no-op instead of raising KeyError. The result is
# re-bound to the same name rather than mutating the frame in place.
titanic_df = titanic_df.drop(columns=['Cabin'], errors='ignore')

In [11]:
# Replace missing values in the Embarked column with the most frequent port.
# mode() returns a Series (several values when there is a tie); [0] takes
# its first entry.
mode_embarked = titanic_df['Embarked'].mode()[0]
# Assign the filled column back rather than calling fillna(inplace=True) on
# the selection `titanic_df['Embarked']`: that chained in-place form raises a
# FutureWarning on pandas 2.x and, with Copy-on-Write enabled, modifies only
# a temporary Series, leaving the DataFrame's missing values untouched.
titanic_df['Embarked'] = titanic_df['Embarked'].fillna(mode_embarked)

In [12]:
# Re-count missing entries per column to confirm the imputation steps worked.
# isna() is the alias of isnull(); a single print with sep="\n" produces the
# same text as printing the banner and the counts separately.
print(
    "\nDouble-checking for any remaining missing values, like a true detective:\n",
    titanic_df.isna().sum(),
    sep="\n",
)


Double-checking for any remaining missing values, like a true detective:

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64


In [13]:
# Discard every row that still holds at least one missing value.
# axis=0 / how='any' are the defaults, written out here for clarity;
# the frame is modified in place.
titanic_df.dropna(axis=0, how='any', inplace=True)

In [14]:
# One-hot encode the categorical Sex and Embarked columns. Each listed column
# is replaced by indicator columns named <column>_<value>
# (e.g. Sex_female, Embarked_C).
titanic_df = pd.get_dummies(
    data=titanic_df,
    columns=['Sex', 'Embarked'],
)

In [15]:
# Show the first five rows of the cleaned and encoded dataset.
# A single print with sep="\n" emits the same text as two separate prints.
print(
    "\nThe final polished dataset, ready for analysis like a true aristocrat:\n",
    titanic_df.head(n=5),
    sep="\n",
)


The final polished dataset, ready for analysis like a true aristocrat:

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name   Age  SibSp  Parch  \
0                            Braund, Mr. Owen Harris  22.0      1      0   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  38.0      1      0   
2                             Heikkinen, Miss. Laina  26.0      0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  35.0      1      0   
4                           Allen, Mr. William Henry  35.0      0      0   

             Ticket     Fare  Sex_female  Sex_male  Embarked_C  Embarked_Q  \
0         A/5 21171   7.2500           0         1           0           0   
1          PC 17599  71.2833           1         0           1           0   
2  STON/O2. 3101282