In [1]:
import pandas as pd
import numpy as np

# Loading the dataset

In [2]:
df=pd.read_csv('Titanic-Dataset.csv')

# Handling Null Values

In [3]:
# Check for null values (1 line)
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [15]:
# Fill null values for 'Embarked' with the mode(1 line)
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])
df["Embarked"].isnull().sum()

'S'

In [19]:
# Drop rows with null values in 'Cabin' (or alternatively, you can fill it with a placeholder) (1 line)
#df["Cabin"] = df["Cabin"].fillna("Not Disclosed")
#df["Cabin"]
drops = df["Age"].dropna()
drops.isnull()

0      False
2      False
3      False
4      False
6      False
       ...  
885    False
886    False
887    False
889    False
890    False
Name: Age, Length: 613, dtype: bool

In [6]:
# Check for duplicates (1 line)
df.duplicated().sum()
# Drop duplicates if any (1 line)
df = df.drop_duplicates()

# Handling Outliers

In [7]:
import numpy as np

# Function to remove outliers using the IQR method
def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]

# Remove outliers from 'Fare' using the remove_outilers function (1 line)
df = remove_outliers(df, "Fare")

# Scaling and Normalization

In [8]:
df.select_dtypes('number')

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
0,1,0,3,22.0,1,0,7.2500
2,3,1,3,26.0,0,0,7.9250
3,4,1,1,35.0,1,0,53.1000
4,5,0,3,35.0,0,0,8.0500
5,6,0,3,,0,0,8.4583
...,...,...,...,...,...,...,...
886,887,0,2,27.0,0,0,13.0000
887,888,1,1,19.0,0,0,30.0000
888,889,0,3,,1,2,23.4500
889,890,1,1,26.0,0,0,30.0000


In [9]:
df.select_dtypes('object')

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
0,"Braund, Mr. Owen Harris",male,A/5 21171,Not Disclosed,S
2,"Heikkinen, Miss. Laina",female,STON/O2. 3101282,Not Disclosed,S
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,113803,C123,S
4,"Allen, Mr. William Henry",male,373450,Not Disclosed,S
5,"Moran, Mr. James",male,330877,Not Disclosed,Q
...,...,...,...,...,...
886,"Montvila, Rev. Juozas",male,211536,Not Disclosed,S
887,"Graham, Miss. Margaret Edith",female,112053,B42,S
888,"Johnston, Miss. Catherine Helen ""Carrie""",female,W./C. 6607,Not Disclosed,S
889,"Behr, Mr. Karl Howell",male,111369,C148,C


In [10]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Standard scaling for 'Fare' (2 lines)
scaler = StandardScaler()
df['Fare_scaled'] = scaler.fit_transform(df[['Fare']])

# Min-Max scaling for 'Age' (2 lines)
min_max = MinMaxScaler()
df['Age_scaled'] = min_max.fit_transform(df[['Age']])

# Encoding Categorical Variables

In [11]:
# One-hot encoding for 'Embarked' and 'Sex' (1 line)
#one_hot = pd.get_dummies(df["Embarked"], df["Sex"])
one_hot = pd.get_dummies(df, columns=["Embarked", "Sex"])
one_hot

Unnamed: 0,PassengerId,Survived,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Fare_scaled,Age_scaled,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male
0,1,0,3,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.2500,Not Disclosed,-0.779117,0.271174,False,False,True,False,True
2,3,1,3,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.9250,Not Disclosed,-0.729373,0.321438,False,False,True,True,False
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1000,C123,2.599828,0.434531,False,False,True,True,False
4,5,0,3,"Allen, Mr. William Henry",35.0,0,0,373450,8.0500,Not Disclosed,-0.720161,0.434531,False,False,True,False,True
5,6,0,3,"Moran, Mr. James",,0,0,330877,8.4583,Not Disclosed,-0.690071,,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",27.0,0,0,211536,13.0000,Not Disclosed,-0.355367,0.334004,False,False,True,False,True
887,888,1,1,"Graham, Miss. Margaret Edith",19.0,0,0,112053,30.0000,B42,0.897459,0.233476,False,False,True,True,False
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",,1,2,W./C. 6607,23.4500,Not Disclosed,0.414752,,False,False,True,True,False
889,890,1,1,"Behr, Mr. Karl Howell",26.0,0,0,111369,30.0000,C148,0.897459,0.321438,True,False,False,False,True
