In [7]:
import pandas as pd
import numpy as np

# Loading the dataset

In [8]:
df=pd.read_csv('Titanic-Dataset.csv')

# Handling Null Values

In [9]:
print(df.isnull().sum())

df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df.drop(columns=['Cabin'], inplace=True)


PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)


# Handling Duplicates

In [10]:
print(df.duplicated().sum())
df.drop_duplicates(inplace=True)

0


# Handling Outliers

In [11]:
import numpy as np

def remove_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]

df = remove_outliers(df, 'Fare')


# Scaling and Normalization

In [12]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

scaler = StandardScaler()
df['Fare_scaled'] = scaler.fit_transform(df[['Fare']])

min_max_scaler = MinMaxScaler()
df['Age_scaled'] = min_max_scaler.fit_transform(df[['Age']])

# Encoding Categorical Variables

In [13]:
df = pd.get_dummies(df, columns=['Embarked', 'Sex'])
print(df.head())

   PassengerId  Survived  Pclass  \
0            1         0       3   
2            3         1       3   
3            4         1       1   
4            5         0       3   
5            6         0       3   

                                           Name   Age  SibSp  Parch  \
0                       Braund, Mr. Owen Harris  22.0      1      0   
2                        Heikkinen, Miss. Laina  26.0      0      0   
3  Futrelle, Mrs. Jacques Heath (Lily May Peel)  35.0      1      0   
4                      Allen, Mr. William Henry  35.0      0      0   
5                              Moran, Mr. James   NaN      0      0   

             Ticket     Fare  Fare_scaled  Age_scaled  Embarked_C  Embarked_Q  \
0         A/5 21171   7.2500    -0.779117    0.271174       False       False   
2  STON/O2. 3101282   7.9250    -0.729373    0.321438       False       False   
3            113803  53.1000     2.599828    0.434531       False       False   
4            373450   8.0500    