In [1]:
import seaborn as sns
import pandas as pd

df = sns.load_dataset("titanic")

# Select useful features
features = ["age", "fare", "pclass", "sex"]
df_selected = df[features]

print(df_selected.head())


    age     fare  pclass     sex
0  22.0   7.2500       3    male
1  38.0  71.2833       1  female
2  26.0   7.9250       3  female
3  35.0  53.1000       1  female
4  35.0   8.0500       3    male


In [2]:
# Missing values
print(df.isnull().sum())   # check missing

# Fill numeric missing values
df["age"] = df["age"].fillna(df["age"].mean())

# Fill categorical missing values
df["embarked"] = df["embarked"].fillna(df["embarked"].mode()[0])

print(df.isnull().sum())   # re-check


survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64
survived         0
pclass           0
sex              0
age              0
sibsp            0
parch            0
fare             0
embarked         0
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64


In [4]:
# OUTLIER DETECTION
import numpy as np

Q1 = df["fare"].quantile(0.25)
Q3 = df["fare"].quantile(0.75)
IQR = Q3 - Q1

lower_limit = Q1 - 1.5 * IQR
upper_limit = Q3 + 1.5 * IQR

print("Lower:", lower_limit, "Upper:", upper_limit)

outliers = df[(df["fare"] < lower_limit) | (df["fare"] > upper_limit)]
print("Outliers count:", outliers.shape)
print(outliers.head())


Lower: -26.724 Upper: 65.6344
Outliers count: (116, 15)
    survived  pclass     sex        age  sibsp  parch      fare embarked  \
1          1       1  female  38.000000      1      0   71.2833        C   
27         0       1    male  19.000000      3      2  263.0000        S   
31         1       1  female  29.699118      1      0  146.5208        C   
34         0       1    male  28.000000      1      0   82.1708        C   
52         1       1  female  49.000000      1      0   76.7292        C   

    class    who  adult_male deck  embark_town alive  alone  
1   First  woman       False    C    Cherbourg   yes  False  
27  First    man        True    C  Southampton    no  False  
31  First  woman       False    B    Cherbourg   yes  False  
34  First    man        True  NaN    Cherbourg    no  False  
52  First  woman       False    D    Cherbourg   yes  False  
