In [None]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the Titanic dataset through seaborn's dataset loader.
df = sns.load_dataset('titanic')

# Handle missing values.
# Assign the filled column back rather than calling
# `df[col].fillna(..., inplace=True)`: that form is chained assignment on an
# intermediate object, which pandas warns about and which stops modifying
# `df` in pandas 3.0.
df['age'] = df['age'].fillna(df['age'].median())
df['embark_town'] = df['embark_town'].fillna(df['embark_town'].mode()[0])
df.drop(columns=['deck'], inplace=True)

# Encode categorical columns as integer labels.
# Use a fresh encoder per column and keep each one, so the label mapping for
# every column (not only the last one fitted) stays available for
# inverse_transform. `le` remains bound to the last encoder.
encoders = {}
for col in ['sex', 'embark_town']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Drop columns not used as features.
df.drop(columns=['embarked', 'class', 'who', 'alive', 'adult_male', 'alone'], inplace=True)

# Scale numerical features to zero mean and unit variance.
# NOTE(review): the scaler is fit on the full dataset before the split below,
# so test-set statistics influence the training features. Consider fitting on
# X_train only and applying transform() to X_test.
df[['age', 'fare']] = StandardScaler().fit_transform(df[['age', 'fare']])

# Split into features/target and an 80/20 train/test partition
# (fixed random_state so the split is reproducible).
X = df.drop('survived', axis=1)
y = df['survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train shape:", X_train.shape, "| Test shape:", X_test.shape)
print(X_train.head())

Train shape: (712, 7) | Test shape: (179, 7)
     pclass  sex       age  sibsp  parch      fare  embark_town
331       1    1  1.240235      0      0 -0.074583            2
733       2    1 -0.488887      0      0 -0.386671            2
382       3    1  0.202762      0      0 -0.488854            2
704       3    1 -0.258337      1      0 -0.490280            2
813       3    0 -1.795334      4      2 -0.018709            2


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['embark_town'].fillna(df['embark_town'].mode()[0], inplace=True)
