In [418]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as ptl

In [419]:
# Load the passenger table and derive the modelling frame in a single chain.
# Nothing is mutated in place, so re-running this cell always rebuilds `df`
# from the CSV on disk.
df = (
    pd.read_csv("titanic.csv")
    # Family is the row-wise sum of the SibSp and Parch columns.
    .assign(Family=lambda raw: raw["SibSp"] + raw["Parch"])
    # SibSp and Parch are now folded into Family; the other listed columns
    # are removed from the frame as well.
    .drop(columns=["SibSp", "Parch", "Cabin", "PassengerId", "Name", "Ticket"])
)
df

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,Family
0,0,3,male,22.0,7.2500,S,1
1,1,1,female,38.0,71.2833,C,1
2,1,3,female,26.0,7.9250,S,0
3,1,1,female,35.0,53.1000,S,1
4,0,3,male,35.0,8.0500,S,0
...,...,...,...,...,...,...,...
886,0,2,male,27.0,13.0000,S,0
887,1,1,female,19.0,30.0000,S,0
888,0,3,female,,23.4500,S,3
889,1,1,male,26.0,30.0000,C,0


<h3>Train test split</h3>

In [420]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
# train_test_split is already imported in the notebook's import cell, so it
# is not re-imported here.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns="Survived"),  # every column except the target
    df["Survived"],               # target column
    test_size=0.2,
    random_state=1,
)
x_train

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked,Family
301,3,male,,23.2500,Q,2
309,1,female,30.0,56.9292,C,0
516,2,female,34.0,10.5000,S,0
120,2,male,21.0,73.5000,S,2
570,2,male,62.0,10.5000,S,0
...,...,...,...,...,...,...
715,3,male,19.0,7.6500,S,0
767,3,female,30.5,7.7500,Q,0
72,2,male,21.0,73.5000,S,0
235,3,female,,7.5500,S,0


In [453]:
# Column groups: each list is routed to its own preprocessing branch below.
categorical_features = ['Sex', 'Embarked']
numeric_features = ['Age', 'Family', 'Fare', 'Pclass']

# Numeric branch: fill missing values with the column median, then scale
# with RobustScaler.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', RobustScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' means a category not seen
# during fit does not raise at transform time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own column group. The 'num' branch stays ahead of
# the 'cat' branch so the transformed column order is unchanged.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Gradient-boosted classifier with 500 estimators; the seed keeps training
# repeatable.
model = GradientBoostingClassifier(random_state=42, n_estimators=500)

# Bundle preprocessing and the classifier so the imputers, scalers and
# encoder are fitted on the training split only.
pip_model = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Pipeline.fit returns the fitted pipeline itself, so training and
# prediction on the held-out split can be chained.
ans = pip_model.fit(x_train, y_train).predict(x_test)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=ans)
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.8101


###### 