In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import mean_squared_error,r2_score, mean_absolute_error, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
import warnings 
# Silence every warning for the rest of the session. Note that this also hides
# any pandas / scikit-learn warnings raised by the cells below.
warnings.filterwarnings(action='ignore')
print("all Libraries imported successfully")

all Libraries imported successfully


In [34]:
# Load seaborn's example Titanic dataset and preview it.
# (The former bare `df.columns` line was removed: only the last expression of
# a cell is displayed, so it had no effect.)
df = sns.load_dataset("titanic")
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [35]:
# Keep only the target and the four model features. The explicit copy makes
# df an independent frame, so the column assignments in later cells do not
# write into a slice of the originally loaded data.
df = df[['survived', 'pclass', 'sex', 'age', 'fare']].copy()

In [36]:
# Fill missing ages with the median age. The result is assigned back rather
# than calling fillna(inplace=True) on the selected column: the in-place form
# operates on a temporary Series and is not guaranteed to update df (it never
# does under pandas Copy-on-Write).
df = df.assign(age=df['age'].fillna(df['age'].median()))

In [37]:
# Convert sex to numeric codes. LabelEncoder numbers the sorted class labels,
# so 'female' -> 0 and 'male' -> 1. assign() builds a new frame instead of
# writing a column into a DataFrame that was produced by slicing.
df = df.assign(sex=LabelEncoder().fit_transform(df['sex']))

In [38]:
# Inspect the frame after column selection, age imputation and sex encoding
df

Unnamed: 0,survived,pclass,sex,age,fare
0,0,3,1,22.0,7.2500
1,1,1,0,38.0,71.2833
2,1,3,0,26.0,7.9250
3,1,1,0,35.0,53.1000
4,0,3,1,35.0,8.0500
...,...,...,...,...,...
886,0,2,1,27.0,13.0000
887,1,1,0,19.0,30.0000
888,0,3,0,28.0,23.4500
889,1,1,1,26.0,30.0000


In [39]:
# Features (model inputs) and label (survival outcome)
X = df.loc[:, ['pclass', 'sex', 'age', 'fare']]
y = df.loc[:, 'survived']

In [40]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bagging Example - Random Forest

In [41]:
# Fit a 100-tree random forest on the training split, then score it on the held-out rows
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

print("Random Forest Accuracy :", accuracy_score(y_true=y_test, y_pred=y_pred_rf))

Random Forest Accuracy : 0.8044692737430168


# Boosting Example - Gradient Boosting

In [42]:
# Fit a gradient-boosting classifier (100 stages, learning rate 0.1) on the
# training split, then score it on the held-out rows
gb_model = GradientBoostingClassifier(
    random_state=42,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

print("Gradient Boosting Accuracy :", accuracy_score(y_true=y_test, y_pred=y_pred_gb))

Gradient Boosting Accuracy : 0.8100558659217877


# Stacking Example - Combine Multiple Models


In [43]:
def predict_survival(age, pclass, sex, fare, models=None):
    """Print each model's survival prediction for one hypothetical passenger.

    Parameters
    ----------
    age : float
        Passenger age.
    pclass : int
        Ticket class (1, 2 or 3 in the training data).
    sex : int
        Encoded sex, as produced by the LabelEncoder cell (0 = female, 1 = male).
    fare : float
        Ticket fare.
    models : dict, optional
        Mapping of display label -> fitted estimator exposing ``predict``.
        When omitted, the notebook-level ``rf_model``, ``gb_model`` and
        ``stacking_model`` are looked up at call time.

    Returns
    -------
    None
        Results are printed, one line per model.
    """
    # Build a one-row frame with the training column names and order, so the
    # estimators (which were fitted on a DataFrame) see matching feature names.
    feature_order = ['pclass', 'sex', 'age', 'fare']
    new_passenger = pd.DataFrame([[pclass, sex, age, fare]], columns=feature_order)

    if models is None:
        # Resolve the notebook models lazily: a model that has not been
        # created yet (e.g. the stacking model, which is built in later cells)
        # is reported as unavailable instead of raising NameError.
        scope = globals()
        models = {
            'Random Forest': scope.get('rf_model'),
            'Gradient Boosting': scope.get('gb_model'),
            'Stacking': scope.get('stacking_model'),
        }

    label_map = {0: 'Died', 1: 'Survived'}

    for name, model in models.items():
        if model is None:
            print(f"{name} Prediction: unavailable (model not created yet)")
            continue
        outcome = int(model.predict(new_passenger)[0])
        print(f"{name} Prediction: {label_map[outcome]}")

In [44]:
# Prediction example: a 100-year-old first-class passenger with sex code 0.
# NOTE(review): predict_survival also queries stacking_model, which is only
# created in later cells -- run those first to get a stacking prediction on a
# fresh kernel.
predict_survival(pclass=1, sex=0, age=100, fare=107.25)

Random Forest Prediction: Survived
Gradient Boosting Prediction: Survived
Stacking Prediction: Survived


# Next class: Stacking, visualize 

In [45]:
# Base learners for the stack: fresh estimators configured with the same
# settings as rf_model and gb_model, each paired with a short name.
estimators = [
    ('rf', RandomForestClassifier(random_state=42, n_estimators=100)),
    ('gb', GradientBoostingClassifier(random_state=42, learning_rate=0.1, n_estimators=100)),
]

In [46]:
# Stack the base learners; a logistic regression combines their outputs,
# using 5-fold cross-validation when generating the base-learner predictions.
stacking_model = StackingClassifier(estimators=estimators, cv=5, final_estimator=LogisticRegression())

In [47]:
# Train the stack on the training split and predict the held-out rows
# (fit returns the estimator itself, so the calls can be chained)
y_pred_stacking = stacking_model.fit(X_train, y_train).predict(X_test)

In [48]:
# Held-out accuracy of the stacked model
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_stacking))

Accuracy: 0.8212290502793296


In [49]:
# Score every fitted model on the same held-out split and list the results
models = {
    'Random Forest': rf_model,
    'Gradient Boosting': gb_model,
    'Stacking': stacking_model,
}
accuracies = {
    label: accuracy_score(y_test, fitted.predict(X_test))
    for label, fitted in models.items()
}

print("\nmodel Camparison:")
for label, score in accuracies.items():
    print(f"{label}: {score:.4f}")


model Camparison:
Random Forest: 0.8045
Gradient Boosting: 0.8101
Stacking: 0.8212


In [50]:
# Same example passenger, run again after the stacking model has been fitted
predict_survival(pclass=1, sex=0, age=100, fare=107.25)

Random Forest Prediction: Survived
Gradient Boosting Prediction: Survived
Stacking Prediction: Survived
