In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
import time

# 1. Read the (already cleaned, per the original note) Titanic data from disk
titanic = pd.read_csv('train.csv')

# 2. The label column, and every other column as a model input
target = 'Survived'
features = [column for column in titanic if column != target]  # iterating a DataFrame yields its column labels
X, y = titanic[features], titanic[target]

# 3. Hold out 20% of the rows for testing; stratify on the label so both
#    splits keep the same class proportions, and fix the seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 4. Fit the two baseline classifiers and collect their test-set predictions.
#    `fit` returns the estimator itself, so construction and training are chained.

# Single decision tree with default depth settings, seeded for repeatability
base_dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = base_dt.predict(X_test)

# Random forest of 100 trees, same seed
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# 5. Train AdaBoost
# The weak learner is a decision stump (a tree limited to depth 1).
stump = DecisionTreeClassifier(max_depth=1, random_state=42)
adaboost = AdaBoostClassifier(
    estimator=stump,
    n_estimators=50,
    learning_rate=1.0,
    random_state=42,
)

# Time only the call to `fit`. `time.perf_counter()` is used instead of
# `time.time()` because it is monotonic and high-resolution: the measured
# interval cannot be skewed by system clock adjustments, which matters for
# a sub-second duration like this one.
start_time = time.perf_counter()
adaboost.fit(X_train, y_train)
train_time = time.perf_counter() - start_time  # elapsed seconds spent in fit

# Predictions on the held-out test split, used by the evaluation step
y_pred_ab = adaboost.predict(X_test)

# 6. Score every model's test-set predictions and gather one row per model
predictions = {
    'Decision Tree': y_pred_dt,
    'Random Forest': y_pred_rf,
    'AdaBoost': y_pred_ab,
}

results = []
for name, y_pred in predictions.items():
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # Only the AdaBoost fit was timed, so the other rows carry no training time
    if name == 'AdaBoost':
        time_taken = round(train_time, 4)
    else:
        time_taken = None
    row = {
        'Model': name,
        'Accuracy': acc,
        'F1-score': f1,
        'Training Time (s)': time_taken,
    }
    results.append(row)

results_df = pd.DataFrame(results)
print(results_df)

# 7. Per-class precision/recall/F1 breakdown for AdaBoost
#    NOTE(review): the display names presumably map to labels 0 and 1 in that order - confirm
ab_report = classification_report(
    y_test, y_pred_ab, target_names=['Not Survived','Survived']
)
print("\nAdaBoost Classification Report:\n", ab_report)

           Model  Accuracy  F1-score  Training Time (s)
0  Decision Tree  0.821229  0.757576                NaN
1  Random Forest  0.821229  0.750000                NaN
2       AdaBoost  0.782123  0.697674             0.1679

AdaBoost Classification Report:
               precision    recall  f1-score   support

Not Survived       0.80      0.86      0.83       110
    Survived       0.75      0.65      0.70        69

    accuracy                           0.78       179
   macro avg       0.77      0.76      0.76       179
weighted avg       0.78      0.78      0.78       179

