**Authors:** Pierina Lopez **rnumber:** r0913865
# Model comparison
 This notebook compares the performance of three models: Logistic Regression, XGBoost, and PyCaret's Best Model. 
 It computes accuracy, precision, and recall for each model using the Titanic dataset and visualizes the results.



In [3]:
# Import necessary libraries
from pathlib import Path

import pandas as pd

# Hand-entered metrics for each model, one list entry per model in "Model" order.
# NOTE(review): these numbers are hard-coded and were marked "Replace with actual
# results" — confirm them against the notebooks that produced each model.
comparison_data = {
    "Model": ["Logistic Regression", "XGBoost", "PyCaret Best Model"],
    "Accuracy": [0.7865, 0.7528, 0.8870],
    "Precision": [0.79, 0.75, 0.7956],
    "Recall": [0.79, 0.75, 0.6768],
}

# Create a DataFrame with one row per model
comparison_df = pd.DataFrame(comparison_data)

# Save metrics to a CSV file; create the target folder first so a fresh
# checkout (where ../Metrics does not exist yet) does not fail on write.
metrics_path = Path('../Metrics/model_comparison.csv')
metrics_path.parent.mkdir(parents=True, exist_ok=True)
comparison_df.to_csv(metrics_path, index=False)

# Display the comparison
print(comparison_df)


                 Model  Accuracy  Precision  Recall
0  Logistic Regression    0.7865     0.7900  0.7900
1              XGBoost    0.7528     0.7500  0.7500
2   PyCaret Best Model    0.8870     0.7956  0.6768


In [32]:
# %% Import Required Libraries
import pickle
import warnings
from pathlib import Path

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from pycaret.classification import load_model

# %% Load Data
df = pd.read_csv('../03_Final_Import/titanic_prepared.csv')

# NOTE(review): every row of this file is scored below. If the models were
# trained on part of the same file, the reported metrics are optimistic —
# confirm which rows are held out.

# %% Load Models
# pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by this project.
with open('../Pickle-Files/logistic_model.pkl', 'rb') as f:
    logistic_model = pickle.load(f)

with open('../Pickle-Files/xgb_model.pkl', 'rb') as f:
    xgb_model = pickle.load(f)

pycaret_best_model = load_model('../Pickle-Files/pycaret_best_model')

# %% Predictions with Logistic Regression
# Captured before any prediction column is added, so it only ever holds the
# columns read from the CSV minus the target.
logistic_features = df.columns.drop('survived')
df['logistic_survived'] = logistic_model.predict(df[logistic_features])

# %% Predictions with XGBoost
xgb_features = ['pclass', 'age', 'sibsp', 'parch', 'fare', 'sex_female', 'embarked_2', 'embarked_3']
df_xgb_input = df[xgb_features].copy()

# Ensure feature alignment for XGBoost.
# The booster reports no feature names when it was fitted on unnamed arrays;
# fall back to the explicit list instead of failing on set(None).
expected_features = xgb_model.get_booster().feature_names
if expected_features is None:
    expected_features = list(xgb_features)

missing_features = set(expected_features) - set(df_xgb_input.columns)
if missing_features:
    # Zero-filling keeps the cell running but can distort predictions, so make
    # it visible rather than silent.
    warnings.warn(
        "XGBoost model expects features absent from the data; "
        f"filling with 0: {sorted(missing_features)}"
    )
for feature in sorted(missing_features):
    df_xgb_input[feature] = 0  # Default value

df_xgb_input = df_xgb_input[expected_features]  # Reorder features
df['xgboost_survived'] = xgb_model.predict(df_xgb_input)

# %% Predictions with PyCaret
# Select the original input columns by name instead of dropping a hard-coded
# list of prediction columns; this stays correct if more predictions are added.
pycaret_input = df[logistic_features].copy()
df['pycaret_survived'] = pycaret_best_model.predict(pycaret_input)

# %% Compute and Display Metrics for Each Model
models = ['logistic', 'xgboost', 'pycaret']
results = []

y_true = df['survived']  # same ground truth for every model
for model in models:
    y_pred = df[f'{model}_survived']

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)

    results.append({'Model': model.capitalize(), 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall})

# Create a results DataFrame and display it
results_df = pd.DataFrame(results)
print(results_df)

# %% Save Results
# Create the output folder if needed so the write does not fail on a fresh checkout.
results_path = Path('../05_Comparison/model_comparison_results.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(results_path, index=False)


Transformation Pipeline and Model Successfully Loaded
      Model  Accuracy  Precision    Recall
0  Logistic  0.800900   0.760383  0.700000
1   Xgboost  0.825647   0.774481  0.767647
2   Pycaret  0.887514   0.908163  0.785294
