# Model Evaluation

In this notebook, we will evaluate the performance of the trained machine learning models using various metrics and visualizations. We will analyze the results to understand how well our models are performing and identify areas for improvement.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split

# Load the processed data
# Paths and the target column name live in named constants so they are
# defined once and are easy to change for a different dataset or model.
DATA_PATH = '../data/processed/processed_data.csv'
MODEL_PATH = '../models/trained_model.pkl'
TARGET_COL = 'target'

data = pd.read_csv(DATA_PATH)

# Fail early with a readable message when the label column is absent,
# instead of the less specific KeyError raised from inside DataFrame.drop.
if TARGET_COL not in data.columns:
    raise KeyError(
        f"Column {TARGET_COL!r} not found in {DATA_PATH}; "
        f"available columns: {list(data.columns)}"
    )

# Split the data into features and target
X = data.drop(columns=TARGET_COL)
y = data[TARGET_COL]

# Load the trained model
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model artifacts that come from a trusted source.
import joblib
model = joblib.load(MODEL_PATH)

# Make predictions
# NOTE(review): predictions are made on every row of the processed file. If the
# model was fitted on these same rows, the metrics below will be optimistic --
# confirm this file is a held-out evaluation set.
y_pred = model.predict(X)
# Column 1 of predict_proba is used as the positive-class score.
# NOTE(review): this assumes a binary classifier -- confirm.
y_proba = model.predict_proba(X)[:, 1]

# Evaluation Metrics
# Text summary produced by scikit-learn's classification_report, comparing the
# true labels (y) against the model's hard predictions (y_pred).
print("Classification Report:")
print(classification_report(y_true=y, y_pred=y_pred))

# Confusion Matrix
# Build the sorted set of labels seen in either the true or the predicted
# values. This is the same ordering confusion_matrix uses by default, but
# making it explicit lets the heatmap ticks show the real class labels
# instead of positional indices 0..n-1.
class_labels = np.unique(np.concatenate([np.asarray(y), np.asarray(y_pred)]))
conf_matrix = confusion_matrix(y, y_pred, labels=class_labels)

# Explicit figure/axes keep the plot self-contained rather than relying on
# pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,   # write the count inside each cell
    fmt='d',      # counts are integers
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix')
# Rows of conf_matrix are the true classes, columns the predicted classes.
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

# ROC Curve
# Compute the curve points and the area under the curve from the true labels
# and the predicted scores in y_proba.
fpr, tpr, thresholds = roc_curve(y, y_proba)
roc_auc = roc_auc_score(y, y_proba)

# Draw on an explicit Axes object instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference.
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([0.0, 1.0])
# Upper y-limit sits slightly above 1.0 so the top of the curve is not clipped.
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc='lower right')
plt.show()