# In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import json
import logging
import os

# Configure logging: records at INFO level and above are written to
# model_monitoring.log, one "<timestamp>:<LEVEL>:<message>" line per record.
logging.basicConfig(
    format='%(asctime)s:%(levelname)s:%(message)s',
    level=logging.INFO,
    filename='model_monitoring.log',
)

# Load trained model and scaler
# Both artifacts are needed by everything below, so any failure is logged
# and then re-raised.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
try:
    model, scaler = joblib.load('model.pkl'), joblib.load('scaler.pkl')
except Exception as load_error:
    logging.error(f"Error loading model or scaler: {load_error}")
    raise

# Load new data
# 'new_data.csv' is resolved relative to the working directory and must exist;
# a read failure is logged and re-raised.
try:
    new_data = pd.read_csv('new_data.csv')
except Exception as read_error:
    logging.error(f"Error loading new data: {read_error}")
    raise

# Preprocess new data
def preprocess_data(df, target_col='target', feature_scaler=None):
    """Split a raw frame into scaled features and labels.

    Rows containing any missing value are dropped, the label column is
    separated from the remaining columns, and those remaining columns are
    passed through a scaler's ``transform``.

    Args:
        df: DataFrame holding the feature columns plus the label column.
        target_col: Name of the label column. Defaults to ``'target'``.
        feature_scaler: Object exposing ``transform``. When omitted, the
            module-level ``scaler`` is used.

    Returns:
        Tuple ``(X_scaled, y)``: whatever the scaler's ``transform`` returns
        for the feature columns, and the label Series of the rows kept.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
        ValueError: If no rows remain after dropping missing values.
    """
    if target_col not in df.columns:
        raise KeyError(f"Column '{target_col}' not found in input data")

    complete_rows = df.dropna()
    # Fail with a clear message here rather than letting the scaler reject
    # a zero-row input with a less specific error.
    if complete_rows.empty:
        raise ValueError("No rows left after dropping missing values")

    active_scaler = scaler if feature_scaler is None else feature_scaler
    features = complete_rows.drop(columns=[target_col])
    labels = complete_rows[target_col]
    return active_scaler.transform(features), labels

# Run preprocessing on the freshly loaded data; failures are logged and re-raised.
try:
    X_new, y_new = preprocess_data(new_data)
except Exception as e:
    logging.error(f"Error during preprocessing: {e}")
    raise

# Predict and evaluate
# Prediction is guarded like the loading and preprocessing stages so that a
# failure here is written to the monitoring log before the script aborts.
try:
    y_pred = model.predict(X_new)
except Exception as e:
    logging.error(f"Error during prediction: {e}")
    raise

def calculate_metrics(y_true, y_pred, average='binary'):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        average: Averaging mode forwarded to ``precision_score``,
            ``recall_score`` and ``f1_score``. The default ``'binary'``
            matches the previous behaviour; pass e.g. ``'macro'`` or
            ``'weighted'`` for multiclass labels. It must be a mode that
            yields a single number, because each score is cast to ``float``.

    Returns:
        Dict with keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1_score'``, each a built-in ``float``.
    """
    # zero_division=0 makes an undefined precision/recall/F1 evaluate to 0
    # instead of emitting a warning.
    score_kwargs = {'average': average, 'zero_division': 0}
    # Built-in floats keep the logged dict readable and JSON-serialisable.
    return {
        'accuracy': float(accuracy_score(y_true, y_pred)),
        'precision': float(precision_score(y_true, y_pred, **score_kwargs)),
        'recall': float(recall_score(y_true, y_pred, **score_kwargs)),
        'f1_score': float(f1_score(y_true, y_pred, **score_kwargs)),
    }

current_metrics = calculate_metrics(y_new, y_pred)
logging.info(f"Current Metrics: {current_metrics}")

# Load baseline metrics
# A missing file keeps the original "not found" message. Any other failure
# (unreadable file, malformed JSON, ...) is logged with its actual cause
# rather than being mislabelled as "not found". In both cases the current
# metrics stand in as the baseline so the run can continue.
try:
    with open('baseline_metrics.json', 'r') as f:
        baseline_metrics = json.load(f)
except FileNotFoundError:
    logging.warning("Baseline metrics not found. Assuming current metrics as baseline.")
    baseline_metrics = current_metrics
except Exception as e:
    logging.warning(f"Baseline metrics could not be read ({e}). Assuming current metrics as baseline.")
    baseline_metrics = current_metrics

# Drift detection
def detect_drift(current, baseline, threshold=0.05):
    """Report whether any metric moved more than ``threshold`` from baseline.

    Every metric in ``current`` that also exists in ``baseline`` is compared
    by absolute difference, so a change in either direction counts. Each
    metric exceeding the threshold is logged as a warning.

    Args:
        current: Mapping of metric name to current value.
        baseline: Mapping of metric name to reference value.
        threshold: Largest absolute difference that is not considered drift.

    Returns:
        ``True`` if at least one compared metric differs from its baseline
        by more than ``threshold``, otherwise ``False``.
    """
    drift = False
    for metric, current_value in current.items():
        # A baseline file may lack a metric that is computed now; skip it
        # with a warning instead of failing with KeyError.
        if metric not in baseline:
            logging.warning(f"No baseline value for {metric}; skipping drift check.")
            continue
        baseline_value = baseline[metric]
        if abs(current_value - baseline_value) > threshold:
            logging.warning(f"Drift detected in {metric}: baseline={baseline_value}, current={current_value}")
            drift = True
    return drift

# Summarise the drift check in the log: INFO when stable, WARNING otherwise.
drift_found = detect_drift(current_metrics, baseline_metrics)
if not drift_found:
    logging.info("No drift detected.")
else:
    logging.warning("Performance drift detected.")

# Plot metrics comparison
# One row per snapshot ('Baseline', 'Current') and one column per metric, so
# each snapshot is an x-axis group containing one bar per metric.
df_plot = pd.DataFrame(
    [baseline_metrics, current_metrics],
    index=['Baseline', 'Current'],
)
metrics_ax = df_plot.plot.bar()
metrics_ax.set_title("Performance Metrics Comparison")
metrics_ax.set_ylabel("Score")
metrics_ax.set_ylim(0, 1)
plt.xticks(rotation=0)  # keep the group labels horizontal
metrics_ax.figure.tight_layout()
# Save before showing so the image is written regardless of the display backend.
metrics_ax.figure.savefig("performance_comparison.png")
plt.show()

# Optional: update baseline (uncomment below to enable)
# with open('baseline_metrics.json', 'w') as f:
#     json.dump(current_metrics, f)