In [1]:
# Install required packages
!pip install numpy==1.26.4 evidently==0.6.7 ucimlrepo



In [2]:
!pip install ucimlrepo

from ucimlrepo import fetch_ucirepo



  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [5]:
# ===============================
# Load Auto MPG Dataset
# ===============================
print("Downloading Auto MPG dataset...")
auto_mpg = fetch_ucirepo(id=9)

df = auto_mpg.data.features.copy()  # Ensures it's a deep copy
df['mpg'] = auto_mpg.data.targets


print("Original dataset shape:", df.shape)
print(df.head())


Downloading Auto MPG dataset...
Original dataset shape: (398, 8)
   displacement  cylinders  horsepower  weight  acceleration  model_year  \
0         307.0          8       130.0    3504          12.0          70   
1         350.0          8       165.0    3693          11.5          70   
2         318.0          8       150.0    3436          11.0          70   
3         304.0          8       150.0    3433          12.0          70   
4         302.0          8       140.0    3449          10.5          70   

   origin   mpg  
0       1  18.0  
1       1  15.0  
2       1  18.0  
3       1  16.0  
4       1  17.0  


In [6]:
# ===============================
# Basic Data Cleaning
# ===============================
df.dropna(inplace=True)
print("Cleaned dataset shape:", df.shape)


Cleaned dataset shape: (392, 8)


In [7]:
# ===============================
# Split into Reference and Current Data
# ===============================
reference_data = df.iloc[:150].copy()
current_data = df.iloc[150:300].copy()

In [8]:
# ===============================
# Introduce Data Drift in Current Data
# ===============================
print("Simulating data drift in 'current' dataset...")
current_data['displacement'] *= 1.1
current_data['horsepower'] *= 1.2
current_data['weight'] *= 0.9
current_data['cylinders'] = current_data['cylinders'].replace({
    3: 4, 4: 5, 5: 3, 6: 8, 8: 4
})

print("Reference data shape:", reference_data.shape)
print("Current data shape:", current_data.shape)


Simulating data drift in 'current' dataset...
Reference data shape: (150, 8)
Current data shape: (150, 8)


In [14]:
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset
import datetime



In [15]:
# ===============================
# Data Drift Report
# ===============================
data_drift_report = Report(metrics=[
    DataDriftPreset()
])
data_drift_report.run(current_data=current_data, reference_data=reference_data, column_mapping=None)

report_filename = f'auto_mpg_data_drift_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.html'
data_drift_report.save_html(report_filename)
print(f"Data Drift Report saved to {report_filename}")

  terms = (f_obs_float - f_exp)**2 / f_exp
  terms = (f_obs_float - f_exp)**2 / f_exp


Data Drift Report saved to auto_mpg_data_drift_20250813_173820.html


In [19]:
import pandas as pd
import numpy as np
import requests
import io
import datetime
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset


In [20]:
# ===============================
# Model Training (Linear Regression)
# ===============================
print("\nTraining linear regression model...")

features = ['displacement', 'cylinders', 'horsepower', 'weight', 'acceleration', 'model_year', 'origin']
target = 'mpg'

X_ref = reference_data[features]
y_ref = reference_data[target]

model = LinearRegression()
model.fit(X_ref, y_ref)

model_filename = 'auto_mpg_model.joblib'
joblib.dump(model, model_filename)
print(f"Model saved to {model_filename}")


Training linear regression model...
Model saved to auto_mpg_model.joblib


In [21]:
# ===============================
# Predictions
# ===============================
reference_data['prediction'] = model.predict(X_ref)

X_curr = current_data[features]
current_data['prediction'] = model.predict(X_curr)


In [22]:
# ===============================
# Target Drift Report
# ===============================
print("\nGenerating Target Drift report...")
model_report = Report(metrics=[
    TargetDriftPreset()
])
model_report.run(current_data=current_data, reference_data=reference_data, column_mapping=None)

model_report_filename = f'auto_mpg_model_performance_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}.html'
model_report.save_html(model_report_filename)
print(f"Model Performance Report saved to {model_report_filename}")



Generating Target Drift report...
Model Performance Report saved to auto_mpg_model_performance_20250813_174027.html


In [23]:
# ===============================
# Drift Detection Function
# ===============================
def check_for_drift(report_json, threshold=0.1):
    for metric in report_json.get('metrics', []):
        if metric.get('metric') == 'DatasetDriftMetric':
            drift_score = metric.get('result', {}).get('drift_share')
            print("Drift Score:", drift_score)
            if drift_score and drift_score > threshold:
                return True
    return False

def local_alert(drift_detected):
    if drift_detected:
        print("\n!!! ALERT: Data Drift Detected !!!")
    else:
        print("\nNo significant data drift detected.")


In [24]:
# ===============================
# Run Drift Check
# ===============================
report_json_data = data_drift_report.as_dict()
drift_detected = check_for_drift(report_json_data)
local_alert(drift_detected)

Drift Score: 0.5

!!! ALERT: Data Drift Detected !!!
