<a href="https://colab.research.google.com/github/AjitPandey-4/Mlops-assignment-2/blob/main/monitoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.21.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.21.0 (from mlflow)
  Downloading mlflow_skinny-2.21.0-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.21.0->mlflow)
  Downloading databricks_sdk-0.47.0-py3-none-any.whl.metadata (38 kB)
Collecting fastapi<1 (from mlflow-skinny==2.21.0->mlflow)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn<1 (from mlflow-skinny==2.21.0->mlflow)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 k

In [2]:
import mlflow
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Run configuration. Every value is defined exactly once so the model that is
# trained and the parameters recorded in MLflow cannot silently diverge.
subset_size = 2000       # rows taken from the head of the training split
test_subset_size = 500   # rows taken from the head of the test split
n_estimators = 50
random_state = 42

mlflow.set_experiment("Fashion MNIST Model Monitoring")

with mlflow.start_run():
    # Record the configuration first so it is attached to the run even if a
    # later step raises.
    mlflow.log_params(
        {
            "n_estimators": n_estimators,
            "random_state": random_state,
            "train_subset_size": subset_size,
            "test_subset_size": test_subset_size,
        }
    )

    # Data preparation: load Fashion-MNIST and flatten each image into a
    # single feature row.
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    x_train_flat = x_train[:subset_size].reshape(subset_size, -1)
    y_train_subset = y_train[:subset_size]
    x_test_flat = x_test[:test_subset_size].reshape(test_subset_size, -1)
    y_test_subset = y_test[:test_subset_size]

    # Scaling: fit on the training subset only, then apply the same
    # transform to the test subset.
    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train_flat)
    x_test_scaled = scaler.transform(x_test_flat)

    # Model
    model = RandomForestClassifier(
        n_estimators=n_estimators, random_state=random_state
    )
    model.fit(x_train_scaled, y_train_subset)

    # Predictions on the held-out subset
    preds = model.predict(x_test_scaled)
    accuracy = accuracy_score(y_test_subset, preds)

    # Metric logging
    mlflow.log_metric("accuracy", accuracy)

    print(f"Tracked accuracy explicitly in MLflow: {accuracy:.4f}")


2025/03/23 12:00:59 INFO mlflow.tracking.fluent: Experiment with name 'Fashion MNIST Model Monitoring' does not exist. Creating a new experiment.


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Tracked accuracy explicitly in MLflow: 0.8340


In [3]:
# Simple data drift detection on the scaled train/test feature matrices.
import numpy as np

drift_threshold = 0.1  # example threshold on the scaled-feature scale

# Global means, kept under their original names for any later cell that reads
# them. They are not used for the decision below: the scaler was fitted on the
# training subset, so the global train mean is ~0 by construction, and a single
# global average lets per-feature shifts of opposite sign cancel out.
train_mean = np.mean(x_train_scaled)
test_mean = np.mean(x_test_scaled)

# Per-feature shift of the mean between the two sets (axis 0 = samples).
feature_mean_shift = np.abs(
    x_train_scaled.mean(axis=0) - x_test_scaled.mean(axis=0)
)

# Average absolute shift across features. By the triangle inequality this is
# never smaller than |train_mean - test_mean|, so every case the global-mean
# comparison would flag is still flagged, plus shifts that used to cancel.
drift_score = float(feature_mean_shift.mean())

drift_detected = bool(drift_score > drift_threshold)
print(f"Data Drift Detected explicitly: {drift_detected}")

Data Drift Detected explicitly: False
