In [10]:
# Environment sanity check: confirm which interpreter the kernel runs on and
# that the core packages import cleanly.

import site
import sys

# Expected interpreter: ...\ds-portfolio\venv\Scripts\python.exe
print("Python exe:", sys.executable)
print("Site-packages:", site.getsitepackages())

# A missing package raises ImportError here, before any "OK" line is printed.
import numpy as np
import sklearn
import mlflow

print("NumPy:", np.__version__)
print("scikit-learn OK", "mlflow OK", sep="\n")

Python exe: d:\Projects\ds-portfolio\venv\Scripts\python.exe
Site-packages: ['d:\\Projects\\ds-portfolio\\venv', 'd:\\Projects\\ds-portfolio\\venv\\Lib\\site-packages']
NumPy: 2.3.3
scikit-learn OK
mlflow OK


In [11]:
# Day 1 — Baseline ML Experiment (Iris + RandomForest + MLflow)

import os
import random
import numpy as np
import joblib, os
from pathlib import Path

# Reproducibility: a single fixed seed, reused wherever the notebook needs one
SEED = 42

# Seed both global RNGs: Python's `random` module and NumPy's legacy global state
for seed_rng in (random.seed, np.random.seed):
    seed_rng(SEED)


In [12]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

import mlflow
import mlflow.sklearn

import matplotlib.pyplot as plt
import pandas as pd


In [13]:
# Load Iris as pandas objects: X holds the feature columns, y the target labels
iris = load_iris(as_frame=True)
X, y = iris.data, iris.target

# Preview the first rows of the features, then of the labels
display(X.head(), y.head())

# Which classes exist and how many samples each one has
class_labels = sorted(y.unique().tolist())
print("Classes:", class_labels)
print("Class counts:\n", y.value_counts())


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

Classes: [0, 1, 2]
Class counts:
 target
0    50
1    50
2    50
Name: count, dtype: int64


In [14]:
# Two-stage stratified split -> 60% train / 20% validation / 20% test.
# Both stages use SEED as random_state so the partition is the same on every run.

# Stage 1: reserve 20% of the full dataset as the final test set
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=SEED,
)

# Stage 2: carve validation out of the remaining 80% (0.25 * 0.80 = 0.20 overall)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.25,
    stratify=y_trainval,
    random_state=SEED,
)

print("Shapes:", *(part.shape for part in (X_train, X_val, X_test)))


Shapes: (90, 4) (30, 4) (30, 4)


In [15]:
# Point MLflow at the project's tracking store BEFORE selecting the experiment.
# set_experiment() resolves (and creates, if missing) the experiment in whichever
# store is active when it is called, so calling it ahead of set_tracking_uri()
# would leave a stray "day1_baseline" experiment in the default store.
# NOTE(review): the URI is an absolute, machine-specific path; adjust it when
# running on another machine or from a different checkout location.
mlflow.set_tracking_uri("file:///D:/Projects/ds-portfolio/mlruns")
mlflow.set_experiment("day1_baseline")


<Experiment: artifact_location='file:///D:/Projects/ds-portfolio/mlruns/572973866092394201', creation_time=1758396032414, experiment_id='572973866092394201', last_update_time=1758396032414, lifecycle_stage='active', name='day1_baseline', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [16]:
# Baseline RandomForest hyperparameters, kept in one dict so the exact same
# values can be handed to the estimator and reused elsewhere in the notebook.
params = dict(
    n_estimators=200,        # tree count: more trees reduce variance but cost more compute
    max_depth=None,          # unlimited depth; set a small integer to regularize
    min_samples_split=2,     # samples required to split a node (raise to regularize)
    min_samples_leaf=1,      # samples required in a leaf (raise to regularize)
    max_features="sqrt",     # features considered per split; usual choice for classification
    random_state=SEED,       # makes bootstrap sampling and split selection reproducible
    n_jobs=-1,               # use every available CPU core
)
rf = RandomForestClassifier(**params)
rf


In [17]:
# Train the baseline RandomForest inside a single MLflow run and record everything
# needed to compare it with later runs: params, metrics, plots, report, and model.
with mlflow.start_run(run_name="rf_iris_baseline"):
    # --- Log params (so runs are comparable) ---
    mlflow.log_params({
        **params,
        "split_seed": SEED,
        "train_size": len(X_train),
        "val_size": len(X_val),
        "test_size": len(X_test),
    })

    # --- Train ---
    rf.fit(X_train, y_train)

    # --- Validate (for tuning decisions) ---
    val_preds = rf.predict(X_val)
    val_acc = accuracy_score(y_val, val_preds)
    val_f1 = f1_score(y_val, val_preds, average="macro")

    # --- Final test (unseen data) ---
    test_preds = rf.predict(X_test)
    test_acc = accuracy_score(y_test, test_preds)
    test_f1 = f1_score(y_test, test_preds, average="macro")

    # One dict drives both the MLflow logging and the printed summary, so the
    # metric names cannot drift apart between the two.
    metrics = {
        "val_accuracy": val_acc,
        "val_f1_macro": val_f1,
        "test_accuracy": test_acc,
        "test_f1_macro": test_f1,
    }
    mlflow.log_metrics(metrics)

    # --- Confusion matrix → artifact (file attached to the run) ---
    # Fix the label order explicitly so matrix rows/columns match the tick labels.
    cm_labels = rf.classes_
    cm = confusion_matrix(y_test, test_preds, labels=cm_labels)
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(cm, interpolation='nearest')
    ax.set_title("Confusion Matrix (Test)")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    tick_positions = np.arange(len(cm_labels))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(cm_labels)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(cm_labels)
    # The default colormap renders low counts dark and high counts bright, so
    # switch the text colour to keep every cell value readable.
    contrast_threshold = cm.max() / 2
    for (i, j), v in np.ndenumerate(cm):
        ax.text(
            j, i, str(v),
            ha='center', va='center',
            color="white" if v <= contrast_threshold else "black",
        )
    fig.tight_layout()
    # log_figure uploads the figure directly, so no stray PNG is left in the
    # working directory.
    mlflow.log_figure(fig, "confusion_matrix.png")
    plt.close(fig)

    # --- Classification report → artifact ---
    report = classification_report(y_test, test_preds)
    # log_text uploads the string directly (no temporary local file to manage).
    mlflow.log_text(report, "classification_report.txt")

    # --- Log the trained model (serialized + conda/env metadata) ---
    mlflow.sklearn.log_model(
        sk_model=rf,
        name="model",
        registered_model_name=None  # we'll use the registry later in the programme
    )

    print({metric: round(value, 4) for metric, value in metrics.items()})




{'val_accuracy': 0.9333, 'val_f1_macro': 0.9333, 'test_accuracy': 0.9333, 'test_f1_macro': 0.9333}


In [18]:
# Persist a local copy of the fitted model (path is relative to the working directory)

model_file = "models/rf_iris.pkl"
Path(model_file).parent.mkdir(exist_ok=True)
joblib.dump(rf, model_file)
print("Saved to " + model_file)

Saved to models/rf_iris.pkl
