<a href="https://colab.research.google.com/github/NtwaliEliel/Summative-assignment_MLOP_Project/blob/main/Summative_assignment_MLOP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
from google.colab import drive
drive.mount('/content/drive')  # follow the link and paste auth code
BASE = "/content/drive/MyDrive/mlops-project"
print("Base path:", BASE)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Base path: /content/drive/MyDrive/mlops-project


In [48]:
import os
BASE = "/content/drive/MyDrive/mlops-project"
folders = ["notebook","api","model","retrain/new_data","ui/assets","docker"]
for f in folders:
    os.makedirs(os.path.join(BASE,f), exist_ok=True)
print("Folders created under", BASE)


Folders created under /content/drive/MyDrive/mlops-project


In [49]:
# run in a code cell (keep the leading !)
!pip install -q fastapi uvicorn[standard] python-multipart joblib scikit-learn pyngrok


In [50]:
# Train, evaluate, and save model & scaler to Drive
import joblib, os
import pandas as pd, numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

BASE = "/content/drive/MyDrive/mlops-project"

# 1. Load data
iris = load_iris(as_frame=True)
df = iris.frame.copy()
df['target'] = iris.target

# 2. Train/test split
X = df.drop(columns=['target'])
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42, stratify=y)

# 3. Scale
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# 4. Grid search optimize logistic regression (simple but valid)
params = {'C':[0.01,0.1,1,10,100]}
grid = GridSearchCV(LogisticRegression(max_iter=1500), params, cv=5)
grid.fit(X_train_s, y_train)
best = grid.best_estimator_

# 5. Evaluate (required 4 metrics)
y_pred = best.predict(X_test_s)
acc = accuracy_score(y_test,y_pred)
prec = precision_score(y_test,y_pred, average='macro')
rec = recall_score(y_test,y_pred, average='macro')
f1 = f1_score(y_test,y_pred, average='macro')

print("BEST PARAMS:", grid.best_params_)
print("Accuracy:", acc)
print("Precision (macro):", prec)
print("Recall (macro):", rec)
print("F1 (macro):", f1)
print("\nCLASSIFICATION REPORT:\n", classification_report(y_test,y_pred, target_names=iris.target_names))
print("CONFUSION MATRIX:\n", confusion_matrix(y_test,y_pred))

# 6. Save model and scaler to Drive
os.makedirs(os.path.join(BASE,"model"), exist_ok=True)
joblib.dump(best, os.path.join(BASE,"model","iris_model.pkl"))
joblib.dump(scaler, os.path.join(BASE,"model","scaler.pkl"))
print("Saved model & scaler to", os.path.join(BASE,"model"))


BEST PARAMS: {'C': 10}
Accuracy: 1.0
Precision (macro): 1.0
Recall (macro): 1.0
F1 (macro): 1.0

CLASSIFICATION REPORT:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00        10
   virginica       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

CONFUSION MATRIX:
 [[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]
Saved model & scaler to /content/drive/MyDrive/mlops-project/model


In [51]:
import matplotlib.pyplot as plt, seaborn as sns, os
BASE = "/content/drive/MyDrive/mlops-project"
os.makedirs(os.path.join(BASE,"ui","assets"), exist_ok=True)

# class distribution
sns.countplot(x=df['target'])
plt.title("Class distribution")
plt.savefig(os.path.join(BASE,"ui","assets","class_distribution.png"))
plt.clf()

# correlation heatmap
sns.heatmap(df.drop(columns='target').corr(), annot=True)
plt.title("Feature correlation")
plt.savefig(os.path.join(BASE,"ui","assets","correlation.png"))
plt.clf()

# confusion matrix (recompute)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d")
plt.title("Confusion Matrix")
plt.savefig(os.path.join(BASE,"ui","assets","confusion_matrix.png"))
plt.clf()

print("Saved plots to", os.path.join(BASE,"ui","assets"))


Saved plots to /content/drive/MyDrive/mlops-project/ui/assets


<Figure size 640x480 with 0 Axes>

In [52]:
%%bash
BASE="/content/drive/MyDrive/mlops-project"
cat > $BASE/api/main.py <<'PY'
from fastapi import FastAPI
from pydantic import BaseModel
import joblib, numpy as np
MODEL_PATH = "/content/drive/MyDrive/mlops-project/model/iris_model.pkl"
SCALER_PATH = "/content/drive/MyDrive/mlops-project/model/scaler.pkl"
app = FastAPI()

class PredictIn(BaseModel):
    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float

def load_model():
    model = joblib.load(MODEL_PATH)
    scaler = joblib.load(SCALER_PATH)
    return model, scaler

@app.post("/predict")
async def predict(payload: PredictIn):
    model, scaler = load_model()
    X = np.array([[payload.sepal_length, payload.sepal_width, payload.petal_length, payload.petal_width]])
    Xs = scaler.transform(X)
    pred = int(model.predict(Xs)[0])
    proba = float(model.predict_proba(Xs).max())
    return {"prediction": pred, "probability": proba}
PY
echo "FastAPI app saved to Drive at /api/main.py"


FastAPI app saved to Drive at /api/main.py


In [56]:
# CELL 7: Start uvicorn and ngrok tunnel (optional)
from pyngrok import ngrok, conf
import os, time

BASE = "/content/drive/MyDrive/mlops-project"
API_DIR = os.path.join(BASE, "api")
os.chdir(API_DIR)

# If you have a token
NGROK_AUTH_TOKEN = "35nRKcgfPFLp8D5p2Q2Luf5cGiq_TArmpswMFHfijf73nL1n"  # paste your token here if you have one
if NGROK_AUTH_TOKEN:
    conf.get_default().auth_token = NGROK_AUTH_TOKEN

# Kill any existing ngrok processes to avoid hitting the limit on free accounts
ngrok.kill()

# Start uvicorn in background
get_ipython().system_raw("uvicorn main:app --host 0.0.0.0 --port 8000 &")

time.sleep(2)  # wait for server to start

# Start ngrok tunnel
public_url = ngrok.connect(8000)
print("Public URL:", public_url)
print("Swagger docs:", public_url.public_url + "/docs")

Public URL: NgrokTunnel: "https://braelynn-grabbable-conclusively.ngrok-free.dev" -> "http://localhost:8000"
Swagger docs: https://braelynn-grabbable-conclusively.ngrok-free.dev/docs


In [57]:
%%bash
BASE="/content/drive/MyDrive/mlops-project"
cat > $BASE/ui/index.html <<'HTML'
<!doctype html>
<html>
<head><meta charset="utf-8"><title>Iris Predictor</title></head>
<body>
  <h1>Iris Predictor</h1>
  <div>
    <label>sepal_length <input id="sl" /></label><br/>
    <label>sepal_width <input id="sw" /></label><br/>
    <label>petal_length <input id="pl" /></label><br/>
    <label>petal_width <input id="pw" /></label><br/>
    <button id="predictBtn">Predict</button>
    <pre id="result"></pre>
  </div>

  <h2>Retrain (upload CSV to Drive/retrain/new_data)</h2>
  <p>To retrain: upload a CSV to Drive â†’ run retrain cell in Colab. CSV must contain feature columns and a <strong>target</strong> column.</p>

<script>
const API_URL = "https://braelynn-grabbable-conclusively.ngrok-free.dev"; // If using ngrok, paste the public URL here, e.g. "https://abcd-1234.ngrok.io"
document.getElementById('predictBtn').onclick = async () => {
  const body = {
    sepal_length: parseFloat(document.getElementById('sl').value),
    sepal_width: parseFloat(document.getElementById('sw').value),
    petal_length: parseFloat(document.getElementById('pl').value),
    petal_width: parseFloat(document.getElementById('pw').value)
  };
  if (!API_URL) { alert("Set API_URL inside the file to the deployed API or ngrok URL"); return; }
  const res = await fetch(API_URL + "/predict", {
    method:"POST", headers: {'Content-Type':'application/json'}, body: JSON.stringify(body)
  });
  const j = await res.json();
  document.getElementById('result').innerText = JSON.stringify(j, null, 2);
};
</script>
</body>
</html>
HTML
echo "UI saved to Drive at ui/index.html"


UI saved to Drive at ui/index.html


In [58]:
# CELL 9: show an example CSV you can create locally and upload to Drive/retrain/new_data
example_csv = """sepal_length,sepal_width,petal_length,petal_width,target
5.1,3.5,1.4,0.2,0
6.0,2.2,4.0,1.0,1
6.5,3.0,5.2,2.0,2
"""
print("Copy this text to a file named extra_iris.csv and upload it to Drive -> mlops-project/retrain/new_data")
print(example_csv)


Copy this text to a file named extra_iris.csv and upload it to Drive -> mlops-project/retrain/new_data
sepal_length,sepal_width,petal_length,petal_width,target
5.1,3.5,1.4,0.2,0
6.0,2.2,4.0,1.0,1
6.5,3.0,5.2,2.0,2



In [59]:
# Retrain model from files in Drive/retrain/new_data and overwrite model & scaler
import glob, os, joblib, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
BASE = "/content/drive/MyDrive/mlops-project"
UPLOAD_DIR = os.path.join(BASE,"retrain","new_data")

print("Looking for CSVs in", UPLOAD_DIR)
files = glob.glob(os.path.join(UPLOAD_DIR, "*.csv"))
print("Found files:", files)
if not files:
    print("No files found. Upload a CSV to Drive/retrain/new_data and re-run this cell.")
else:
    # load base iris data
    from sklearn.datasets import load_iris
    iris = load_iris(as_frame=True)
    base_df = iris.frame.copy()
    base_df['target'] = iris.target

    new_dfs = []
    for f in files:
        df_new = pd.read_csv(f)
        if 'target' not in df_new.columns:
            raise ValueError(f"CSV {f} must include a 'target' column.")
        new_dfs.append(df_new)
    new_data = pd.concat(new_dfs, ignore_index=True)
    combined = pd.concat([base_df, new_data], ignore_index=True)

    X = combined.drop(columns=['target'])
    y = combined['target']
    scaler = StandardScaler().fit(X)
    X_s = scaler.transform(X)
    model = LogisticRegression(max_iter=1500)
    model.fit(X_s,y)

    # save
    os.makedirs(os.path.join(BASE,"model"), exist_ok=True)
    joblib.dump(model, os.path.join(BASE,"model","iris_model.pkl"))
    joblib.dump(scaler, os.path.join(BASE,"model","scaler.pkl"))
    print("Retrained model saved to", os.path.join(BASE,"model"))


Looking for CSVs in /content/drive/MyDrive/mlops-project/retrain/new_data
Found files: []
No files found. Upload a CSV to Drive/retrain/new_data and re-run this cell.


In [60]:
# Evaluate current model from Drive (after retrain)
import joblib, os
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

BASE = "/content/drive/MyDrive/mlops-project"
model = joblib.load(os.path.join(BASE,"model","iris_model.pkl"))
scaler = joblib.load(os.path.join(BASE,"model","scaler.pkl"))

iris = load_iris(as_frame=True)
df = iris.frame.copy(); df['target'] = iris.target
X = df.drop(columns=['target']); y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42, stratify=y)
X_test_s = scaler.transform(X_test)

y_pred = model.predict(X_test_s)
print("Accuracy:", accuracy_score(y_test,y_pred))
print("Precision (macro):", precision_score(y_test,y_pred, average='macro'))
print("Recall (macro):", recall_score(y_test,y_pred, average='macro'))
print("F1 (macro):", f1_score(y_test,y_pred, average='macro'))
print("\nClassification report:\n", classification_report(y_test,y_pred, target_names=iris.target_names))


Accuracy: 1.0
Precision (macro): 1.0
Recall (macro): 1.0
F1 (macro): 1.0

Classification report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00        10
   virginica       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [61]:
# Quick local check of predicted label mapping
import joblib, numpy as np, os
BASE = "/content/drive/MyDrive/mlops-project"
model = joblib.load(os.path.join(BASE,"model","iris_model.pkl"))
scaler = joblib.load(os.path.join(BASE,"model","scaler.pkl"))

# Example point (setosa): 5.1,3.5,1.4,0.2
X = np.array([[5.1,3.5,1.4,0.2]])
Xs = scaler.transform(X)
pred = int(model.predict(Xs)[0])
proba = float(model.predict_proba(Xs).max())
print("Prediction:", pred, "Probability:", proba)
print("Label mapping: 0=setosa, 1=versicolor, 2=virginica")


Prediction: 0 Probability: 0.9974832121991416
Label mapping: 0=setosa, 1=versicolor, 2=virginica




In [62]:
# CELL 13: zip the project for submission (creates a downloadable zip in Drive)
!zip -r /content/drive/MyDrive/mlops-project_submission.zip /content/drive/MyDrive/mlops-project
print("Created ZIP at /content/drive/MyDrive/mlops-project_submission.zip")


updating: content/drive/MyDrive/mlops-project/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/Summative assignment - MLOP.ipynb (deflated 74%)
updating: content/drive/MyDrive/mlops-project/notebook/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/api/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/api/main.py (deflated 54%)
updating: content/drive/MyDrive/mlops-project/api/.ipynb_checkpoints/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/model/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/model/iris_model.pkl (deflated 31%)
updating: content/drive/MyDrive/mlops-project/model/scaler.pkl (deflated 40%)
updating: content/drive/MyDrive/mlops-project/retrain/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/retrain/new_data/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/ui/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/ui/assets/ (stored 0%)
updating: content/drive/MyDrive/mlops-project/ui/assets/class