# Student MLE MiniProject: Flask Microservice with ML Model

This notebook completes the Springboard MEC2 Flask mini-project by:

- Training a simple scikit-learn model and persisting it
- Building a Flask microservice exposing `/health` and `/predict`
- Providing tests via Flask's test client
- Exporting a runnable `app.py`, `requirements.txt`, and `README.md`

Reference: [`Student_MLE_MiniProject_Flask.ipynb`](https://github.com/springboard-curriculum/mec2-projects/blob/main/Student_MLE_MiniProject_Flask.ipynb)

You can run the app directly without running the notebook; the exported `app.py` will train a model at first startup if a saved model is not found.



In [23]:
# Ensure Flask is available in this notebook kernel
%pip install -q flask


Note: you may need to restart the kernel to use updated packages.


In [24]:
import os
import json
import joblib
import time
from pathlib import Path
from typing import Dict, Any

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

ARTIFACT_DIR = Path('Study/mlbootcamp/flask_miniproject/service/artifacts')
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)
MODEL_PATH = ARTIFACT_DIR / 'model.joblib'
METRICS_PATH = ARTIFACT_DIR / 'metrics.json'
META_PATH = ARTIFACT_DIR / 'meta.json'

# Train a simple model (Iris dataset)
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=1000, multi_class='auto')),
])

start = time.time()
pipeline.fit(X_train, y_train)
train_time_s = time.time() - start

# Evaluate
preds = pipeline.predict(X_test)
acc = accuracy_score(y_test, preds)
report = classification_report(y_test, preds, output_dict=True)

metrics: Dict[str, Any] = {
    'accuracy': acc,
    'classification_report': report,
    'train_time_s': train_time_s,
    'n_train': int(len(X_train)),
    'n_test': int(len(X_test)),
}

# Persist artifacts
joblib.dump(pipeline, MODEL_PATH)
with open(METRICS_PATH, 'w') as f:
    json.dump(metrics, f, indent=2)
with open(META_PATH, 'w') as f:
    json.dump({'created_at': time.strftime('%Y-%m-%d %H:%M:%S')}, f)

print('Saved model to', MODEL_PATH)
print('Saved metrics to', METRICS_PATH)
print('Accuracy:', round(acc, 4))


Saved model to Study/mlbootcamp/flask_miniproject/service/artifacts/model.joblib
Saved metrics to Study/mlbootcamp/flask_miniproject/service/artifacts/metrics.json
Accuracy: 0.9333




In [25]:
from __future__ import annotations
import os
import json
from pathlib import Path
from typing import List, Dict, Any

import joblib
import numpy as np
from flask import Flask, jsonify, request
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

ARTIFACT_DIR = Path('Study/mlbootcamp/flask_miniproject/service/artifacts')
MODEL_PATH = ARTIFACT_DIR / 'model.joblib'
METRICS_PATH = ARTIFACT_DIR / 'metrics.json'

IRIS = load_iris()
TARGET_NAMES = list(IRIS.target_names)


def ensure_model() -> Pipeline:
    ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)
    if MODEL_PATH.exists():
        return joblib.load(MODEL_PATH)
    # Train quickly if not present
    X, y = IRIS.data, IRIS.target
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(max_iter=1000)),
    ])
    pipeline.fit(X, y)
    joblib.dump(pipeline, MODEL_PATH)
    return pipeline


def create_app() -> Flask:
    app = Flask(__name__)
    model: Pipeline = ensure_model()

    @app.get('/health')
    def health() -> Any:
        healthy = MODEL_PATH.exists()
        metrics: Dict[str, Any] = {}
        if METRICS_PATH.exists():
            try:
                metrics = json.loads(METRICS_PATH.read_text())
            except Exception:
                metrics = {}
        return jsonify({
            'status': 'ok' if healthy else 'degraded',
            'model_present': healthy,
            'metrics': metrics,
        })

    @app.post('/predict')
    def predict() -> Any:
        payload = request.get_json(silent=True) or {}
        if 'features' not in payload:
            return jsonify({'error': 'Missing "features" field'}), 400
        features = payload['features']
        if not isinstance(features, list) or len(features) == 0:
            return jsonify({'error': '"features" must be a non-empty list'}), 400
        try:
            X = np.array(features, dtype=float)
        except Exception:
            return jsonify({'error': 'Could not parse features as numeric'}), 400
        if X.ndim == 1:
            X = X.reshape(1, -1)
        if X.shape[1] != IRIS.data.shape[1]:
            return jsonify({'error': f'Each sample must have {IRIS.data.shape[1]} features'}), 400
        preds = model.predict(X)
        proba = None
        try:
            proba = model.predict_proba(X)
        except Exception:
            proba = None
        classes = [TARGET_NAMES[int(i)] for i in preds]
        response: Dict[str, Any] = {
            'predictions': preds.tolist(),
            'classes': classes,
        }
        if proba is not None:
            response['probabilities'] = np.max(proba, axis=1).tolist()
        return jsonify(response)

    return app

app = create_app()

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=int(os.getenv('PORT', '8000')), debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8000
 * Running on http://10.99.97.18:8000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [01/Sep/2025 06:31:16] "[33mGET /ui HTTP/1.1[0m" 404 -
127.0.0.1 - - [01/Sep/2025 06:31:16] "[33mGET / HTTP/1.1[0m" 404 -
127.0.0.1 - - [01/Sep/2025 06:31:19] "[33mGET / HTTP/1.1[0m" 404 -
127.0.0.1 - - [01/Sep/2025 06:31:25] "[33mGET /index HTTP/1.1[0m" 404 -
127.0.0.1 - - [01/Sep/2025 06:32:28] "[33mGET / HTTP/1.1[0m" 404 -


: 

In [None]:
# Basic tests using Flask test client
from flask.testing import FlaskClient
import json
import numpy as np

# Reuse `app` from previous cell
client: FlaskClient = app.test_client()

# Test /health
resp = client.get('/health')
assert resp.status_code == 200, resp.data
health_payload = resp.get_json()
print('/health:', health_payload)

# Test /predict with one sample
sample = [5.1, 3.5, 1.4, 0.2]
resp = client.post('/predict', json={'features': sample})
assert resp.status_code == 200, resp.data
pred_payload = resp.get_json()
print('/predict one:', pred_payload)

# Test /predict with batch
batch = [
    [5.9, 3.0, 5.1, 1.8],
    [6.7, 3.1, 4.7, 1.5],
]
resp = client.post('/predict', json={'features': batch})
assert resp.status_code == 200, resp.data
pred_payload = resp.get_json()
print('/predict batch:', pred_payload)

# Negative cases
resp = client.post('/predict', json={})
assert resp.status_code == 400
resp = client.post('/predict', json={'features': []})
assert resp.status_code == 400
resp = client.post('/predict', json={'features': [0, 1]})
assert resp.status_code == 400

print('All tests passed.')
