### Step 1: Load model and vectorizer

In [6]:
import joblib
import pandas as pd

# Restore the two persisted artifacts this notebook works with.
# joblib.load unpickles the file contents, so only point it at trusted files.
vectorizer = joblib.load("../models/vectorizer.pkl")
model = joblib.load("../models/multilabel_model.pkl")


### Step 2: Save the Trained Model and Vectorizer

In [7]:
# Save the model
# NOTE(review): Step 1 loads `model` from this same path, so this call rewrites
# the file with the object that was just read from it — confirm that is intended.
joblib.dump(model, "../models/multilabel_model.pkl")

# Save the vectorizer
# This is the last expression in the cell, so its return value (the list of
# written file names) is what the cell echoes as output.
joblib.dump(vectorizer, "../models/vectorizer.pkl")

['../models/vectorizer.pkl']

### Step 3: Define a `predict_labels` function

In [8]:
def predict_labels(text, labels_path="../data/medical_reports.csv"):
    """
    Predict the condition labels for a single diagnostic report.

    The text is lower-cased and its newlines are replaced by spaces, then it is
    passed through the module-level ``vectorizer`` and ``model``. Each position
    of the prediction row is paired with a label name taken from the header of
    the CSV at ``labels_path`` (every column after the first).

    Args:
        text (str): Raw medical report text.
        labels_path (str): CSV file whose header supplies the label names.
            Defaults to ``"../data/medical_reports.csv"``, so existing
            ``predict_labels(text)`` calls keep working unchanged.

    Returns:
        list: Names of the labels whose predicted value equals 1, in header
        order.
    """
    # Preprocess
    # NOTE(review): presumably mirrors the cleaning applied at training time — confirm.
    cleaned_text = text.lower().replace('\n', ' ')

    # Vectorize: the single report is wrapped in a list, i.e. a batch of one.
    vectorized_input = vectorizer.transform([cleaned_text])

    # Predict (binary multilabel); keep the row belonging to our one document.
    preds = model.predict(vectorized_input)[0]

    # Only the column names are needed, so read just the header (nrows=0)
    # instead of parsing the whole dataset on every call.
    # NOTE(review): assumes every column after the first is a label and that
    # the header order matches the model's output order — verify against the
    # training notebook. zip() stops at the shorter sequence, so a length
    # mismatch would go unnoticed here.
    labels = pd.read_csv(labels_path, nrows=0).columns[1:]

    # Map 0/1 back to label names
    return [label for label, val in zip(labels, preds) if val == 1]


In [9]:
# Run the helper on one free-text report; as the last expression in the cell,
# the returned list of label names is displayed as the cell output.
# NOTE(review): the recorded output below does not obviously match the symptoms
# described here — worth verifying that label names line up with model outputs.
example = "Patient reports increased thirst, frequent urination, and blurred vision."
predict_labels(example)


['hypertension', 'pneumonia', 'arthritis']






# NOTEBOOK 5: Deploy the Model

In this notebook, I simulate real-world deployment steps by:

- Loading and re-saving the trained multilabel classification model and vectorizer using `joblib`
- Defining a `predict_labels()` function to make predictions on new medical report text
- Demonstrating predictions on a new example input
- (Optional) Adding explainability with SHAP in a later section

This notebook completes the full pipeline from EDA to deployment.
