In [30]:
# Colab cell 1
!pip install -q fastapi uvicorn[standard] pyngrok joblib sentence-transformers lifelines scikit-learn flask nest_asyncio

In [31]:
# Colab cell 2
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
# imports
import os
from pathlib import Path
import joblib
import json
import numpy as np
import pandas as pd
from typing import Optional
import nest_asyncio
nest_asyncio.apply()

In [33]:
# webserver libs
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
from pyngrok import ngrok

In [34]:
# NLP/ML libs
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
# import lifelines (for survival model)
from lifelines import CoxPHFitter

In [35]:
# similarity
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
# text cleaning helper (use your existing cleaning logic)
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
STOP = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [37]:
def clean_text(text):
    if text is None:
        return ""
    t = re.sub(r'[^a-zA-Z0-9\s]', ' ', str(text))
    t = t.lower()
    t = " ".join([w for w in t.split() if w not in STOP])
    return t.strip()

In [38]:
# Colab cell 3
DRIVE_BASE = Path("/content/drive/MyDrive/Secure-Result")  # adjust
MODEL_DIR = DRIVE_BASE / "models"          # where you dumped models
DATA_DIR = DRIVE_BASE / "Datasets"                         # complaints.csv and resolved_complaints.csv

print("MODEL_DIR:", MODEL_DIR)
print("DATA_DIR:", DATA_DIR)


MODEL_DIR: /content/drive/MyDrive/Secure-Result/models
DATA_DIR: /content/drive/MyDrive/Secure-Result/Datasets


In [39]:
# Colab cell 4
# Load classifier + vectorizer
classifier_path = MODEL_DIR / "complaint_type_classifier.pkl"
vectorizer_path = MODEL_DIR / "tfidf_vectorizer.pkl"

In [40]:
classifier = joblib.load(classifier_path)
vectorizer = joblib.load(vectorizer_path)

In [41]:

# Load resolved cases csv (for SBERT similarity)
resolved_csv = DATA_DIR / "resolved_complaints_dataset.csv"
resolved_df = pd.read_csv(resolved_csv)
resolved_df["Cleaned Complaint Text"] = resolved_df["Complaint Text"].astype(str).apply(clean_text)

In [42]:
# Load SBERT and precompute embeddings
sbert_model_name = "all-MiniLM-L6-v2"
sbert = SentenceTransformer(sbert_model_name)
resolved_embeddings = sbert.encode(resolved_df["Cleaned Complaint Text"].tolist(), show_progress_bar=True, convert_to_numpy=True)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [43]:
# Load SLA survival model (lifelines CoxPH fitted and saved)
sla_model_path = MODEL_DIR / "sla_survival_model.pkl"
cph = None
if sla_model_path.exists():
    cph = joblib.load(sla_model_path)
else:
    print("SLA model not found at", sla_model_path)

In [44]:
# Load anomaly model (optional)
anomaly_path = MODEL_DIR / "anomaly_model.pkl"
anomaly_model = None
if anomaly_path.exists():
    anomaly_model = joblib.load(anomaly_path)

In [45]:
# If you used label encoders etc, load them too
le_student_program_path = MODEL_DIR / "le_student_program.pkl"
le_faculty_department_path = MODEL_DIR / "le_faculty_department.pkl"
le_student_program = joblib.load(le_student_program_path) if le_student_program_path.exists() else None
le_faculty_department = joblib.load(le_faculty_department_path) if le_faculty_department_path.exists() else None

print("Loaded: classifier, vectorizer, sbert (and embeddings), SLA model:", bool(cph), "anomaly:", bool(anomaly_model))


Loaded: classifier, vectorizer, sbert (and embeddings), SLA model: True anomaly: True


In [46]:
# Colab cell 5
def predict_category(text):
    cleaned = clean_text(text)
    X = vectorizer.transform([cleaned])
    result = {}
    # classification + confidence
    if hasattr(classifier, "predict_proba"):
        probs = classifier.predict_proba(X)[0]
        idx = int(np.argmax(probs))
        pred = classifier.classes_[idx]
        conf = float(probs[idx])
    else:
        pred = classifier.predict(X)[0]
        conf = 0.8
    # top keywords if linear model
    top_keywords = []
    try:
        if hasattr(classifier, "coef_"):
            class_index = list(classifier.classes_).index(pred)
            coefs = classifier.coef_[class_index]
            topn = np.argsort(coefs)[-5:][::-1]
            feature_names = vectorizer.get_feature_names_out()
            top_keywords = [feature_names[i] for i in topn]
    except Exception:
        top_keywords = []
    return {"prediction": str(pred), "confidence": conf, "top_keywords": top_keywords}

In [47]:
def find_similar_complaint(text, top_k=1):
    cleaned = clean_text(text)
    emb = sbert.encode([cleaned], convert_to_numpy=True)
    sims = cosine_similarity(emb, resolved_embeddings)[0]
    top_idx = np.argsort(sims)[-top_k:][::-1]
    resp = []
    for idx in top_idx:
        row = resolved_df.iloc[int(idx)]
        resp.append({
            "index": int(idx),
            "score": float(sims[int(idx)]),
            "complaint_type": row.get("Complaint Type", ""),
            "complaint_text": row.get("Complaint Text", ""),
            "resolution_desc": row.get("Resolution Description", ""),
            "resolution_time": row.get("Resolution Time", None)
        })
    return resp

In [48]:
def predict_sla_risk(metadata: dict) -> dict:
    """
    metadata: dict with any features your survival model expects (one row)
    returns: predicted median resolution time and probability of breach by t days (e.g., 7)
    """
    if cph is None:
        raise RuntimeError("Survival model not loaded")
    # You must build a single-row DataFrame with same feature columns used for training.
    # Example below assumes cph was trained on many dummy/one-hot columns; adjust as necessary.
    df = pd.DataFrame([metadata])
    # predict median:
    try:
        median = cph.predict_median(df)[0]
    except Exception as e:
        median = None
    # predict survival function and compute P(T>t) or P(breach by t)
    t_query = metadata.get("t_query", 7)
    try:
        surv = cph.predict_survival_function(df)
        # surv is a DataFrame (index = timeline) ; pick/interpolate p(T > t_query)
        surv_vals = surv.iloc[:, 0]
        if t_query in surv_vals.index:
            p_not_breach = float(surv_vals.loc[t_query])
        else:
            # linear interpolate
            p_not_breach = float(np.interp(t_query, surv_vals.index.values, surv_vals.values))
        p_breach = 1.0 - p_not_breach
    except Exception as e:
        p_breach = None
    return {"predicted_median_days": median, "breach_prob_at_t": p_breach}


In [49]:
# Colab cell 6
app = FastAPI(title="SecureResult Colab API")

class PredictRequest(BaseModel):
    text: str
    course_code: Optional[str] = None
    semester: Optional[str] = None
    student_program: Optional[str] = None
    faculty_department: Optional[str] = None
    t_query: Optional[int] = 7  # days threshold for SLA

class PredictResponse(BaseModel):
    category: str
    confidence: float
    top_keywords: list
    similar: list
    sla: dict

In [50]:
@app.get("/health")
def health():
    return {"status": "ok", "models_loaded": {"classifier": bool(classifier), "sbert": bool(sbert), "sla": bool(cph)}}

In [51]:
@app.post("/predict", response_model=dict)
def predict(req: PredictRequest):
    cat = predict_category(req.text)
    similar = find_similar_complaint(req.text, top_k=1)
    # build metadata for SLA: adapt fields to what your cph expects (one-hot names etc)
    metadata = {
        "t_query": req.t_query,
        # include any categorical encodings required by your trained cph.
        # For example, if you used Faculty Department dummy columns, you must set those exactly.
        # Simplest option: include only features used in your notebook when you trained the cph.
        "Complaint Type": cat["prediction"],
        "Faculty Department": req.faculty_department or "",
        # Add others as required...
    }
    sla_out = {}
    try:
        sla_out = predict_sla_risk(metadata)
    except Exception as e:
        sla_out = {"error": str(e)}
    out = {
        "category": cat["prediction"],
        "confidence": cat["confidence"],
        "top_keywords": cat["top_keywords"],
        "similar": similar,
        "sla": sla_out
    }
    return out

In [52]:
@app.post("/similar")
def similar(req: PredictRequest):
    return {"similar": find_similar_complaint(req.text, top_k=3)}

In [53]:
@app.post("/sla")
def sla(req: PredictRequest):
    metadata = {
        "t_query": req.t_query,
        "Complaint Type": req.text  # placeholder - adapt
    }
    try:
        return predict_sla_risk(metadata)
    except Exception as e:
        return {"error": str(e)}


In [54]:
# install pyngrok if not installed
!pip install -q pyngrok

from pyngrok import ngrok

# set token securely (replace with your token)
NGROK_TOKEN = ""   # <- paste token here (do NOT push to repo)
ngrok.set_auth_token(NGROK_TOKEN)

# now connect
public_url = ngrok.connect(8000)   # or the port uvicorn will expose
print("Public URL:", public_url.public_url)


Public URL: https://daphine-wariest-correctingly.ngrok-free.dev


In [55]:
# Run the API with uvicorn in the notebook (non-blocking)
import threading, time

def run_uvicorn():
    uvicorn.run(app, host="0.0.0.0", port=8000)

t = threading.Thread(target=run_uvicorn, daemon=True)
t.start()

print("Server started. Health:", public_url.public_url + "/health")

Server started. Health: https://daphine-wariest-correctingly.ngrok-free.dev/health


  return asyncio_run(self.serve(sockets=sockets), loop_factory=self.config.get_loop_factory())
Exception in thread Thread-9 (run_uvicorn):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.12/threading.py", line 1012, in run
