In [1]:
from flask import Flask, jsonify, request
import joblib
import spacy
import re
from pathlib import Path

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)


# Load the serialized artifacts once, at import time, so every request reuses
# the same objects. The paths are relative, so they resolve against the
# process's current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only load
# artifact files that come from a trusted source.
model = joblib.load("models/ensemble_model.joblib")  # called via .predict() in predict()
vectorizer = joblib.load("models/vectorizer.joblib")  # called via .transform() in predict()
label_encoder = joblib.load("models/label_encoder.joblib")  # called via .inverse_transform() in predict()
nlp = spacy.load("en_core_web_sm")  # spaCy pipeline used by clean_and_lemmatize()

# Preprocessing
def clean_and_lemmatize(text):
    """Normalize *text* into a space-separated string of lemmas.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed, and the result is run through the module-level
    spaCy pipeline ``nlp``. Only lemmas of tokens that are not stop words,
    are alphabetic, and are longer than two characters are kept.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    lemmas = []
    for token in nlp(letters_only):
        # Skip stop words, non-alphabetic tokens, and very short tokens.
        if token.is_stop or not token.is_alpha or len(token) <= 2:
            continue
        lemmas.append(token.lemma_)

    return " ".join(lemmas)

# Predict
def predict(description):
    """Return the decoded label predicted for a free-text *description*.

    The description is preprocessed with ``clean_and_lemmatize``, vectorized
    with the module-level ``vectorizer``, classified by ``model``, and the
    resulting class index is mapped back to its label via ``label_encoder``.
    """
    feature_matrix = vectorizer.transform([clean_and_lemmatize(description)])
    predicted_indices = model.predict(feature_matrix)
    # A single document was submitted, so only the first prediction matters.
    decoded = label_encoder.inverse_transform([predicted_indices[0]])
    return decoded[0]

@app.route("/predict", methods=["POST"])
def identify_condition():
    """Handle ``POST /predict``: classify the JSON field ``description``.

    Responses:
        200 ``{"prediction": <label>}`` on success.
        400 ``{"error": ...}`` when the body is not a JSON object, or when
            ``description`` is missing, empty, blank, or not a string.
        500 ``{"error": ...}`` when the prediction itself fails; the details
            are logged server-side instead of being sent to the client.
    """
    # silent=True makes an unparseable body come back as None instead of
    # raising, so malformed JSON is reported as a client error (400) rather
    # than being swallowed by the generic 500 handler below.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    description = data.get("description", "")
    if not description:
        return jsonify({"error": "Missing 'description' in request"}), 400
    if not isinstance(description, str):
        return jsonify({"error": "'description' must be a string"}), 400
    if not description.strip():
        # Whitespace-only text carries no content to classify.
        return jsonify({"error": "Missing 'description' in request"}), 400

    try:
        # jsonify stays inside the try so that a non-serializable label is
        # still reported as a JSON 500 response, as before.
        return jsonify({"prediction": predict(description)})
    except Exception:
        # Top-level boundary: record the traceback, but do not leak internal
        # exception text to the caller.
        app.logger.exception("Prediction failed")
        return jsonify({"error": "Internal server error"}), 500

@app.route("/")
def home():
    """Serve the root URL with a plain-text status line."""
    status_message = "Clinical Trial Classifier API is running."
    return status_message

if __name__ == "__main__":
    # Start Flask's built-in server on the loopback interface only, with
    # debug mode switched off.
    app.run(host="127.0.0.1", port=5000, debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [09/Apr/2025 11:01:57] "POST //predict HTTP/1.1" 200 -
