In [6]:
from tensorflow.keras.models import load_model
import joblib
import numpy as np
import re

# Preprocess the input text
def preprocess_text(text):
    """Normalize raw text before it is handed to the vectorizer.

    The text is lowercased, then every character other than ``a-z``,
    ``0-9`` or whitespace is deleted outright (not replaced with a space).
    Hyphenated tokens therefore collapse: "50-year-old" becomes "50yearold".

    Args:
        text: The string to clean.

    Returns:
        The lowercased string with all other characters stripped.
    """
    lowered = text.lower()
    return re.sub(r'[^a-z0-9\s]', '', lowered)

# Load the model, vectorizer, and label mapping.
# All three paths are relative, so the files are looked up in the kernel's
# current working directory.
model = load_model("medical_specialty_model.keras")
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load .pkl artifacts that come from a trusted source.
vectorizer = joblib.load("vectorizer.pkl")
# Used by predict_specialty to map the argmax index to a specialty label.
index_to_specialty = joblib.load("index_to_specialty.pkl")

# Prediction function
def predict_specialty(text_input):
    """Predict the medical specialty for one piece of free text.

    The text is cleaned with ``preprocess_text``, transformed by the
    module-level ``vectorizer`` (as a one-element batch, converted with
    ``.toarray()``), and scored by the module-level ``model``. The winning
    index is looked up in ``index_to_specialty``. The prediction and its
    score are printed as well as returned.

    Args:
        text_input: Raw text to classify.

    Returns:
        A ``(predicted_specialty, confidence)`` tuple, where ``confidence``
        is the model output at the winning index of the first row.
    """
    features = vectorizer.transform([preprocess_text(text_input)]).toarray()
    scores = model.predict(features)

    # argmax is taken over the flattened output; with a single input row that
    # is the column of the highest score.
    # NOTE(review): assumes the model output has shape (1, n_classes) -- confirm.
    best_index = np.argmax(scores)
    predicted_specialty = index_to_specialty[best_index]
    confidence = scores[0][best_index]

    print(f"\n🩺 Predicted Specialty: {predicted_specialty}")
    print(f"🔮 Confidence: {confidence:.4f}")
    return predicted_specialty, confidence


In [7]:
# Example clinical note; the call is left as the cell's last expression so the
# returned (specialty, confidence) tuple is displayed as the cell output.
sample_text = (
    "A 50-year-old female whose 51-year-old sister has a history of "
    "multiple colon polyps, which may slightly increase her risk for "
    "colon cancer in the future."
)
predict_specialty(sample_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 518ms/step

🩺 Predicted Specialty: gastroenterology
🔮 Confidence: 0.8812


('gastroenterology', 0.8812083)