<a href="https://colab.research.google.com/github/Deepakpanchal24/Class_project/blob/main/New_NLP_DL_Pragatee1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
pip install pandas numpy tensorflow transformers scikit-learn gradio #install required library


import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import os
from datetime import datetime

# Read the raw case records, normalise the column headers (trim, lower-case,
# drop embedded spaces), keep only rows that have both symptoms and a
# diagnosis, and build the single free-text field that gets embedded.
df = (
    pd.read_csv("/content/drive/MyDrive/Medical_Project/medimind_india_raw_data.csv")
    .rename(columns=lambda header: header.strip().lower().replace(" ", ""))
    .dropna(subset=["symptoms", "differential_diagnoses"])
    .assign(
        # Missing history / test results become empty strings so the
        # concatenation never produces NaN.
        combined_input=lambda cases: (
            cases["symptoms"].fillna('') + " " +
            cases["medical_history"].fillna('') + " " +
            cases["test_results"].fillna('')
        )
    )
)

# Sentence encoder plus one embedding per dataset row, computed once on CPU.
# Row i of case_embeddings corresponds to row i (positionally) of df.
model = SentenceTransformer("all-MiniLM-L6-v2")
case_embeddings = model.encode(
    list(df["combined_input"]),
    device='cpu',
    convert_to_tensor=True,
)

# Visit log: the first time the app runs, seed the CSV with a header-only row
# so that later header-less appends line up with these columns.
history_file = "diagnosis_history.csv"
if not os.path.exists(history_file):
    pd.DataFrame(
        columns=[
            "date",
            "patient_name",
            "symptoms",
            "medical_history",
            "test_results",
            "age",
            "gender",
            "region",
            "diagnosis",
            "medications",
            "treatment_plan",
            "follow_ups",
            "match_score",
        ]
    ).to_csv(path_or_buf=history_file, index=False)

# Minimum cosine similarity a stored case must reach to be reported as a match.
SIMILARITY_THRESHOLD = 0.6

# Core function and its private helpers
def _text_or_na(value):
    """Return *value* unchanged, or "N/A" when the dataset cell is missing."""
    return "N/A" if pd.isna(value) else value


def _candidate_mask(cases, age, gender, region):
    """Build a boolean row mask over *cases* for the optional filters.

    Args:
        cases: DataFrame of stored cases.
        age: Parsed integer age, or None to skip the age filter.
        gender: Already stripped/lower-cased gender, or a falsy value to skip.
        region: Already stripped/lower-cased region, or a falsy value to skip.

    Returns:
        A bool Series aligned with ``cases`` (True = row passes all filters).
    """
    keep = pd.Series(True, index=cases.index)
    if age is not None and "age" in cases.columns:
        # Coerce instead of astype(int): a blank or malformed age cell now
        # excludes only that row rather than disabling the whole age filter.
        case_ages = pd.to_numeric(cases["age"], errors="coerce")
        keep &= case_ages.between(age - 5, age + 5)
    if gender:
        keep &= (cases["gender"].str.strip().str.lower() == gender)
    if region:
        keep &= (cases["region"].str.strip().str.lower() == region)
    # Missing cells compare as NA/False; make the mask strictly boolean.
    return keep.fillna(False).astype(bool)


def _format_result(info):
    """Render the matched case as the Markdown shown in the UI."""
    return f"""
### Differential Diagnoses
- {info['diagnosis']}

### Medications
- {info['medications']}

### Treatment Plan
- {info['treatment_plan']}

### Follow Ups
- {info['follow_ups']}
"""


def get_diagnosis_filtered(patient_name, symptoms, history, test_results, age, gender, region):
    """Find the most similar stored case and append the visit to the history CSV.

    The free-text inputs are embedded with the module-level ``model`` and
    compared (cosine similarity) against the precomputed ``case_embeddings``,
    restricted to the rows of ``df`` that pass the optional age (+/- 5 years),
    gender and region filters.

    Args:
        patient_name: Patient name; required (non-blank).
        symptoms: Symptom description; required (non-blank).
        history: Medical history text; may be empty.
        test_results: Test result text; may be empty.
        age: Optional age as text or number. Values that cannot be parsed as
            an integer are ignored for filtering but still logged as entered.
        gender: Optional gender filter; matched case-insensitively.
        region: Optional region filter; matched case-insensitively.

    Returns:
        A Markdown string with the matched case details, or a plain message
        when required input is missing, no case passes the filters, or the
        best similarity is below ``SIMILARITY_THRESHOLD``. Nothing is logged
        when required input is missing.
    """
    # Text boxes normally deliver "", but treat None the same way instead of
    # crashing on .strip() / string concatenation.
    patient_name = patient_name or ""
    symptoms = symptoms or ""
    history = history or ""
    test_results = test_results or ""

    # Input checks
    if not patient_name.strip() or not symptoms.strip():
        return "Please enter at least Patient Name and Symptoms."

    # Auto date
    date_today = datetime.today().strftime('%Y-%m-%d')

    # Parse the optional age. Only a parse failure is tolerated here; the
    # raw value is kept for the log in that case.
    age_value = None
    if age:
        try:
            age_value = int(age)
        except (TypeError, ValueError):
            age_value = None
        else:
            age = age_value

    # Normalise the optional text filters. A whitespace-only entry collapses
    # to "" and is treated as "not provided" rather than filtering on "".
    gender = gender.strip().lower() if gender else gender
    region = region.strip().lower() if region else region

    # Default response
    diagnosis_info = {
        "diagnosis": "No matching case",
        "medications": "N/A",
        "treatment_plan": "N/A",
        "follow_ups": "N/A",
        "match_score": 0.0
    }

    # Positional indices of the rows that survive the filters; positions (not
    # labels) are needed because case_embeddings is row-aligned with df.
    positions = _candidate_mask(df, age_value, gender, region).to_numpy().nonzero()[0]

    if positions.size == 0:
        result = "No matching cases found for the given filters."
    else:
        user_input = symptoms + " " + history + " " + test_results
        user_embedding = model.encode([user_input], convert_to_tensor=True, device='cpu')

        # Reuse the embeddings computed once at load time instead of
        # re-encoding the filtered dataset on every request.
        all_scores = cosine_similarity(user_embedding.cpu(), case_embeddings.cpu())[0]
        candidate_scores = all_scores[positions]
        best_local = candidate_scores.argmax()
        best_score = float(candidate_scores[best_local])

        if best_score >= SIMILARITY_THRESHOLD:
            match = df.iloc[positions[best_local]]
            diagnosis_info = {
                "diagnosis": match['differential_diagnoses'],
                # Only symptoms/diagnoses are guaranteed non-null at load
                # time, so guard the remaining fields against NaN.
                "medications": _text_or_na(match['medications']),
                "treatment_plan": _text_or_na(match['treatment_plan']),
                "follow_ups": _text_or_na(match['follow_ups']),
                "match_score": best_score
            }
            result = _format_result(diagnosis_info)
        else:
            result = "No sufficiently similar case found. Try providing more detail."

    # Log history (key order matches the header written at start-up)
    history_row = {
        "date": date_today,
        "patient_name": patient_name,
        "symptoms": symptoms,
        "medical_history": history,
        "test_results": test_results,
        "age": age,
        "gender": gender,
        "region": region,
        "diagnosis": diagnosis_info["diagnosis"],
        "medications": diagnosis_info["medications"],
        "treatment_plan": diagnosis_info["treatment_plan"],
        "follow_ups": diagnosis_info["follow_ups"],
        "match_score": diagnosis_info["match_score"]
    }

    # Re-emit the header if the log file was removed after start-up, so the
    # file never ends up header-less.
    pd.DataFrame([history_row]).to_csv(
        history_file,
        mode='a',
        header=not os.path.exists(history_file),
        index=False,
    )
    return result

# Gradio front end: seven free-text boxes feed get_diagnosis_filtered in
# positional order, and its return value is rendered as Markdown.
demo = gr.Interface(
    fn=get_diagnosis_filtered,
    inputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Patient Name",
            "Symptoms",
            "Medical History",
            "Test Results",
            "Age (Optional)",
            "Gender (Optional)",
            "Region (Optional)",
        )
    ],
    outputs=gr.Markdown(label="Diagnosis & Treatment"),
    title="MediMind Diagnostic Chatbot",
    description="AI tool for doctors to triage and log patient visits automatically.",
)

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://084ad7859f8f51fe1a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


