<a href="https://colab.research.google.com/github/Anarghapal/My-project/blob/main/Medical%20Recommendation%20System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Simple MedDoc - Medical Prognosis App (Colab Version)

# Install required packages
!pip install streamlit pandas numpy scikit-learn matplotlib seaborn pyngrok

# Import libraries
import getpass
import os
import pickle
import subprocess
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from google.colab import files
from pyngrok import ngrok
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

# Function to upload dataset
def upload_dataset():
    """Prompt for a file upload and load the first uploaded file as a DataFrame.

    Returns:
        The DataFrame read with ``pd.read_csv`` from the first uploaded
        file name, or ``None`` when the upload dialog returned nothing.
    """
    print("Please upload your Training2.csv file")
    uploaded = files.upload()

    # Nothing came back from the upload widget: signal that to the caller.
    if not uploaded:
        return None

    # Only the first uploaded file is used; any further files are ignored.
    first_name = next(iter(uploaded))
    print(f"Uploaded {first_name}")
    return pd.read_csv(first_name)

# Upload dataset, train several classifiers, persist the best one together
# with its preprocessing objects, generate the Streamlit app and serve it
# through an ngrok tunnel.
df = upload_dataset()

if df is None:
    print("No file uploaded. Exiting.")
else:
    # Clean dataset
    # Drop columns whose label is blank or that contain no non-null values
    # (nunique() skips NaN by default, so 0 means the column is empty).
    # str() keeps the check safe for non-string column labels.
    cols_to_drop = [
        col for col in df.columns
        if not str(col).strip() or df[col].nunique() == 0
    ]
    if cols_to_drop:
        df = df.drop(columns=cols_to_drop)
        print(f"Removed {len(cols_to_drop)} empty/unnamed columns")

    # Split features and target
    if 'prognosis' not in df.columns:
        print("Error: Dataset must contain a 'prognosis' column")
    else:
        X = df.drop('prognosis', axis=1)
        y = df['prognosis']

        # Encode target variable
        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)

        print(f"Dataset shape: {df.shape}")
        print(f"Number of symptoms: {X.shape[1]}")
        print(f"Number of unique prognoses: {len(np.unique(y))}")

        # Split data for training
        X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

        # Handle missing data (the imputer is fitted on the training split only)
        imputer = SimpleImputer(strategy='mean')
        X_train_imputed = imputer.fit_transform(X_train)
        X_test_imputed = imputer.transform(X_test)

        # Train and compare models
        models = {
            'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
            'Gradient Boosting': GradientBoostingClassifier(random_state=42),
            'SVM': SVC(probability=True, random_state=42),
            'KNN': KNeighborsClassifier(n_neighbors=5),
            'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
            'Naive Bayes': GaussianNB()
        }

        print("\nTraining and comparing models...")
        model_results = []

        for name, model in models.items():
            print(f"Training {name}...")
            model.fit(X_train_imputed, y_train)
            y_pred = model.predict(X_test_imputed)
            accuracy = accuracy_score(y_test, y_pred)
            model_results.append({'Model': name, 'Accuracy': accuracy})
            print(f"{name} Accuracy: {accuracy:.4f}")

        # Find best model (highest accuracy on the held-out split)
        best_model_info = max(model_results, key=lambda x: x['Accuracy'])
        best_model_name = best_model_info['Model']
        best_model = models[best_model_name]

        print(f"\nBest model: {best_model_name} with accuracy {best_model_info['Accuracy']:.4f}")

        # Save model and required files; the generated app loads exactly
        # these four file names.
        artifacts = {
            'model.pkl': best_model,
            'imputer.pkl': imputer,
            'label_encoder.pkl': label_encoder,
            'feature_list.pkl': list(X.columns),
        }
        for artifact_path, artifact in artifacts.items():
            with open(artifact_path, 'wb') as f:
                pickle.dump(artifact, f)

        print("Model and related files saved successfully!")

        # Create Streamlit app
        print("\nCreating Streamlit app...")

        # Write app code to file. The source contains non-ASCII characters
        # (the page icon), so the encoding is fixed instead of relying on
        # the platform default.
        with open('app.py', 'w', encoding='utf-8') as f:
            f.write('''
import html
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import os
from datetime import datetime

# Page config
st.set_page_config(
    page_title="MedDoc - Symptom Checker",
    page_icon="🩺",
    layout="wide"
)

# Add some CSS styling
st.markdown("""
<style>
    .main-title {
        font-size: 32px;
        font-weight: bold;
        color: #2c3e50;
        text-align: center;
        margin-bottom: 20px;
    }
    .result-box {
        background-color: #e8f4f8;
        padding: 20px;
        border-radius: 5px;
        margin-top: 20px;
        text-align: center;
    }
    .warning {
        color: #e74c3c;
        font-style: italic;
    }
    .symptom-tag {
        display: inline-block;
        background-color: #e1f5fe;
        padding: 2px 8px;
        margin: 2px;
        border-radius: 12px;
    }
</style>
""", unsafe_allow_html=True)

# Key prefixes of the symptom checkboxes, one per category expander.
# They are used both to build the widget keys and to reset them.
CHECKBOX_PREFIXES = ("pain_", "skin_", "digestive_", "other_")

# Initialize session state for prognosis log
if 'prognosis_log' not in st.session_state:
    st.session_state.prognosis_log = []


# Load model and related files
@st.cache_resource
def load_files():
    """Load the pickled model, imputer, label encoder and feature list."""
    with open('model.pkl', 'rb') as f:
        model = pickle.load(f)

    with open('imputer.pkl', 'rb') as f:
        imputer = pickle.load(f)

    with open('label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)

    with open('feature_list.pkl', 'rb') as f:
        feature_list = pickle.load(f)

    return model, imputer, label_encoder, feature_list


def clear_selected_symptoms():
    """Button callback: untick every symptom checkbox.

    Checkbox values live in session state under their widget keys, so they
    have to be reset there; a plain rerun would leave them ticked.
    """
    for key in list(st.session_state.keys()):
        if str(key).startswith(CHECKBOX_PREFIXES):
            st.session_state[key] = False


def render_symptom_group(title, key_prefix, symptoms, expanded=False):
    """Render one expander of symptom checkboxes and return the ticked ones."""
    chosen = []
    with st.expander(title, expanded=expanded):
        if not symptoms:
            st.write("No matching symptoms found")
            return chosen
        cols = st.columns(3)
        for i, symptom in enumerate(sorted(symptoms)):
            with cols[i % 3]:
                display_name = symptom.replace('_', ' ').title()
                if st.checkbox(display_name, key=f"{key_prefix}{symptom}"):
                    chosen.append(symptom)
    return chosen


# Main title
st.markdown("<div class='main-title'>MedDoc: Medical Prognosis Prediction</div>", unsafe_allow_html=True)
st.markdown("Select your symptoms and get a potential prognosis")

# Load files
model, imputer, label_encoder, feature_list = load_files()

# Create tabs
tab1, tab2 = st.tabs(["Symptom Checker", "Prognosis Log"])

with tab1:
    # Search box for symptoms
    search = st.text_input("Search for symptoms:")

    # Filter symptoms based on search. Only symptoms that pass the filter
    # get a checkbox below, so only those can end up in the selection.
    filtered_symptoms = feature_list
    if search:
        needle = search.lower()
        filtered_symptoms = [s for s in feature_list if needle in s.lower()]

    # Group symptoms by category for better organization
    pain_symptoms = [s for s in filtered_symptoms if 'pain' in s or 'ache' in s]
    skin_symptoms = [s for s in filtered_symptoms if 'skin' in s or 'rash' in s]
    digestive_symptoms = [s for s in filtered_symptoms if 'stomach' in s or 'vomit' in s or 'nausea' in s]
    categorized = set(pain_symptoms) | set(skin_symptoms) | set(digestive_symptoms)
    other_symptoms = [s for s in filtered_symptoms if s not in categorized]

    # Display symptoms by category using expanders
    ticked = (
        render_symptom_group("Pain Symptoms", CHECKBOX_PREFIXES[0], pain_symptoms, expanded=True)
        + render_symptom_group("Skin Symptoms", CHECKBOX_PREFIXES[1], skin_symptoms)
        + render_symptom_group("Digestive Symptoms", CHECKBOX_PREFIXES[2], digestive_symptoms)
        + render_symptom_group("Other Symptoms", CHECKBOX_PREFIXES[3], other_symptoms)
    )
    # A symptom can match several categories (e.g. a name containing both
    # 'stomach' and 'pain') and then has a checkbox in each of them; keep a
    # single entry per symptom, in first-seen order.
    selected_symptoms = list(dict.fromkeys(ticked))

    # Display selected symptoms
    if selected_symptoms:
        st.markdown("### Selected Symptoms:")
        cols = st.columns(3)
        for i, symptom in enumerate(selected_symptoms):
            with cols[i % 3]:
                # Names come from the dataset header, so escape them before
                # injecting them into raw HTML.
                tag_label = html.escape(symptom.replace('_', ' ').title())
                st.markdown(f"<span class='symptom-tag'>{tag_label}</span>", unsafe_allow_html=True)

    # Generate prognosis button
    col1, col2 = st.columns([3, 1])
    with col1:
        generate_btn = st.button("Generate Prognosis", type="primary", use_container_width=True)
    with col2:
        # The callback runs before the rerun triggered by the click, so the
        # checkboxes are already reset when they are drawn again.
        st.button("Clear All", use_container_width=True, on_click=clear_selected_symptoms)

    # Generate prognosis when button is clicked
    if generate_btn:
        if not selected_symptoms:
            st.warning("Please select at least one symptom")
        else:
            with st.spinner("Analyzing symptoms..."):
                # Create input data: one row, 1 for each selected symptom
                input_data = pd.DataFrame(0, index=[0], columns=feature_list)
                input_data.loc[0, selected_symptoms] = 1

                # Apply imputation
                input_imputed = imputer.transform(input_data)

                # Predict prognosis
                proba = model.predict_proba(input_imputed)[0]
                top_indices = proba.argsort()[-3:][::-1]  # Get top 3
                # predict_proba columns are ordered like model.classes_ (the
                # encoded labels the model was trained on), so translate the
                # column positions to encoded labels before decoding them.
                top_prognoses = label_encoder.inverse_transform(model.classes_[top_indices])
                top_confidences = proba[top_indices]

                # Log the prediction
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                log_entry = {
                    "timestamp": timestamp,
                    "symptoms": [s.replace('_', ' ').title() for s in selected_symptoms],
                    "prognosis": top_prognoses[0],
                    "confidence": float(top_confidences[0]),
                    "alternatives": [
                        {"prognosis": p, "confidence": float(c)}
                        for p, c in zip(top_prognoses[1:], top_confidences[1:])
                    ]
                }
                st.session_state.prognosis_log.insert(0, log_entry)

            # Display results. Everything is emitted in ONE markdown call:
            # an HTML tag opened in one st.markdown call cannot wrap the
            # elements produced by later calls.
            alternatives_html = "".join(
                f"<p>{html.escape(str(p))} ({c:.2%})</p>"
                for p, c in zip(top_prognoses[1:], top_confidences[1:])
            )
            st.markdown(
                "<div class='result-box'>"
                f"<h3>Predicted Prognosis: {html.escape(str(top_prognoses[0]))}</h3>"
                f"<p>Confidence: {top_confidences[0]:.2%}</p>"
                "<h3>Alternative Possibilities:</h3>"
                f"{alternatives_html}"
                "<p class='warning'>Note: This is not a medical diagnosis. Please consult a healthcare professional.</p>"
                "</div>",
                unsafe_allow_html=True,
            )

with tab2:
    st.markdown("### Prognosis History")

    if not st.session_state.prognosis_log:
        st.info("No prognosis records yet. Use the Symptom Checker to generate prognoses.")
    else:
        for entry in st.session_state.prognosis_log:
            with st.expander(f"{entry['prognosis']} - {entry['timestamp']}"):
                st.write(f"*Predicted Prognosis:* {entry['prognosis']} ({entry['confidence']:.2%})")

                st.write("*Selected Symptoms:*")
                st.write(", ".join(entry['symptoms']))

                if entry.get('alternatives'):
                    st.write("*Alternative Possibilities:*")
                    for alt in entry['alternatives']:
                        st.write(f"- {alt['prognosis']} ({alt['confidence']:.2%})")

# Footer
st.markdown("---")
st.markdown("MedDoc | Not for actual medical use | Consult healthcare professionals for medical advice")
''')

        # Ask user for ngrok auth token. getpass reads it without echoing,
        # so the secret does not end up in the saved notebook output.
        ngrok_token = getpass.getpass(
            "\nPlease enter your ngrok auth token (get one for free at https://ngrok.com): "
        ).strip()
        if not ngrok_token:
            print("No token provided. Using temporary connection which may have limitations.")
        else:
            ngrok.set_auth_token(ngrok_token)

        # Launch Streamlit
        print("Starting Streamlit app...")
        port = 8501
        streamlit_proc = None
        try:
            public_url = ngrok.connect(port)
            print(f"Streamlit app URL: {public_url}")

            # Run Streamlit app as a child process we keep a handle on, so
            # it can be stopped again instead of lingering in the background.
            streamlit_proc = subprocess.Popen(
                ["streamlit", "run", "app.py", "--server.port", str(port)]
            )

            print("Streamlit app is running. Press Ctrl+C to stop.")

            # Wait until the user interrupts or Streamlit exits on its own.
            while streamlit_proc.poll() is None:
                time.sleep(1)
            print(f"Streamlit exited with code {streamlit_proc.returncode}.")
        except KeyboardInterrupt:
            print("Stopping Streamlit app...")
        finally:
            # Always release both the server process and the tunnel.
            if streamlit_proc is not None and streamlit_proc.poll() is None:
                streamlit_proc.terminate()
                try:
                    streamlit_proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    streamlit_proc.kill()
            ngrok.kill()