In [1]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.52.1-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.1-py3-none-any.whl (9.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.0/9.0 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.52.1


In [2]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [3]:
!pip install scikit-learn



In [4]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# ------------------ PAGE CONFIG ------------------
st.set_page_config(page_title="/content/alzheimers_disease_data.csv", layout="wide")
st.title("üß† Alzheimer's Disease Prediction System")

# ------------------ DATASET UPLOAD ------------------
st.sidebar.header("üìÇ Upload Dataset")
uploaded_data = st.sidebar.file_uploader("Upload CSV File", type=["csv"])

if uploaded_data is None:
    st.warning("‚ö† Please upload a dataset to continue.")
    st.stop()

df = pd.read_csv(uploaded_data)
df = df.drop(["PatientID", "DoctorInCharge"], axis=1, errors="ignore")

# ------------------ DATASET SUMMARY ------------------
st.subheader("üìä Dataset Summary")
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Records", df.shape[0])
with col2:
    st.metric("Total Features", df.shape[1] - 1)
with col3:
    st.metric("Target Classes", df["Diagnosis"].nunique())

st.dataframe(df.head())
st.subheader("üìà Statistical Summary")
st.dataframe(df.describe())

# ------------------ DATA SPLITTING ------------------
X = df.drop("Diagnosis", axis=1)
y = df["Diagnosis"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ------------------ MODEL TRAINING ------------------
lr = LogisticRegression(max_iter=1000)
rf = RandomForestClassifier(random_state=42)
knn = KNeighborsClassifier()

models = {"Logistic Regression": lr, "Random Forest": rf, "KNN": knn}
accuracy_results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accuracy_results[name] = accuracy_score(y_test, preds)

best_model_name = max(accuracy_results, key=accuracy_results.get)
best_model = models[best_model_name]

# ------------------ SIDEBAR MENU ------------------
st.sidebar.title("üìå Dashboard Menu")
option = st.sidebar.radio(
    "Select Option:",
    ["Home", "Dataset Analysis", "Model Comparison", "Single Prediction", "Bulk Prediction"]
)

# ------------------ HOME ------------------
if option == "Home":
    st.subheader("üè† System Overview")
    st.write("""
    This system predicts the risk of Alzheimer's Disease using Machine Learning models.

    ‚úÖ Upload your own dataset
    ‚úÖ Automatic Model Training
    ‚úÖ Real-time Prediction
    ‚úÖ Model Comparison
    ‚úÖ Graphical Analysis
    ‚úÖ Bulk Prediction
    """)
    st.success(f"‚úÖ Best Performing Model: {best_model_name}")

# ------------------ DATASET ANALYSIS & GRAPHS ------------------
elif option == "Dataset Analysis":
    st.subheader("üìä Dataset Analysis & Visualization")

    # ---------------- Diagnosis Distribution ----------------
    st.write("### Diagnosis Distribution")
    fig1, ax1 = plt.subplots(figsize=(6,4))
    sns.countplot(data=df, x="Diagnosis", palette="Set2", ax=ax1)
    ax1.set_title("Distribution of Alzheimer's Diagnosis", fontsize=14)
    ax1.set_xlabel("Diagnosis", fontsize=12)
    ax1.set_ylabel("Count", fontsize=12)
    for p in ax1.patches:
        ax1.annotate(int(p.get_height()),
                     (p.get_x() + p.get_width()/2., p.get_height()),
                     ha='center', va='bottom', fontsize=10)
    st.pyplot(fig1)

    # ---------------- Feature Correlation Heatmap ----------------
    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    if len(numeric_cols) > 1:
        st.write("### Feature Correlation")
        corr = df[numeric_cols].corr()
        fig2, ax2 = plt.subplots(figsize=(10,6))
        sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5, ax=ax2)
        ax2.set_title("Correlation Heatmap of Numeric Features", fontsize=14)
        st.pyplot(fig2)
    else:
        st.warning("Not enough numeric features to generate correlation heatmap.")

    # ---------------- Boxplots for Age ----------------
    if "Age" in df.columns and "Diagnosis" in df.columns:
        st.write("### Age vs Diagnosis")
        fig3, ax3 = plt.subplots(figsize=(6,4))
        sns.boxplot(x="Diagnosis", y="Age", data=df, palette="Set3", ax=ax3)
        sns.stripplot(x="Diagnosis", y="Age", data=df, color="black", alpha=0.5, jitter=True, ax=ax3)
        ax3.set_title("Age Distribution by Diagnosis", fontsize=14)
        ax3.set_xlabel("Diagnosis", fontsize=12)
        ax3.set_ylabel("Age", fontsize=12)
        st.pyplot(fig3)

    # ---------------- Histograms ----------------
    st.write("### Histograms of Numeric Features")
    for col in numeric_cols:
        fig, ax = plt.subplots(figsize=(6,3))
        sns.histplot(df[col], kde=True, color="skyblue", ax=ax)
        ax.set_title(f"Distribution of {col}", fontsize=12)
        st.pyplot(fig)

    # ---------------- Feature Correlation with Target ----------------
    if "Diagnosis" in df.columns and len(numeric_cols) > 0:
        st.write("### Correlation of Numeric Features with Target")
        try:
            if df["Diagnosis"].dtype == "object":
                df["Diagnosis_num"] = pd.factorize(df["Diagnosis"])[0]
            else:
                df["Diagnosis_num"] = df["Diagnosis"]
            corr_target = df[numeric_cols + ["Diagnosis_num"]].corr()["Diagnosis_num"].sort_values(ascending=False)
            corr_target = corr_target.drop("Diagnosis_num")
            fig, ax = plt.subplots(figsize=(6,3))
            sns.barplot(x=corr_target.index, y=corr_target.values, palette="Set1", ax=ax)
            ax.set_ylabel("Correlation with Diagnosis", fontsize=12)
            ax.set_title("Feature Correlation with Target", fontsize=14)
            for p in ax.patches:
                ax.annotate(f"{p.get_height():.2f}", (p.get_x() + p.get_width()/2., p.get_height()),
                            ha='center', va='bottom', fontsize=10)
            st.pyplot(fig)
            df.drop("Diagnosis_num", axis=1, inplace=True)
        except Exception as e:
            st.error(f"Cannot generate feature-target correlation: {e}")

# ------------------ MODEL COMPARISON ------------------
elif option == "Model Comparison":
    st.subheader("üìà Model Accuracy Comparison")

    acc_df = pd.DataFrame(list(accuracy_results.items()), columns=["Model", "Accuracy"]).sort_values("Accuracy", ascending=False)
    st.dataframe(acc_df)

    fig, ax = plt.subplots(figsize=(6,4))
    sns.barplot(x="Model", y="Accuracy", data=acc_df, palette="Set1", ax=ax)
    ax.set_ylim(0,1)
    ax.set_ylabel("Accuracy", fontsize=12)
    ax.set_title("Model Accuracy Comparison", fontsize=14)
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2f}", (p.get_x() + p.get_width()/2., p.get_height()),
                    ha='center', va='bottom', fontsize=10)
    st.pyplot(fig)
    st.success(f"‚úÖ Best Model Selected Automatically: {best_model_name}")

# ------------------ SINGLE PATIENT PREDICTION ------------------
elif option == "Single Prediction":
    st.subheader("üîç Single Patient Prediction")
    st.info("Default values are auto-filled using dataset averages. You may edit them.")

    input_data = []
    default_values = X.mean()
    for col in X.columns:
        val = st.number_input(f"Enter {col}", value=float(default_values[col]))
        input_data.append(val)

    input_array = np.array(input_data).reshape(1, -1)
    if st.button("Predict"):
        prediction = best_model.predict(input_array)
        if prediction[0] == 1:
            st.error("‚ö† High Risk of Alzheimer's Disease Detected")
        else:
            st.success("‚úÖ Low Risk of Alzheimer's Disease")

# ------------------ BULK CSV PREDICTION ------------------
elif option == "Bulk Prediction":
    # <-- This line must be indented (typically 4 spaces)
    st.subheader("üìÇ Bulk Dataset Prediction")
    upload_file = st.file_uploader("Upload New CSV for Prediction", type=["csv"])
    if upload_file is not None:
        # more indented code here
        ...



Writing app.py


In [5]:
!npm install -g localtunnel

[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K
added 22 packages in 3s
[1G[0K‚†è[1G[0K
[1G[0K‚†è[1G[0K3 packages are looking for funding
[1G[0K‚†è[1G[0K  run `npm fund` for details
[1G[0K‚†è[1G[0K

In [None]:
!streamlit run app.py --server.port 8501 & npx localtunnel --port 8501

[1G[0K‚†ô[1G[0K‚†π
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0Kyour url is: https://fresh-wolves-juggle.loca.lt
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.190.179.140:8501[0m
[0m
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i =