In [1]:
!conda install pandas -y
!conda install scikit-learn -y
%pip install streamlit

Channels:
 - defaults
 - conda-forge
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Channels:
 - defaults
 - conda-forge
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd  

# Read the three disease datasets from the local datasets/ folder.
diabetes = pd.read_csv("datasets/diabetes.csv")
heart = pd.read_csv("datasets/heart_disease.csv")
parkinsons = pd.read_csv("datasets/parkinsons.csv")

# Preview the first rows of every dataset under its own heading.
for heading, frame in (
    ("Diabetes Dataset:\n", diabetes),
    ("Heart Disease Dataset:\n", heart),
    ("parkinsons Disease Dataset:\n", parkinsons),
):
    print(heading, frame.head(), "\n")

# Report the number of null entries per column for every dataset.
print("Missing Values:\n")
for heading, frame in (
    ("Diabetes:\n", diabetes),
    ("Heart:\n", heart),
    ("parkinsons:\n", parkinsons),
):
    print(heading, frame.isnull().sum(), "\n")


Diabetes Dataset:
    Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1   

Heart Disease Dataset:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   0       145   233    1        2      150      0      2.3      2   
1   67    1   3       160   286    0        2      108      1      1.5      1   
2   67    1   3

In [3]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

def preprocess_data(df, target_column):
    """Encode, split, and standardize a dataset for model training.

    Parameters
    ----------
    df : pandas.DataFrame
        Full dataset containing the feature columns and the target column.
        It is not modified; work is done on the frame returned by ``drop``.
    target_column : str
        Name of the column holding the labels to predict.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler)`` where ``X_train`` and
        ``X_test`` are the standardized feature arrays, ``y_train`` and
        ``y_test`` are the matching label subsets, and ``scaler`` is the
        ``StandardScaler`` fitted on the training rows. Apply
        ``scaler.transform`` to any new sample before calling ``predict``.
    """
    features = df.drop(columns=[target_column])  # Features
    labels = df[target_column]  # Target (labels)

    # Convert object-typed (text) columns to integer codes using Label Encoding
    for col in features.columns:
        if features[col].dtype == 'object':
            features[col] = LabelEncoder().fit_transform(features[col])

    # Split BEFORE scaling. Fitting the scaler on the full dataset would let
    # the test rows influence the mean/variance used for training (data
    # leakage) and inflate the reported test accuracy. The 80/20 partition is
    # reproducible because random_state is fixed.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Learn the scaling statistics from the training rows only, then apply
    # the same transformation to the held-out rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, scaler

# Produce the train/test partitions and the fitted scaler for each dataset.
(X_train_diabetes, X_test_diabetes,
 y_train_diabetes, y_test_diabetes,
 scaler_diabetes) = preprocess_data(df=diabetes, target_column="Outcome")

(X_train_heart, X_test_heart,
 y_train_heart, y_test_heart,
 scaler_heart) = preprocess_data(df=heart, target_column="target")

(X_train_parkinsons, X_test_parkinsons,
 y_train_parkinsons, y_test_parkinsons,
 scaler_parkinsons) = preprocess_data(df=parkinsons, target_column="status")

print("Preprocessing complete!")


Preprocessing complete!


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_and_evaluate(X_train, X_test, y_train, y_test, disease_name):
    """Fit a logistic-regression classifier and print its test accuracy.

    Parameters
    ----------
    X_train, y_train
        Features and labels the classifier is fitted on.
    X_test, y_test
        Held-out features and labels used to compute the accuracy.
    disease_name : str
        Label placed at the start of the printed accuracy line.

    Returns
    -------
    LogisticRegression
        The fitted classifier.
    """
    # max_iter=1000 gives the solver more iterations than the library default.
    classifier = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Score the held-out predictions against the true labels.
    score = accuracy_score(y_test, classifier.predict(X_test))
    print(f"{disease_name} Model Accuracy: {score:.4f}")

    return classifier

# Fit one classifier per dataset; each call prints that model's test accuracy.
model_diabetes = train_and_evaluate(
    X_train=X_train_diabetes, X_test=X_test_diabetes,
    y_train=y_train_diabetes, y_test=y_test_diabetes,
    disease_name="Diabetes",
)
model_heart = train_and_evaluate(
    X_train=X_train_heart, X_test=X_test_heart,
    y_train=y_train_heart, y_test=y_test_heart,
    disease_name="Heart Disease",
)
model_parkinsons = train_and_evaluate(
    X_train=X_train_parkinsons, X_test=X_test_parkinsons,
    y_train=y_train_parkinsons, y_test=y_test_parkinsons,
    disease_name="Parkinson's",
)


Diabetes Model Accuracy: 0.7532
Heart Disease Model Accuracy: 0.8852
Parkinson's Model Accuracy: 0.8974


In [7]:
import streamlit as st
import pickle
import numpy as np

# Load saved models
def _load_model(path):
    """Unpickle a trained model from ``path``.

    Parameters
    ----------
    path : str
        Location of the pickle file to read.

    Returns
    -------
    object
        Whatever object was stored in the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist (propagated unchanged from ``open``).
    pickle.UnpicklingError
        If the file is not a readable pickle stream; the message names the
        offending file and suggests how to regenerate it.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load model files that were produced by a trusted source.
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except pickle.UnpicklingError as exc:
        # Keep the exception type but say which file failed and what to do.
        raise pickle.UnpicklingError(
            f"Could not unpickle '{path}': {exc} "
            "The file is not a valid pickle stream (it may be corrupted, "
            "partially written, or saved with a different tool). Re-create it "
            "with pickle.dump(model, f) on a file opened in 'wb' mode."
        ) from exc


diabetes_model = _load_model('diabetes_model.pkl')
heart_model = _load_model('heart_model.pkl')
parkinsons_model = _load_model('parkinsons_model.pkl')

print("Models loaded successfully!")


# Streamlit form: pick a disease, enter the measurements, and run the matching
# classifier on them.
#
# The classifiers trained in this notebook are fitted on standardized features,
# so every branch also selects the StandardScaler returned by preprocess_data
# for that dataset and the raw form values are transformed before predict().
# NOTE(review): this assumes the pickled models are the ones trained in this
# notebook - confirm. If this cell is moved into a standalone Streamlit script,
# the scalers must be saved and loaded alongside the models.

# Sidebar for selecting disease
disease = st.sidebar.selectbox("Select Disease", ["Diabetes", "Heart Disease", "Parkinson's"])

st.title(f"{disease} Prediction")

if disease == "Diabetes":
    pregnancies = st.number_input("Pregnancies", min_value=0)
    glucose = st.number_input("Glucose Level", min_value=0)
    bp = st.number_input("Blood Pressure", min_value=0)
    skin_thickness = st.number_input("Skin Thickness", min_value=0)
    insulin = st.number_input("Insulin", min_value=0)
    bmi = st.number_input("BMI", min_value=0.0, format="%.2f")
    dpf = st.number_input("Diabetes Pedigree Function", min_value=0.0, format="%.2f")
    age = st.number_input("Age", min_value=0)

    # One row, in the same column order as the diabetes dataset's features.
    input_data = np.array([[pregnancies, glucose, bp, skin_thickness, insulin, bmi, dpf, age]])
    model = diabetes_model
    scaler = scaler_diabetes

elif disease == "Heart Disease":
    age = st.number_input("Age", min_value=0)
    sex = st.selectbox("Sex", ["Male", "Female"])
    cp = st.number_input("Chest Pain Type", min_value=0, max_value=3)
    trestbps = st.number_input("Resting Blood Pressure", min_value=0)
    chol = st.number_input("Cholesterol", min_value=0)
    fbs = st.selectbox("Fasting Blood Sugar > 120 mg/dl", ["No", "Yes"])
    restecg = st.number_input("Rest ECG", min_value=0, max_value=2)
    thalach = st.number_input("Max Heart Rate Achieved", min_value=0)
    exang = st.selectbox("Exercise Induced Angina", ["No", "Yes"])
    oldpeak = st.number_input("ST Depression", min_value=0.0, format="%.2f")
    slope = st.number_input("Slope of Peak Exercise ST Segment", min_value=0, max_value=2)
    ca = st.number_input("Major Vessels Colored", min_value=0, max_value=4)
    thal = st.number_input("Thalassemia Type", min_value=0, max_value=3)

    # The Male/Female and Yes/No choices are mapped to 1/0 for the model.
    input_data = np.array([[age, 1 if sex == "Male" else 0, cp, trestbps, chol, 1 if fbs == "Yes" else 0, restecg,
                            thalach, 1 if exang == "Yes" else 0, oldpeak, slope, ca, thal]])
    model = heart_model
    scaler = scaler_heart

elif disease == "Parkinson's":
    fo = st.number_input("MDVP:Fo(Hz)")
    fhi = st.number_input("MDVP:Fhi(Hz)")
    flo = st.number_input("MDVP:Flo(Hz)")
    jitter = st.number_input("MDVP:Jitter(%)")
    shimmer = st.number_input("MDVP:Shimmer")
    nhr = st.number_input("NHR")
    hnr = st.number_input("HNR")
    rpde = st.number_input("RPDE")
    dfa = st.number_input("DFA")
    spread1 = st.number_input("Spread1")
    spread2 = st.number_input("Spread2")
    d2 = st.number_input("D2")
    ppe = st.number_input("PPE")

    # NOTE(review): this form collects 13 values, while the model is trained on
    # every non-target column of parkinsons.csv - verify the counts match.
    input_data = np.array([[fo, fhi, flo, jitter, shimmer, nhr, hnr, rpde, dfa, spread1, spread2, d2, ppe]])
    model = parkinsons_model
    scaler = scaler_parkinsons

if st.button("Predict"):
    # Compare the form's column count with what the fitted model expects so a
    # mismatch shows a readable message instead of a traceback.
    expected_features = getattr(model, "n_features_in_", None)
    if expected_features is not None and input_data.shape[1] != expected_features:
        st.error(
            f"The {disease} model expects {expected_features} features but the "
            f"form provides {input_data.shape[1]}."
        )
    else:
        # Apply the same standardization used at training time before predicting.
        prediction = model.predict(scaler.transform(input_data))
        result = "Positive" if prediction[0] == 1 else "Negative"
        st.success(f"Prediction: {result}")

UnpicklingError: invalid load key, '\x05'.