In [None]:
# Install the notebook's third-party dependencies.
# pandas and scikit-learn come from conda; -y answers the confirmation prompt automatically.
%conda install pandas -y
%conda install scikit-learn -y
# streamlit is installed with pip; none of the cells visible here import it,
# so it is presumably needed by a companion app -- TODO confirm.
%pip install streamlit

In [None]:
import pandas as pd  

# Read the three disease datasets from the local datasets/ folder
diabetes = pd.read_csv("datasets/diabetes.csv")
heart = pd.read_csv("datasets/heart_disease.csv")
# Drop the non-feature 'name' column from the Parkinson's data right after loading
parkinsons = pd.read_csv("datasets/parkinsons.csv").drop(columns=["name"])

# (preview heading, missing-value heading, frame) for each loaded dataset
loaded_frames = [
    ("Diabetes Dataset:\n", "Diabetes:\n", diabetes),
    ("Heart Disease Dataset:\n", "Heart:\n", heart),
    ("parkinsons Disease Dataset:\n", "parkinsons:\n", parkinsons),
]

# Preview the first few rows of every frame
for preview_heading, missing_heading, frame in loaded_frames:
    print(preview_heading, frame.head(), "\n")

# Report the number of missing values per column
print("Missing Values:\n")
for preview_heading, missing_heading, frame in loaded_frames:
    print(missing_heading, frame.isnull().sum(), "\n")


In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

def preprocess_data(df, target_column, scaler_path, encoder_path):
    """Encode, split, and scale one dataset, persisting the fitted transformers.

    Object-dtype feature columns are label-encoded, the rows are split 80/20
    into train and test sets (``random_state=42``), and the features are
    standardized with a ``StandardScaler`` fitted on the training rows only,
    so test-set statistics never leak into the scaling or the saved scaler.

    Args:
        df: DataFrame holding the feature columns and the target column.
        target_column: Name of the label column in ``df``.
        scaler_path: Destination the fitted ``StandardScaler`` is dumped to.
        encoder_path: Destination the ``{column: LabelEncoder}`` dict is
            dumped to (an empty dict when no column needed encoding).

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The X parts are the
        scaled arrays produced by the scaler; the y parts are the matching
        slices of ``df[target_column]``.
    """
    import os  # local import so this cell does not depend on another cell's imports

    X = df.drop(columns=[target_column])  # drop() returns a new frame; df is untouched
    y = df[target_column]

    # Label-encode categorical (object-dtype) columns. Encoders are fitted on
    # all rows so categories that only occur in the test split are still known.
    encoders = {}
    for col in X.columns:
        if X[col].dtype == 'object':
            encoders[col] = LabelEncoder()
            X[col] = encoders[col].fit_transform(X[col])

    # Split BEFORE scaling: fitting the scaler on the full data would let the
    # held-out rows influence the mean/variance used for training.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)  # learn statistics from train rows only
    X_test = scaler.transform(X_test)        # reuse the train statistics

    # joblib.dump does not create missing directories, so make sure the
    # destination folders exist (skipped for non-path targets such as file objects).
    for path in (scaler_path, encoder_path):
        if isinstance(path, (str, os.PathLike)):
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)

    # Persist the fitted transformers for reuse at prediction time
    joblib.dump(scaler, scaler_path)
    joblib.dump(encoders, encoder_path)

    return X_train, X_test, y_train, y_test

# Preprocess every dataset; each call also writes that dataset's scaler and
# encoder pickles to the given Models/ paths
(
    X_train_diabetes,
    X_test_diabetes,
    y_train_diabetes,
    y_test_diabetes,
) = preprocess_data(
    df=diabetes,
    target_column="Outcome",
    scaler_path="Models/diabetes_scaler.pkl",
    encoder_path="Models/diabetes_encoders.pkl",
)

(
    X_train_heart,
    X_test_heart,
    y_train_heart,
    y_test_heart,
) = preprocess_data(
    df=heart,
    target_column="target",
    scaler_path="Models/heart_scaler.pkl",
    encoder_path="Models/heart_encoders.pkl",
)

(
    X_train_parkinsons,
    X_test_parkinsons,
    y_train_parkinsons,
    y_test_parkinsons,
) = preprocess_data(
    df=parkinsons,
    target_column="status",
    scaler_path="Models/parkinsons_scaler.pkl",
    encoder_path="Models/parkinsons_encoders.pkl",
)

print("Preprocessing complete and scalers saved!")


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_and_evaluate(X_train, X_test, y_train, y_test, disease_name):
    """Fit a logistic-regression classifier and print its test-set accuracy.

    Args:
        X_train: Training feature matrix.
        X_test: Held-out feature matrix used for evaluation.
        y_train: Training labels.
        y_test: Held-out labels.
        disease_name: Label used as the prefix of the printed accuracy line.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    # fit() returns the estimator itself, so construction and training chain
    classifier = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Score the held-out predictions against the true labels
    test_accuracy = accuracy_score(y_test, classifier.predict(X_test))
    print(f"{disease_name} Model Accuracy: {test_accuracy:.4f}")

    return classifier

# Fit one classifier per disease; each call prints that model's test accuracy
model_diabetes = train_and_evaluate(
    X_train=X_train_diabetes,
    X_test=X_test_diabetes,
    y_train=y_train_diabetes,
    y_test=y_test_diabetes,
    disease_name="Diabetes",
)
model_heart = train_and_evaluate(
    X_train=X_train_heart,
    X_test=X_test_heart,
    y_train=y_train_heart,
    y_test=y_test_heart,
    disease_name="Heart Disease",
)
model_parkinsons = train_and_evaluate(
    X_train=X_train_parkinsons,
    X_test=X_test_parkinsons,
    y_train=y_train_parkinsons,
    y_test=y_test_parkinsons,
    disease_name="Parkinson's",
)

In [None]:
import joblib

# Persist each trained classifier to its own pickle file
trained_models = {
    'Models/diabetes_model.pkl': model_diabetes,
    'Models/heart_model.pkl': model_heart,
    'Models/parkinsons_model.pkl': model_parkinsons,
}
for model_path, fitted_model in trained_models.items():
    joblib.dump(fitted_model, model_path)

print("Models saved successfully!")

In [None]:

# TEST PREDICTION TO SEE IF SCALER IS WORKING OR NOT


import numpy as np
import pandas as pd   #TESTTSTS
import joblib

# Reload the persisted diabetes scaler and classifier from disk
scaler_diabetes = joblib.load('Models/diabetes_scaler.pkl')
model_diabetes = joblib.load('Models/diabetes_model.pkl')

# Feature columns, named and ordered as in the diabetes training data
feature_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

# A single hand-written sample intended to look non-diabetic
test_input = np.array([[0, 85, 70, 20, 80, 22.0, 0.2, 25]])

# Wrap the raw array in a DataFrame so the scaler receives named columns
# (presumably this is what avoids the feature-name warning -- verify)
test_input_df = pd.DataFrame(data=test_input, columns=feature_names)

# Apply the saved scaling, then classify the scaled sample
test_input_scaled = scaler_diabetes.transform(test_input_df)
prediction = model_diabetes.predict(test_input_scaled)

print("Prediction:", prediction)
