In [1]:
import numpy as np
import joblib
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset (expected in the current working directory)
df = pd.read_csv("alzheimers_disease_data.csv")

# Drop columns that are not used as model inputs
df.drop(columns=['PatientID', 'DoctorInCharge'], inplace=True)

# Separate features and target variable
X = df.drop(columns=['Diagnosis'])
y = df['Diagnosis']

# Split BEFORE scaling (80% train, 20% test) so that the scaler statistics are
# computed from the training rows only; fitting the scaler on the full dataset
# would leak information about the test rows into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features using the mean/variance of the training split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept so that code relying on the
# `X_scaled` name keeps working (it is not used for training below)
X_scaled = scaler.transform(X)

# Train the Gradient Boosting model; a fixed seed makes the fit reproducible
best_gb_model = GradientBoostingClassifier(random_state=42)
best_gb_model.fit(X_train, y_train)

# Save the model and scaler so they can be reloaded at prediction time
joblib.dump(best_gb_model, "gradient_boosting_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# Load model and scaler for prediction
def load_model():
    """Load the persisted classifier and scaler from the working directory.

    Returns:
        tuple: ``(model, scaler)`` read with ``joblib.load`` from
        ``gradient_boosting_model.pkl`` and ``scaler.pkl`` respectively.
    """
    # Order matters: the first path is the model, the second the scaler.
    artifact_paths = ("gradient_boosting_model.pkl", "scaler.pkl")
    model, scaler = (joblib.load(path) for path in artifact_paths)
    return model, scaler

# Function for user input prediction
def predict_alzheimers(user_input):
    """Classify a single record with the saved model and scaler.

    Args:
        user_input: Numeric feature values for one record, in the same column
            order the scaler was fitted with. Any array-like is accepted; it
            is flattened into a single row.

    Returns:
        str: "Person has Alzheimer's" when the predicted class equals 1,
        otherwise "Person is Safe".

    Raises:
        ValueError: If the values are not numeric, are not all finite, or
            their count differs from the number of features the scaler was
            fitted on.
    """
    model, scaler = load_model()

    # Convert user input to a single-row float array
    features = np.asarray(user_input, dtype=float).reshape(1, -1)

    # Fail early with a clear message instead of a generic estimator error
    expected = getattr(scaler, "n_features_in_", None)
    if expected is not None and features.shape[1] != expected:
        raise ValueError(
            f"Expected {expected} feature values, got {features.shape[1]}"
        )
    if not np.isfinite(features).all():
        raise ValueError("Input contains NaN or infinite values")

    # A scaler fitted on a DataFrame remembers its column names; passing the
    # same names back avoids scikit-learn's feature-name mismatch warning.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=feature_names)

    # Scale input
    user_input_scaled = scaler.transform(features)

    # Predict
    prediction = model.predict(user_input_scaled)

    # Return result
    return "Person has Alzheimer's" if prediction[0] == 1 else "Person is Safe"

# Example input: one record of 32 values, ordered to match the dataset features
user_sample = [
    0.5, 1.2, 0.8, 0.6, 1.1, 0.9, 0.7, 0.4,
    1.3, 0.6, 0.5, 1.0, 0.8, 0.3, 0.9, 1.2,
    0.4, 0.7, 0.5, 1.1, 0.6, 0.8, 0.9, 0.5,
    1.2, 0.3, 0.7, 1.0, 0.8, 0.6, 0.4, 1.1,
]
# Run the sample through the saved model and show the verdict
print("Sample Prediction:", predict_alzheimers(user_sample))



Sample Prediction: Person has Alzheimer's


