In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import warnings

warnings.filterwarnings("ignore")

# === Load datasets ===
cross_df = pd.read_csv("oasis_cross-sectional.csv")
long_df = pd.read_csv("oasis_longitudinal.csv")

# === Preprocess cross-sectional ===
cross_df = cross_df.drop(columns=['ID', 'Hand', 'Delay'], errors='ignore')
cross_df = cross_df.dropna(subset=['CDR', 'MMSE', 'Educ'])
cross_df['M/F'] = cross_df['M/F'].map({'M': 1, 'F': 0})
cross_df['Alzheimers'] = cross_df['CDR'].apply(lambda x: 1 if x >= 0.5 else 0)
cross_df = cross_df.drop(columns=['CDR'], errors='ignore')

# === Preprocess longitudinal ===
long_df = long_df.drop(columns=['Subject ID', 'MRI ID', 'Group', 'Visit', 'MR Delay', 'Hand', 'Delay'], errors='ignore')
long_df = long_df.dropna(subset=['CDR', 'MMSE', 'EDUC'])
long_df['M/F'] = long_df['M/F'].map({'M': 1, 'F': 0})
long_df['Alzheimers'] = long_df['CDR'].apply(lambda x: 1 if x >= 0.5 else 0)
long_df = long_df.drop(columns=['CDR'], errors='ignore')

# === Select features common to both datasets ===
features = ['Age', 'Educ', 'MMSE', 'eTIV', 'nWBV', 'ASF', 'M/F', 'SES', 'Alzheimers']
common_features = list(set(features).intersection(set(cross_df.columns)).intersection(set(long_df.columns)))

cross_df_filtered = cross_df[common_features]
long_df_filtered = long_df[common_features]

combined_df = pd.concat([cross_df_filtered, long_df_filtered], ignore_index=True).dropna()

X = combined_df.drop(columns=['Alzheimers'])
y = combined_df['Alzheimers']

# === Train-test split ===
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# === Scale features ===
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === Train stacking classifier ===
lr_model = LogisticRegression(max_iter=1000, class_weight='balanced', solver='liblinear',C=0.1)
rf_model = RandomForestClassifier(n_estimators=200,max_depth=10,min_samples_split=4,random_state=42, class_weight='balanced')

stack = StackingClassifier(
    estimators=[('lr', lr_model), ('rf', rf_model)],
    final_estimator=LogisticRegression(class_weight='balanced', solver='liblinear'),
    cv=5
)
stack.fit(X_train_scaled, y_train)

# === Evaluate ===
y_pred = stack.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# === User input for prediction ===
print("\nEnter values for prediction:")

input_data = {}
for feature in X.columns:
    while True:
        try:
            val = input(f"{feature} ({'1=Male,0=Female' if feature == 'M/F' else 'numeric'}): ")
            if feature == 'M/F':
                val = int(val)
                if val not in [0, 1]:
                    print("Enter 1 for Male or 0 for Female.")
                    continue
            else:
                val = float(val)
            input_data[feature] = val
            break
        except ValueError:
            print("Invalid input, please enter a valid number.")

input_df = pd.DataFrame([input_data])
input_scaled = scaler.transform(input_df)

pred = stack.predict(input_scaled)[0]
prob = stack.predict_proba(input_scaled)[0][1]

print(f"\nPrediction: {'Alzheimers Likely' if pred == 1 else 'Alzheimers Unlikely'}")
print(

f"Probability of Alzheimer's: {prob:.2f}")


Accuracy: 0.9035087719298246

Confusion Matrix:
[[63  5]
 [ 6 40]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92        68
           1       0.89      0.87      0.88        46

    accuracy                           0.90       114
   macro avg       0.90      0.90      0.90       114
weighted avg       0.90      0.90      0.90       114


Enter values for prediction:


nWBV (numeric):  .75
MMSE (numeric):  28
M/F (1=Male,0=Female):  0
eTIV (numeric):  1200
SES (numeric):  1
Age (numeric):  55
ASF (numeric):  .6



Prediction: Alzheimers Unlikely
Probability of Alzheimer's: 0.38
