In [1]:
import pandas as pd

In [2]:
# Read the CSV named by file_path into a DataFrame
file_path = "BIOM.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Restrict the table to the columns used below: seven features plus the DX column
selected_columns = [
    "AGE", "PTGENDER", "FDG", "PIB",
    "MMSE", "PTMARRY", "APOE4", "DX",
]
data_selected = data.loc[:, selected_columns]

In [4]:
# Turn any literal "NA" strings into pandas' missing-value marker
data_cleaned = data_selected.replace(to_replace="NA", value=pd.NA)

In [5]:
# Continuous measurements: fill gaps with the column mean.
# NOTE(review): these statistics are computed on the whole table, before the
# train/test split further down, so held-out rows influence the fill values.
for column in ('AGE', 'FDG', 'PIB', 'MMSE'):
    column_mean = data_cleaned[column].mean()
    data_cleaned[column] = data_cleaned[column].fillna(column_mean)

# APOE4: fill gaps with the most frequent value instead of the mean
apoe4_mode = data_cleaned['APOE4'].mode()[0]
data_cleaned['APOE4'] = data_cleaned['APOE4'].fillna(apoe4_mode)

In [6]:
# Text-valued columns: fill gaps with the most frequent value
for column in ('PTGENDER', 'PTMARRY'):
    most_common = data_cleaned[column].mode()[0]
    data_cleaned[column] = data_cleaned[column].fillna(most_common)

In [7]:

# Encode PTGENDER and PTMARRY as integer codes.
# Values not listed in a mapping come out of .map() as NaN.
gender_codes = {'Male': 0, 'Female': 1}
marital_codes = {
    'Married': 0,
    'Divorced': 1,
    'Widowed': 2,
    'Never married': 3,
    'Unknown': 4,
}
data_cleaned['PTGENDER'] = data_cleaned['PTGENDER'].map(gender_codes)
data_cleaned['PTMARRY'] = data_cleaned['PTMARRY'].map(marital_codes)

In [8]:
# Collapse the DX labels into a binary target:
# 0 = not demented (NL, NL to MCI, MCI to NL, MCI)
# 1 = demented     (Dementia, MCI to Dementia, NL to Dementia)
dx_mapping = {
    **dict.fromkeys(['NL', 'NL to MCI', 'MCI to NL', 'MCI'], 0),
    **dict.fromkeys(['Dementia', 'MCI to Dementia', 'NL to Dementia'], 1),
}

In [9]:
# Class code -> display name for the binary target.
# dx_mapping is many-to-one, so inverting it with a dict comprehension keeps only
# the last original label per class (0 -> 'MCI', 1 -> 'NL to Dementia') and would
# mislabel predictions. Name the two classes explicitly instead.
reverse_dx_mapping = {
    0: 'Non-dementia (NL / MCI)',
    1: 'Dementia',
}

In [10]:
# Replace each DX label with its binary class code. Labels that are missing or
# not present in dx_mapping become NaN here; those rows are removed by the
# dropna(subset=['DX']) step that follows.
# (A .dropna() on the right-hand side would have no effect: assigning the shorter
# Series back to the column re-aligns on the index and restores the NaNs.)
data_cleaned['DX'] = data_cleaned['DX'].map(dx_mapping)

In [11]:
# Keep only the rows that have a DX value
data_cleaned = data_cleaned[data_cleaned['DX'].notna()]

In [12]:
# Split the cleaned table into the DX label (y) and everything else (X)
y = data_cleaned['DX']
X = data_cleaned.drop(labels='DX', axis='columns')

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Hold out 15% of the rows for evaluation; the fixed random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [15]:
# Let's now create the stacking model
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [16]:
# Define base learners as (name, estimator) pairs for the stacking ensemble.
# SVC with probability=True fits an internal cross-validated calibration that
# shuffles the data, so it is seeded like the two tree ensembles; without
# random_state the ensemble's results vary from run to run.
base_learners = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ('svm', SVC(kernel='linear', probability=True, random_state=42))
]

In [17]:
# Build the stacking ensemble: a logistic regression combines the base learners' outputs
meta_learner = LogisticRegression()
stacking_model = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_learner,
)

In [18]:
# Fit the ensemble on the training split
stacking_model.fit(X=X_train, y=y_train)

In [19]:
from sklearn.metrics import accuracy_score

In [21]:
# Predict class codes for the held-out rows
y_pred = stacking_model.predict(X=X_test)

In [22]:
# Fraction of held-out rows whose predicted class matches the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [23]:
# Report the accuracy as a percentage with two decimals
accuracy_percent = accuracy * 100
print(f"Accuracy of the stacking model: {accuracy_percent:.2f}%")

Accuracy of the stacking model: 89.49%


In [None]:
import numpy as np

In [None]:
def get_user_input():
    """Prompt for one patient's feature values and return them as a single row.

    Returns:
        numpy.ndarray of shape (1, 7), ordered as
        [age, gender, FDG, PIB, MMSE, marital status, APOE4 count].

    Raises:
        ValueError: if an entry cannot be parsed as a number, or if a coded
            field (gender, marital status, APOE4 count) is not one of the
            values listed in its prompt.
    """
    def _read_code(prompt, allowed):
        # Coded fields only make sense for the integers advertised in the
        # prompt; reject anything else rather than pass it on silently.
        value = int(input(prompt))
        if value not in allowed:
            raise ValueError(
                f"Expected one of {sorted(allowed)}, got {value}"
            )
        return value

    age = float(input("Enter Age: "))
    ptgender = _read_code("Enter Gender (0 = Male, 1 = Female): ", {0, 1})
    fdg = float(input("Enter FDG value: "))
    pib = float(input("Enter PIB value: "))
    mmse = float(input("Enter MMSE score: "))
    ptmarry = _read_code(
        "Enter Marital Status (0=Married, 1=Divorced, 2=Widowed, 3=Never married, 4=Unknown): ",
        {0, 1, 2, 3, 4},
    )
    apoe4 = _read_code("Enter APOE4 allele count (0, 1, or 2): ", {0, 1, 2})

    # 2-D array with a single row, the shape predict()/predict_proba() expect
    input_data = np.array([[age, ptgender, fdg, pib, mmse, ptmarry, apoe4]])

    return input_data


In [None]:
# Interactively collect one row of feature values (blocks waiting on stdin)
user_input = get_user_input()

In [None]:
# The model was fitted on a DataFrame, so give the input row the same column
# labels; passing a bare array makes scikit-learn warn that the input has no
# valid feature names.
user_features = pd.DataFrame(user_input, columns=X_train.columns)
prediction = stacking_model.predict(user_features)
probabilities = stacking_model.predict_proba(user_features)


In [None]:
# Look up the display name for the class predicted for the single input row
predicted_label = prediction[0]
predicted_class_name = reverse_dx_mapping[predicted_label]

In [None]:
# The target is binary (dx_mapping only produces 0 and 1), so the legend lists
# two classes; the earlier five-class legend did not match the trained model.
print(f"Predicted class (0 = Non-dementia, 1 = Dementia): {int(prediction[0])}")
# Probabilities are in class order: [P(class 0), P(class 1)]
print(f"Probability for each class: {probabilities[0]}")
print(predicted_class_name)