In [4]:
from os.path import join

import pandas as pd

# Load the autopsy table and derive one pathology group label per subject.
csv = '/Users/jsilva/repositories/MIA/Modulo_3/Clase_Semana_29_CDSS/PET_AUTOPSY.csv'
df = pd.read_csv(csv)

# Threshold tests on the two pathology scores, evaluated for all rows at once.
# Comparisons against NaN are False, so a row with a missing score matches
# none of the masks below and keeps the "Negative" default.
ad_score = df["ALZHEIMER PATHOLOGY"]
lb_score = df["LEWY PATHOLOGY"]
ad_high, ad_low = ad_score > 1, ad_score < 2
lb_high, lb_low = lb_score > 1, lb_score < 2

# Labels are written from lowest to highest priority, so where two masks
# overlap the later assignment wins: AD > LB > Mixed > Negative.
df["Group"] = "Negative"
df.loc[ad_high & lb_high, "Group"] = "Mixed"
df.loc[ad_low & lb_high, "Group"] = "LB"
df.loc[ad_high & lb_low, "Group"] = "AD"

# Report the size of every group before filtering.
for group_ in ["AD", "LB", "Mixed", "Negative"]:
    print(group_, "= ", int((df["Group"] == group_).sum()))

# Keep only the AD and LB subjects. The explicit copy makes the result an
# independent frame, so columns added to it later never touch a filtered slice.
df = df.loc[df["Group"].isin(["AD", "LB"])].copy()

AD =  28
LB =  8
Mixed =  29
Negative =  8


In [5]:
import nibabel as nib
import numpy as np

# Root folder holding one sub-folder per scan, named "<SUBJECT_ID>_I<FDG_ID>".
img_master_path = '/Users/jsilva/data/MIA/Autopsy'

# Label volume: each voxel value is compared against ROI_NUM from the atlas table.
img_atlas = nib.load(
    '/Users/jsilva/repositories/MIA/Modulo_3/Clase_Semana_29_CDSS/cortex_Harvard_Oxford_orig_atlas.nii.gz')
atlas_data = img_atlas.get_fdata()

df_atlas = pd.read_csv("/Users/jsilva/repositories/MIA/Modulo_3/Clase_Semana_29_CDSS/Harvard_Oxford.csv")

# The voxel coordinates of each ROI depend only on the atlas, so they are
# computed once here instead of once per subject inside the loop.
roi_names = list(df_atlas["ROI_NAME"])
roi_voxels = [np.where(atlas_data == roi_num) for roi_num in df_atlas["ROI_NUM"]]

# One list of per-subject means for every ROI, in the same row order as `df`.
roi_means = [[] for _ in roi_names]

# Reading the two id columns directly avoids the per-row Series that
# iterrows() builds (and any dtype coercion that comes with it).
for subj_id, fdg_id in zip(df['SUBJECT_ID'], df['FDG_ID']):
    subj_img_path = join(img_master_path, f'{subj_id}_I{fdg_id}', 'swfdg_normhist.img')

    img_ = nib.load(subj_img_path)
    data_ = img_.get_fdata()

    # Atlas coordinates are only meaningful if the image lies on the same voxel
    # grid; fail loudly rather than average the wrong voxels.
    if data_.shape[:atlas_data.ndim] != atlas_data.shape:
        raise ValueError(
            f"Image {subj_img_path} has shape {data_.shape}, "
            f"which does not match the atlas shape {atlas_data.shape}")

    for subject_means, voxels in zip(roi_means, roi_voxels):
        subject_means.append(np.mean(data_[voxels]))

# Attach each ROI as a single column assignment instead of growing the frame
# one cell at a time.
for roi_name, subject_means in zip(roi_names, roi_means):
    df[roi_name] = subject_means


In [19]:
from scipy.stats import ttest_ind

# Univariate feature filter: keep only the ROI columns whose mean differs
# between the AD and LB groups according to an independent two-sample t-test.
#
# NOTE(review): the test uses every subject in `df`. Any evaluation that later
# splits this same frame into train/test sets therefore sees features that
# were chosen with knowledge of the test subjects (optimistic accuracy).
# NOTE(review): no multiple-comparison correction is applied across ROIs.

# Position of the first ROI column.
# Assumes the first nine columns are non-ROI metadata (including "Group") — TODO confirm.
FIRST_ROI_COLUMN = 9
# ROIs with a p-value at or above this threshold are discarded.
P_THRESHOLD = 0.005

df_ad = df.loc[df["Group"] == 'AD']
df_lb = df.loc[df["Group"] == 'LB']

rejected_rois = []
for roi in df.columns[FIRST_ROI_COLUMN:]:

    t, p = ttest_ind(df_ad[roi], df_lb[roi])
    if p < P_THRESHOLD:
        print(f'{roi}: {round(t, 2)}, {round(p, 3)}')
    else:
        # A NaN p-value also lands here, so such ROIs are dropped too.
        rejected_rois.append(roi)

# Drop every rejected ROI in one call instead of copying the frame once per column.
df = df.drop(columns=rejected_rois)

Inferior_Temporal_Gyrus_anterior_division: -3.65, 0.001
Intracalcarine_Cortex: 3.42, 0.002
Supracalcarine_Cortex: 3.01, 0.005


In [49]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

def _evaluate_classifier(make_model, features, labels, n_splits=50, test_size=0.33):
    """Score a classifier over repeated random train/test splits.

    Parameters
    ----------
    make_model : callable
        Called as ``make_model(seed)`` once per split; must return a new,
        unfitted estimator exposing ``fit`` and ``predict``.
    features, labels
        Feature table and target labels handed to ``train_test_split``.
    n_splits : int
        Number of splits; split ``i`` uses ``random_state=i``.
    test_size : float
        Fraction of the samples held out in every split.

    Returns
    -------
    tuple
        ``(model, scores, last_confusion)``: the estimator fitted on the final
        split, the list of test accuracies (one per split), and the confusion
        matrix of the final split (``None`` when ``n_splits`` is 0).
    """
    scores = []
    model = None
    last_confusion = None
    for seed in range(n_splits):
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=test_size, random_state=seed)

        model = make_model(seed)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        scores.append(accuracy_score(y_test, y_pred))
        last_confusion = confusion_matrix(y_test, y_pred)
    return model, scores, last_confusion


def _report_accuracy(label, scores):
    """Print the mean and standard deviation of `scores`, rounded to two decimals."""
    print(
        f" Accuracy for {label}: {round(np.mean(scores), 2)} +- {round(np.std(scores), 2)}")


Y = df['Group']
# Assumes the last three columns of `df` are the ROI features to train on — TODO confirm.
X = df.iloc[:, -3:]

# NOTE(review): the splits are not stratified and plain accuracy is reported;
# with unbalanced groups, consider `stratify=Y` and a balanced metric.
#
# Only non-default arguments are passed to the estimators. The other keyword
# arguments that used to be spelled out here match scikit-learn's documented
# defaults (as of 1.4, the first release that accepts `monotonic_cst`).

model_lrc, accuracy_list, conf_matrix = _evaluate_classifier(
    lambda seed: LogisticRegression(class_weight="balanced"), X, Y)
_report_accuracy("Logistic Regression Classifier", accuracy_list)

model_svm, accuracy_list, conf_matrix = _evaluate_classifier(
    lambda seed: SVC(class_weight="balanced"), X, Y)
_report_accuracy("SVM", accuracy_list)

# The forest is the only stochastic estimator here; seeding it with the split
# number makes its reported accuracy reproducible across runs.
model_rfc, accuracy_list, conf_matrix = _evaluate_classifier(
    lambda seed: RandomForestClassifier(random_state=seed), X, Y)
_report_accuracy("RFC", accuracy_list)

# `conf_matrix` now holds the confusion matrix of the final RFC split.




 Accuracy for Logistic Regression Classifier: 0.79 +- 0.13
 Accuracy for SVM: 0.81 +- 0.11
 Accuracy for RFC: 0.76 +- 0.1


In [52]:
import pickle

# Refit the logistic-regression model on all samples in X / Y before saving it.
model_lrc.fit(X, Y)

# The context manager closes the file handle even if pickling raises.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open('model_lrc.pkl', 'wb') as model_file:
    pickle.dump(model_lrc, model_file)