In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical


In [4]:
# Load the three source tables (MRI, CSF, UDS) from CSV files in the working
# directory, in that order.
dataMRI, dataCSF, dataUDS = (
    pd.read_csv(csv_name) for csv_name in ('mri.csv', 'csf.csv', 'uds.csv')
)

In [5]:
# Plain-text preview of the first five MRI rows.
print(dataMRI.head(n=5))

       NACCID  NACCICV  NACCBRNV  NACCWMVL  FRONTGRY  FRONTWHT  FRONTCSF  \
0  NACC914950  1535.13   1081.63    504.80       NaN       NaN       NaN   
1  NACC388999  1314.57   1001.09    437.70       NaN       NaN       NaN   
2  NACC550785  1571.92   1210.39    516.57       NaN       NaN       NaN   
3  NACC321645  1417.97   1043.73    431.46       NaN       NaN       NaN   
4  NACC129206  1553.60   1086.93    425.40       NaN       NaN       NaN   

   OCCIPGRY  OCCIPWHT  OCCIPCSF  ...  RSUPTEMM  RSUPMAR  RSUPMARM  RTRTEM  \
0       NaN       NaN       NaN  ...      2.02     7.24      1.89    0.72   
1       NaN       NaN       NaN  ...      2.17     8.92      2.09    0.63   
2       NaN       NaN       NaN  ...      2.01    10.37      1.89    0.75   
3       NaN       NaN       NaN  ...      2.06     8.38      1.97    0.98   
4       NaN       NaN       NaN  ...      2.32    10.38      2.02    1.80   

   RTRTEMM  NACCVNUM    datetime  datetime_UDS  timediff  within-a-year  
0     

In [6]:
# Plain-text preview of the first five CSF rows.
print(dataCSF.head(n=5))

       NACCID  CSFABETA  CSFABMD  CSFTTAU  CSFTTMD  CSFPTAU  CSFPTMD
0  NACC000441    220.32        2   219.18      2.0    103.1      2.0
1  NACC001235    247.77        1   785.89      1.0    146.7      1.0
2  NACC001634    177.00        2   135.00      2.0     95.0      2.0
3  NACC001689    266.00        8   313.00      8.0     48.0      8.0
4  NACC002539    626.00        8   999.00      8.0    149.0      8.0


In [8]:
# Plain-text preview of the first five UDS rows.
print(dataUDS.head(n=5))

       NACCID  NACCADC  NACCAGE  NACCVNUM  EDUC  SEX  NACCAPOE  NACCUDSD  \
0  NACC020208      186       69         1  16.0    1       NaN         3   
1  NACC107305      186       74         1  18.0    2       NaN         1   
2  NACC151065      186       86         1  14.0    2       NaN         3   
3  NACC187327      186       68         1  14.0    2       NaN         1   
4  NACC188799      186       78         1  14.0    2       NaN         3   

   NACCALZP  MEMORY  ...  BOSTON  MINTTOTS  CRAFTDRE  DIGFORCT  DIGFORSL  \
0         1     1.0  ...    27.0      29.0       1.0       8.0       6.0   
1         8     0.0  ...    30.0      32.0      15.0       7.0       6.0   
2         7     0.0  ...    26.0      28.0      16.0       3.0       4.0   
3         8     0.5  ...    27.0      29.0      17.0      10.0       7.0   
4         7     0.5  ...    21.0      24.0       0.0       7.0       6.0   

   DIGBACCT  DIGBACLS    datetime  NACCAD3    NACCAD5  
0       7.0       5.0  2020-06

In [9]:
# Outer-join the three tables on NACCID: UDS with CSF first, then the result
# with MRI. Non-key columns present on both sides get the default _x / _y
# suffixes (e.g. NACCVNUM_x / NACCVNUM_y in the preview of the merged frame).
merged_df = (
    dataUDS
    .merge(dataCSF, on='NACCID', how='outer')
    .merge(dataMRI, on='NACCID', how='outer')
)


In [10]:
# Plain-text preview of the first five rows of the merged table.
print(merged_df.head(n=5))

       NACCID  NACCADC  NACCAGE  NACCVNUM_x  EDUC  SEX  NACCAPOE  NACCUDSD  \
0  NACC000011     1416       62           1  16.0    2       1.0         3   
1  NACC000034     9661       79           1  15.0    2       4.0         3   
2  NACC000067     2096       60           1  18.0    1       1.0         3   
3  NACC000073     5452       44           1  18.0    2       NaN         1   
4  NACC000095     2578       87           1  16.0    1       2.0         4   

   NACCALZP  MEMORY  ...  RSUPTEMM  RSUPMAR  RSUPMARM  RTRTEM  RTRTEMM  \
0         7     0.5  ...       NaN      NaN       NaN     NaN      NaN   
1         1     0.5  ...       NaN      NaN       NaN     NaN      NaN   
2         7     0.0  ...       NaN      NaN       NaN     NaN      NaN   
3         8     0.0  ...       NaN      NaN       NaN     NaN      NaN   
4         1     1.0  ...       NaN      NaN       NaN     NaN      NaN   

   NACCVNUM_y  datetime_y  datetime_UDS  timediff  within-a-year  
0         NaN      

In [11]:
# Narrow the merged table to the NACCID key, the candidate input columns and
# NACCUDSD. Column order is preserved exactly; NACCUDSD stays last.
merged_df = merged_df[[
    'NACCID',
    'SEX', 'EDUC', 'NACCAGE',
    'VEG', 'ANIMALS', 'TRAILA', 'TRAILB', 'CRAFTDRE', 'MINTTOTS', 'DIGBACCT',
    'MEMPROB', 'DROPACT', 'WRTHLESS', 'BETTER', 'BORED', 'HELPLESS',
    'TAXES', 'BILLS', 'REMDATES', 'TRAVEL',
    'NACCUDSD',
]]

In [12]:
# Keep only rows with no NaN in any remaining column (dropna's defaults are
# row-wise, drop-on-any-missing).
# NOTE(review): this removes NaN only; if the source files encode missing
# answers as numeric sentinel codes, those rows survive - confirm against the
# data dictionary.
merged_df = merged_df.dropna()


In [13]:
# Final modelling table: the same columns as before minus the NACCID key.
# NACCUDSD must remain the last column because the train/test preparation
# cell slices the target positionally with iloc[:, -1].
merged_df = merged_df[[
    'SEX', 'EDUC', 'NACCAGE',
    'VEG', 'ANIMALS', 'TRAILA', 'TRAILB', 'CRAFTDRE', 'MINTTOTS', 'DIGBACCT',
    'MEMPROB', 'DROPACT', 'WRTHLESS', 'BETTER', 'BORED', 'HELPLESS',
    'TAXES', 'BILLS', 'REMDATES', 'TRAVEL',
    'NACCUDSD',
]]

In [55]:
def transform_value(x):
    """Collapse a four-level code into a binary label.

    Codes 1 and 2 map to 0, codes 3 and 4 map to 1. Any other value is
    returned unchanged.
    """
    if x in (1, 2):
        return 0
    if x in (3, 4):
        return 1
    # Anything outside 1-4 is passed through untouched.
    return x


In [56]:
# Replace NACCUDSD element-wise with its binary label from transform_value.
# Caution: this cell is not idempotent. transform_value sends 1 -> 0, so
# running it a second time on the already-binarised column turns every 1
# into 0. Re-run the preceding cells first if this cell must be repeated.
merged_df['NACCUDSD'] = merged_df['NACCUDSD'].map(transform_value)

In [58]:
# Every column except the last is a feature; the last column is the target.
x, y = merged_df.iloc[:, :-1].values, merged_df.iloc[:, -1].values

# Encode the target as consecutive integer ids, then expand to one-hot rows.
label_encoder = LabelEncoder()
y = to_categorical(label_encoder.fit_transform(y))

# 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test statistics reach the scaler.
scalerFunction = StandardScaler()
X_train, X_test = scalerFunction.fit_transform(X_train), scalerFunction.transform(X_test)

In [72]:
# Single-hidden-layer classifier: 64 tanh units feeding a 2-unit sigmoid
# output (one unit per one-hot column of y).
# The input width is declared with an explicit Input object instead of the
# `input_shape=` argument on the first Dense layer; Keras emits a UserWarning
# for the latter in Sequential models. The architecture is unchanged.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='tanh'),
    tf.keras.layers.Dense(2, activation='sigmoid'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [73]:
# Configure training: binary cross-entropy loss, RMSProp optimiser, and
# accuracy reported each epoch.
model.compile(loss='binary_crossentropy', optimizer='RMSProp', metrics=['accuracy'])

In [74]:
# Train for 50 epochs in mini-batches of 32, reserving 20% of the training
# data for per-epoch validation. Left as the cell's last expression so the
# returned History object is displayed.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)


Epoch 1/50


[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 995us/step - accuracy: 0.8047 - loss: 0.4610 - val_accuracy: 0.8467 - val_loss: 0.3556
Epoch 2/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 612us/step - accuracy: 0.8491 - loss: 0.3383 - val_accuracy: 0.8459 - val_loss: 0.3516
Epoch 3/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 524us/step - accuracy: 0.8504 - loss: 0.3410 - val_accuracy: 0.8490 - val_loss: 0.3516
Epoch 4/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 618us/step - accuracy: 0.8560 - loss: 0.3262 - val_accuracy: 0.8467 - val_loss: 0.3513
Epoch 5/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615us/step - accuracy: 0.8509 - loss: 0.3285 - val_accuracy: 0.8506 - val_loss: 0.3500
Epoch 6/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 600us/step - accuracy: 0.8549 - loss: 0.3273 - val_accuracy: 0.8521 - val_loss: 0.3499
Epoch 7/50
[1m161/161[0m 

<keras.src.callbacks.history.History at 0x2216dcedb50>

In [75]:
# Score the held-out test split; evaluate returns the loss followed by the
# compiled metric (accuracy).
test_scores = model.evaluate(x=X_test, y=y_test)
loss, accuracy = test_scores
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 383us/step - accuracy: 0.8307 - loss: 0.3469
Test loss: 0.3486357033252716
Test accuracy: 0.835616409778595


In [76]:
# Measure performance with AUC instead of accuracy.
# NOTE(review): `model` is the object that was already fitted above, and this
# cell re-compiles it without rebuilding it, so the fit below resumes from the
# previously trained weights (the first logged epoch already shows
# auc ~0.96). Confirm whether continuing training is intended, or whether the
# model should be rebuilt before this experiment.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

# Train the model: 50 epochs, batches of 32, 20% of the training data held
# out for validation. The History object is kept for later inspection.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

# Evaluate on the test split; `results` holds the loss followed by the AUC.
results = model.evaluate(x=X_test, y=y_test)
print("Test Loss, Test AUC:", results)

Epoch 1/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - auc: 0.9564 - loss: 0.2674 - val_auc: 0.9196 - val_loss: 0.3627
Epoch 2/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 607us/step - auc: 0.9517 - loss: 0.2823 - val_auc: 0.9205 - val_loss: 0.3622
Epoch 3/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 679us/step - auc: 0.9528 - loss: 0.2791 - val_auc: 0.9198 - val_loss: 0.3616
Epoch 4/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 670us/step - auc: 0.9524 - loss: 0.2810 - val_auc: 0.9192 - val_loss: 0.3655
Epoch 5/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 649us/step - auc: 0.9530 - loss: 0.2786 - val_auc: 0.9191 - val_loss: 0.3666
Epoch 6/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step - auc: 0.9553 - loss: 0.2715 - val_auc: 0.9189 - val_loss: 0.3661
Epoch 7/50
[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6