In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications import inception_resnet_v2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow.keras.utils import img_to_array, array_to_img
from google.colab.patches import cv2_imshow
import os
import zipfile
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import collections

In [3]:
base_path='/content/drive/MyDrive/Dataset/Dementia_sample/Real_data'

In [4]:
df=pd.read_excel('/content/drive/MyDrive/Dataset/ADNI_tabular/ADNI_subjects.xlsx')
df

Unnamed: 0,PHASE,RID,PTID,EXAMDATE,Gender,Birthdate,Age,FINAL_DX,AD severity,Certain or Not,CDR,MMSCORE,Bilateral hippocampus,D_subtract,APGEN1,APGEN2,APOE carrier
0,ADNI2,5040,070_S_5040,2013-07-25,2,1936-12-01,77,normal cognition-NC,,,0.0,30,0.005795,0,3,3,0
1,ADNI2,295,002_S_0295,2011-06-02,1,1921-06-01,90,normal cognition-NC,,,0.0,28,0.004041,0,3,4,1
2,ADNI2,295,002_S_0295,2012-05-10,1,1921-06-01,91,normal cognition-NC,,,0.0,22,0.003914,0,3,4,1
3,ADNI1,413,002_S_0413,2006-05-19,2,1929-12-01,77,normal cognition,,4.0,0.0,29,0.004400,0,3,3,0
4,ADNI1,413,002_S_0413,2006-11-15,2,1929-12-01,77,normal cognition,,4.0,0.0,29,0.004296,0,3,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5187,ADNI3,6345,941_S_6345,2018-05-10,1,1939-01-01,79,MCI,,,0.5,28,0.003906,0,3,4,1
5188,ADNI3,6345,941_S_6345,2019-05-29,1,1939-01-01,80,Mild AD,1.0,,1.0,24,0.003759,0,3,4,1
5189,ADNI3,6345,941_S_6345,2020-09-08,1,1939-01-01,82,Mild AD,1.0,,1.0,22,0.003553,0,3,4,1
5190,ADNI3,6854,941_S_6854,2020-02-14,1,1933-11-01,86,Mild AD,1.0,,0.5,30,0.003654,0,3,4,1


In [5]:
df = df.drop(['PHASE','RID','AD severity','Certain or Not','CDR'], axis=1)
df

Unnamed: 0,PTID,EXAMDATE,Gender,Birthdate,Age,FINAL_DX,MMSCORE,Bilateral hippocampus,D_subtract,APGEN1,APGEN2,APOE carrier
0,070_S_5040,2013-07-25,2,1936-12-01,77,normal cognition-NC,30,0.005795,0,3,3,0
1,002_S_0295,2011-06-02,1,1921-06-01,90,normal cognition-NC,28,0.004041,0,3,4,1
2,002_S_0295,2012-05-10,1,1921-06-01,91,normal cognition-NC,22,0.003914,0,3,4,1
3,002_S_0413,2006-05-19,2,1929-12-01,77,normal cognition,29,0.004400,0,3,3,0
4,002_S_0413,2006-11-15,2,1929-12-01,77,normal cognition,29,0.004296,0,3,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...
5187,941_S_6345,2018-05-10,1,1939-01-01,79,MCI,28,0.003906,0,3,4,1
5188,941_S_6345,2019-05-29,1,1939-01-01,80,Mild AD,24,0.003759,0,3,4,1
5189,941_S_6345,2020-09-08,1,1939-01-01,82,Mild AD,22,0.003553,0,3,4,1
5190,941_S_6854,2020-02-14,1,1933-11-01,86,Mild AD,30,0.003654,0,3,4,1


In [7]:
df[(df['PTID'] == '002_S_0295') & (df['EXAMDATE'] == '20110602')]

Unnamed: 0,PTID,EXAMDATE,Gender,Birthdate,Age,FINAL_DX,MMSCORE,Bilateral hippocampus,D_subtract,APGEN1,APGEN2,APOE carrier
1,002_S_0295,2011-06-02,1,1921-06-01,90,normal cognition-NC,28,0.004041,0,3,4,1


In [8]:
# #알집 해제
# def open_zip(Data_path):
#     os.chdir(Data_path)
#     zip_files = os.listdir(Data_path)
#     zip_files=[files for files in zip_files if '.zip' in files]
#     for zip_file in zip_files:
#         zipfile.ZipFile(os.path.join(Data_path,zip_file)).extractall()
#     return os.listdir(Data_path)
# data_listdir=open_zip(base_path)

In [10]:

def batch_generator(Data_path,batch_size=2):
    fold = collections.defaultdict(list)
    os.chdir(Data_path)
    Data_list = os.listdir(Data_path)
    folder_list = [folder for folder in Data_list if os.path.isdir(os.path.join(Data_path, folder))]
    print(folder_list)
    for folder in folder_list:
        # print(folder)
        files_in_folder=os.listdir(os.path.join(Data_path,folder))
        # print(files_in_folder)
        fold[folder] = files_in_folder
        # print(fold)
    return fold

fold_data = batch_generator(base_path)


['Normal', 'Mild AD']


In [11]:
fold_data.keys()

dict_keys(['Normal', 'Mild AD'])

In [None]:
data_list = []
data_dict = {}
for class_label, subject_folds_list in fold_data.items():
    for subject_folder in subject_folds_list:
        parts = subject_folder.split('_')
        #dtype str, str
        subject_id, examdate = '_'.join(parts[0:3]), '20'+parts[3]
        ########examdate에 중간바('-') 를 join 안 시켰는데 인식함.
        result_row = df[(df['PTID'] == subject_id) & (df['EXAMDATE'] == examdate)]
        # print(result_row['Gender'].values.shape)#(1,)
        # print(result_row['Gender'].values[0].shape)#() value값으로 받기 위해  dim 제거
        if not result_row.empty:
            print(f"Class: {class_label}, Subject: {subject_folder}, Gender: {result_row['Gender'].values[0]}, Age: {result_row['Age'].values[0]},")
        image_dir=os.path.join(base_path, class_label,subject_folder)
        image_paths = [os.path.join(image_dir, filename) for filename in os.listdir(image_dir) if filename.endswith('.png')]

            # 데이터를 딕셔너리에 저장
            ###### image_paths -> path, Image_Number 가 list형태로 들어갔는데 고침.
        data_dict = {
            'image_path': image_dir,
            'Gender': result_row['Gender'].values[0],
            'Age': result_row["Age"].values[0],
            'Image_Number': [int(os.path.splitext(os.path.basename(path))[0].split('plane')[-1]) for path in image_paths],
            'label': class_label
        }

            # 리스트에 딕셔너리 추가
        data_list.append(data_dict)
final_df = pd.DataFrame(data_list)


In [13]:
final_df


Unnamed: 0,image_path,Gender,Age,Image_Number,label
0,/content/drive/MyDrive/Dataset/Dementia_sample...,1,90,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Normal
1,/content/drive/MyDrive/Dataset/Dementia_sample...,1,91,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Normal
2,/content/drive/MyDrive/Dataset/Dementia_sample...,2,82,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Normal
3,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Normal
4,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Normal
...,...,...,...,...,...
609,/content/drive/MyDrive/Dataset/Dementia_sample...,1,78,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Mild AD
610,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Mild AD
611,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Mild AD
612,/content/drive/MyDrive/Dataset/Dementia_sample...,2,79,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",Mild AD


In [16]:
## label2idx
final_df['label'] = final_df['label'].map({
    'Normal':0,
    'Mild AD' :1
})

In [None]:
final_df.head()
# final_df.to_csv('/content/drive/MyDrive/Dataset/Dementia_sample/data_load.csv',index=False)

In [None]:
final_df = pd.read_csv('/content/drive/MyDrive/Dataset/Dementia_sample/data_load.csv')

In [71]:
final_df

Unnamed: 0,image_path,Gender,Age,Image_Number,label
0,/content/drive/MyDrive/Dataset/Dementia_sample...,1,90,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
1,/content/drive/MyDrive/Dataset/Dementia_sample...,1,91,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
2,/content/drive/MyDrive/Dataset/Dementia_sample...,2,82,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
3,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
4,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
...,...,...,...,...,...
609,/content/drive/MyDrive/Dataset/Dementia_sample...,1,78,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
610,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
611,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
612,/content/drive/MyDrive/Dataset/Dementia_sample...,2,79,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1


In [51]:
len(final_df['label']==0)

614

In [54]:
len(final_df[(final_df['Gender']== 1) & (final_df['label']==0)])

121

In [18]:
final_df

Unnamed: 0,image_path,Gender,Age,Image_Number,label
0,/content/drive/MyDrive/Dataset/Dementia_sample...,1,90,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
1,/content/drive/MyDrive/Dataset/Dementia_sample...,1,91,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
2,/content/drive/MyDrive/Dataset/Dementia_sample...,2,82,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
3,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
4,/content/drive/MyDrive/Dataset/Dementia_sample...,2,83,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",0
...,...,...,...,...,...
609,/content/drive/MyDrive/Dataset/Dementia_sample...,1,78,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
610,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
611,/content/drive/MyDrive/Dataset/Dementia_sample...,2,88,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1
612,/content/drive/MyDrive/Dataset/Dementia_sample...,2,79,"[0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,...",1


In [20]:
use_colab = True
assert use_colab in [True, False]
is_train = True

In [21]:
if use_colab:
    checkpoint_dir ='/content/drive/MyDrive/아이펠/ai_트랙/train_ckpt/medical_baseline/exp1'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
else:
    checkpoint_dir = './train_ckpt/segmentation/exp1'

In [22]:
inception_res_model = tf.keras.applications.InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=(299, 299, 3)
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [23]:
final_df[['Gender','Age','Image_Number']].shape

(614, 3)

In [25]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   image_path    614 non-null    object
 1   Gender        614 non-null    int64 
 2   Age           614 non-null    int64 
 3   Image_Number  614 non-null    object
 4   label         614 non-null    int64 
dtypes: int64(3), object(2)
memory usage: 24.1+ KB


In [26]:
################################ 엑셀 가져올 부분

from sklearn.model_selection import train_test_split

train_data,valid_data, train_labels,valid_labels = \
    train_test_split(final_df[['image_path','Gender','Age','Image_Number']], final_df['label'], test_size=0.2, random_state=42)
train_data,test_data, train_labels,test_labels = \
    train_test_split(train_data, train_labels, test_size=0.2, random_state=42)
"""
train_data, train_labels 0.6
valid_data, valid_labels 0.2
test_data, test_labels 0.2
"""
print(train_data.shape, valid_data.shape, test_data.shape)
print(train_labels.shape, valid_labels.shape, test_labels.shape)


(392, 4) (123, 4) (99, 4)
(392,) (123,) (99,)


In [95]:
print(type(train_data.iloc[0,0]))

<class 'str'>


In [None]:
def preprocess_and_augment(train_data, diagnose_data,label, training = True):
    image = tf.io.read_file(train_data)
    print(image.shape)
    image = tf.image.decode_jpeg(image, channels=3)
    print(image.shape)
    image = tf.image.convert_image_dtype(image, tf.float32)
    # 이미지 증강
    if training:
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.random_brightness(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.8, upper=1.2)

    image = tf.image.resize(image, [299, 299])

    label = tf.cast(label, np.int32)
    return image, diagnose_data, label

In [None]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_data.iloc[:,0],train_data.iloc[:,1:], train_labels))
train_dataset = train_dataset.map(preprocess_and_augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_dataset = train_dataset.cache().batch(30)
train_dataset

()
(None, None, 3)


<_BatchDataset element_spec=(TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 3), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [None]:
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_data.iloc[:,0],valid_data.iloc[:,1:], valid_labels))
valid_dataset = valid_dataset.map(lambda x1, x2, y: preprocess_and_augment(x1, x2, y, training=False) , num_parallel_calls=tf.data.experimental.AUTOTUNE)
valid_dataset = valid_dataset.cache().batch(batch_size=30)
valid_dataset

()
(None, None, 3)


<_BatchDataset element_spec=(TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 3), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [None]:
test_dataset = tf.data.Dataset.from_tensor_slices((test_data.iloc[:,0],test_data.iloc[:,1:], test_labels))
test_dataset = test_dataset.map(lambda x1, x2, y: preprocess_and_augment(x1, x2, y, training=False) , num_parallel_calls=tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.cache().batch(batch_size=30)
test_dataset

()
(None, None, 3)


<_BatchDataset element_spec=(TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 3), dtype=tf.int64, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [None]:
##### functional api
# inception_res_partial = models.Model(inputs=inception_res_model.input, outputs=inception_res_model.output)

# input = tf.keras.layers.Input(shape=(299, 299, 3))

# x = inception_res_partial(input)
# x = tf.keras.layers.GlobalAveragePooling2D()(x)
# x = tf.keras.layers.Dense(1024, activation='relu')(x)


# dig_data_input = tf.keras.layers.Input(shape=(3,))
# dig_data = tf.keras.layers.Dense(3, activation='relu')(dig_data_input)
# x = tf.keras.layers.Concatenate(axis=-1)([x,dig_data])
# x = tf.keras.layers.Dense(1027, activation='relu')(x)
# x = tf.keras.layers.Dense(3, activation='softmax')(x)

# dig_model = tf.keras.models.Model(inputs=[input,dig_data_input], outputs=x)

# dig_model.summary()


In [None]:
class pretrained_model(tf.keras.Model):
    def __init__(self):
        super(pretrained_model, self).__init__()
        self.inception_res_model = tf.keras.applications.InceptionResNetV2(include_top=False,weights='imagenet',input_shape=(299, 299, 3))
        self.inception_res_partial = models.Model(inputs=self.inception_res_model.input, outputs=self.inception_res_model.output)
        self.global_average_pooling = tf.keras.layers.GlobalAveragePooling2D()
        self.dense1 = tf.keras.layers.Dense(1024, activation='relu')
    def call(self, input):

        x = self.inception_res_partial(input)
        x = self.global_average_pooling(x)
        x = self.dense1(x)
        return x

In [None]:
class dignose_model(tf.keras.Model):
    def __init__(self):
        super(dignose_model, self).__init__()
        #### 특정 feature 가중치 ---> 젊은이에게 부정적 가중치
        ###### loss를 customize 해보자
        self.dense1 = tf.keras.layers.Dense(3, activation='relu')
    def call(self, input):
        x = self.dense1(input)
        return x

In [None]:
class concat_model(tf.keras.Model):
    def __init__(self):
        super(concat_model, self).__init__()
        self.pretrained_model = pretrained_model()
        self.dignose_model = dignose_model()
        self.concat = tf.keras.layers.Concatenate(axis=-1)
        self.dense1 = tf.keras.layers.Dense(1027, activation='relu')
        self.dense2 = tf.keras.layers.Dense(3, activation='softmax')
    def call(self, inputs):
        img, digno=inputs
        img = self.pretrained_model(img)
        digno = self.dignose_model(digno)
        x = self.concat((img,digno))
        x = self.dense1(x)
        x = self.dense2(x)
        return x

In [None]:
model = concat_model()

In [None]:
for data, dianose, labels in train_dataset.take(1):
    model(data, dianose)

TypeError: concat_model.call() takes 2 positional arguments but 3 were given

In [None]:


model.summary()

Model: "concat_model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 pretrained_model_6 (pretra  multiple                  55910624  
 ined_model)                                                     
                                                                 
 dignose_model_6 (dignose_m  multiple                  12        
 odel)                                                           
                                                                 
 concatenate_9 (Concatenate  multiple                  0         
 )                                                               
                                                                 
 dense_32 (Dense)            multiple                  1055756   
                                                                 
 dense_33 (Dense)            multiple                  3084      
                                                    

In [None]:
diagnosis_data_result = np.random.randint(3, size=(300, 1))

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

In [None]:
loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [None]:
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

In [None]:
model.fit(x=[image_data_array, diagnosis_data], y=diagnosis_data_result, epochs=10)

Epoch 1/10
 1/10 [==>...........................] - ETA: 21:32 - loss: 7.0059 - accuracy: 0.2188

KeyboardInterrupt: 