In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as img

In [2]:
import os
import keras
from sklearn.metrics import f1_score
from tensorflow.keras.models import Model, Sequential 
from tensorflow.keras.layers import  BatchNormalization, Conv2D, Dropout, MaxPooling2D ,Flatten, Dense
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB   
from sklearn.ensemble import GradientBoostingClassifier 

In [3]:
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
STUDENT_ID = '108062313'

In [None]:
!unzip "/content/drive/MyDrive/HW4_data"

In [None]:
X_train = []
for i in range(10000):
    X_train.append(np.asarray(Image.open("HW4_data/public/train_{i}.jpg".format(i=i))))
    
df_train = pd.read_csv("HW4_data/public/train_label.csv", index_col=0)
X_train = np.array(X_train)

In [None]:
X_val = []
for i in range(1000):
    X_val.append(np.asarray(Image.open("HW4_data/public/val_{i}.jpg".format(i=i))))
    
df_val = pd.read_csv("HW4_data/public/val_label.csv", index_col=0)
X_val = np.array(X_val)

In [None]:
X_test = []
for i in range(1500):
    if os.path.exists("HW4_data/private/test_"+ str(i) +".jpg"):
        X_test.append(np.asarray(Image.open("HW4_data/private/test_{i}.jpg".format(i=i))))
    

df_test_demo = pd.read_csv("HW4_data/private/test_demo.csv", index_col=0)
X_test = np.array(X_test)

In [None]:
# basic ===========================================

In [None]:
model_basic = Sequential() 

#first convolutional layer
model_basic.add(Conv2D(32, kernel_size=3, input_shape = (128, 128, 1), activation = 'relu'))
model_basic.add(Conv2D(32, kernel_size=3, activation = 'relu'))
model_basic.add(BatchNormalization())
model_basic.add(MaxPooling2D(pool_size = (2, 2)))

# Second convolutional layer
model_basic.add(Conv2D(64, kernel_size=3, activation = 'relu'))
model_basic.add(Conv2D(64, kernel_size=3, activation = 'relu'))
model_basic.add(BatchNormalization())
model_basic.add(MaxPooling2D(pool_size = (2, 2)))

# Third convolutional layer
model_basic.add(Conv2D(128, kernel_size=3, activation = 'relu'))
model_basic.add(Conv2D(128, kernel_size=3, activation = 'relu'))
model_basic.add(Conv2D(128, kernel_size=3, activation = 'relu'))
model_basic.add(Conv2D(128, kernel_size=3, activation = 'relu'))
model_basic.add(BatchNormalization())
model_basic.add(MaxPooling2D(pool_size=(2, 2)))



model_basic.add(Flatten())
model_basic.add(Dropout(0.4))

model_basic.add(Dense(units=128, activation = 'relu'))
model_basic.add(Dense(units=1, activation = 'sigmoid'))

model_basic.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

model_basic.summary()

In [None]:
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
history_basic = model_basic.fit(X_train, df_train["Edema"], batch_size=4, validation_data=(X_val, df_val["Edema"]), epochs=10)
model_basic.save('basic_model.h5')
# model_basis=keras.models.load_model('basic_model.h5')

In [None]:
basic_pred=model_basic.predict(X_test)
basic_pred=(basic_pred>0.5)

In [None]:
# f1_score(df_val["Edema"], basic_pred)

In [None]:
output_path = STUDENT_ID + '_basic_prediction.csv'
basic_output = pd.DataFrame()
for i in range(len(X_test)):
    basic_output=basic_output.append({"":round(i), 'Edema': int(basic_pred[i]) }, ignore_index=True)

basic_output.to_csv(output_path, index = False)

In [None]:
# bonus ===========================================

In [None]:
X_train_pred = model_basic.predict(X_train)
X_train_pred=(X_train_pred>0.5)
age = df_train["Age"]
age = np.expand_dims(age, axis=-1)
gender = df_train["Gender"]
gender = np.expand_dims(gender, axis=-1)
X_train_bonus=np.concatenate([X_train_pred, age, gender], axis=1)
print(X_train_bonus.shape)

In [None]:
X_val_pred = model_basic.predict(X_val)
X_val_pred=np.round(X_val_pred)
age = df_val["Age"]
age = np.expand_dims(age, axis=-1)
gender = df_val["Gender"]
gender = np.expand_dims(gender, axis=-1)
X_val_bonus=np.concatenate([X_val_pred, age, gender], axis=1)

In [None]:
X_test_pred = model_basic.predict(X_test)
X_teste_pred=np.round(X_test_pred)
age = df_test_demo["Age"]
age = np.expand_dims(age, axis=-1)
gender = df_test_demo["Gender"]
gender = np.expand_dims(gender, axis=-1)
X_test_bonus=np.concatenate([X_test_pred, age, gender], axis=1)

In [None]:
    model_bonus = RandomForestClassifier(n_estimators=100, random_state=0)
    model_bonus.fit(X_train_bonus, df_train["Edema"])

In [None]:
# model_bonus = GradientBoostingClassifier()
# model_bonus.fit(X_train_bonus, df_train["Edema"])

In [None]:
# model_bonus = GaussianNB()
# model_bonus.fit(X_train_bonus, df_train["Edema"])

GaussianNB()

In [None]:
bonus_pred=model_bonus.predict(X_test_bonus)
# f1_score(df_val["Edema"], bonus_pred)
# print(bonus_pred)

0.726962457337884

In [None]:
output_path = STUDENT_ID + '_bonus_prediction.csv'
bonus_output = pd.DataFrame()
for i in range(len(X_test)):
    bonus_output=bonus_output.append({"":round(i), 'Edema': int(bonus_pred[i]) }, ignore_index=True)

bonus_output.to_csv(output_path, index = False)

In [None]:
# advanced ===========================================

In [None]:
resnet=ResNet50(
      include_top=False, 
      weights='imagenet',
      input_shape=(128,128,3), 
      )

model_advanced = Sequential() 
model_advanced.add(resnet)
model_advanced.add(Flatten())
model_advanced.add(Dense(units=1000, activation = 'relu'))
model_advanced.add(Dense(units=7, activation = 'sigmoid'))
model_advanced.summary()
model_advanced.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy']

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_16 (Conv2D)          (None, 126, 126, 32)      320       
                                                                 
 conv2d_17 (Conv2D)          (None, 124, 124, 32)      9248      
                                                                 
 batch_normalization_6 (Batc  (None, 124, 124, 32)     128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 62, 62, 32)       0         
 2D)                                                             
                                                                 
 conv2d_18 (Conv2D)          (None, 60, 60, 64)        18496     
                                                                 
 conv2d_19 (Conv2D)          (None, 58, 58, 64)       

In [None]:
# for resnet
X_train_temp = X_train
X_train_temp = np.expand_dims(X_train_temp, axis=-1)
X_train_advanced = np.concatenate([X_train_temp, X_train_temp, X_train_temp], axis=-1)
X_train_advanced = np.expand_dims(X_train_advanced, axis=-1)
print(X_train_advanced.shape)
X_val_temp = X_val
X_val_temp = np.expand_dims(X_val_temp, axis=-1)
X_val_advanced = np.concatenate([X_val_temp, X_val_temp, X_val_temp], axis=-1)
X_val_advanced = np.expand_dims(X_val_advanced, axis=-1)
print(X_val_advanced.shape)
X_test_temp = X_test
X_test_temp = np.expand_dims(X_test_temp, axis=-1)
X_test_advanced = np.concatenate([X_test_temp, X_test_temp, X_test_temp], axis=-1)
X_test_advanced = np.expand_dims(X_test_advanced, axis=-1)
print(X_test_advanced.shape)

(10000, 128, 128, 3, 1)
(1000, 128, 128, 3, 1)
(1500, 128, 128, 3, 1)


In [None]:
Y_train_advanced=np.array(df_train)
Y_train_advanced=Y_train_advanced[:,0:7]
Y_val_advanced=np.array(df_val)
Y_val_advanced=Y_val_advanced[:,0:7]
print(Y_train_advanced.shape)
print(Y_val_advanced.shape)

(10000, 7)
(1000, 7)


In [None]:
reset 7 lable 0.62
history_advanced = model_advanced.fit(X_train_advanced, Y_train_advanced, validation_data=(X_val_advanced, Y_val_advanced), epochs=20)
model_advanced.save('advanced_model.h5')

In [None]:
advanced_pred=model_advanced.predict(X_test_advanced)
advanced_pred=(advanced_pred>0.4)
# print(advanced_pred)

In [None]:
# f1_score(Y_val_advanced, advanced_pred, average='weighted')

0.6767379594884202

In [None]:
output_path = STUDENT_ID + '_advanced_prediction.csv'
advanced_output = pd.DataFrame()
for i in range(len(X_test)):
    advanced_output=advanced_output.append({"":round(i), 
    "Atelectasis": int(advanced_pred[i][0]),
    "Cardiomegaly": int(advanced_pred[i][1]),
    "Edema": int(advanced_pred[i][2]),
    "Lung Opacity": int(advanced_pred[i][3]),
    "No Finding": int(advanced_pred[i][4]),
    "Pleural Effusion": int(advanced_pred[i][5]),
    "Support Devices": int(advanced_pred[i][6])}, ignore_index=True)

advanced_output.to_csv(output_path, index = False)