In [37]:
# Mount Google Drive inside the Colab VM so files under /content/drive
# become readable from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [39]:
!unzip /content/drive/MyDrive/ML_HW4/HW4_data.zip

Archive:  /content/drive/MyDrive/ML_HW4/HW4_data.zip
replace HW4_data/private/test_853.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [40]:
# Decode training images train_0.jpg .. train_9999.jpg in index order into a
# list of arrays, then read the label table with its first column as the index.
X_train = [
    np.asarray(Image.open("HW4_data/public/train_{i}.jpg".format(i=idx)))
    for idx in range(10000)
]
df_train = pd.read_csv("HW4_data/public/train_label.csv", index_col=0)

In [41]:
# Decode validation images val_0.jpg .. val_999.jpg in index order into a list
# of arrays, then read the label table with its first column as the index.
X_val = [
    np.asarray(Image.open("HW4_data/public/val_{i}.jpg".format(i=idx)))
    for idx in range(1000)
]

df_val = pd.read_csv("HW4_data/public/val_label.csv", index_col=0)

In [42]:
# Decode test images test_0.jpg .. test_1499.jpg in index order into a list of
# arrays, then read the accompanying CSV with its first column as the index.
X_test = [
    np.asarray(Image.open("HW4_data/private/test_{i}.jpg".format(i=idx)))
    for idx in range(1500)
]

df_test_demo = pd.read_csv("HW4_data/private/test_demo.csv", index_col=0)

In [43]:
# Turn each split's list of per-image arrays into a single ndarray.
X_train, X_val, X_test = map(np.array, (X_train, X_val, X_test))

In [44]:
# Cast every split to float32 and give it an explicit trailing channel axis,
# i.e. shape (n_images, 128, 128, 1), which is what the Conv2D stacks consume.
X_train = X_train.astype('float32').reshape((len(X_train), 128, 128, 1))
X_val = X_val.astype('float32').reshape((len(X_val), 128, 128, 1))
X_test = X_test.astype('float32').reshape((len(X_test), 128, 128, 1))

In [45]:
# Number of samples per mini-batch in the tf.data pipelines.
batch_size = 32
# Model input height and width in pixels (the images are reshaped to 128x128x1).
img_height = img_width = 128

In [46]:
# Pipelines for the basic task: each image is paired with its "Edema" label.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, df_train["Edema"]))
    # Keras ignores fit(shuffle=...) when given a tf.data.Dataset, so without
    # this step every epoch would replay the same batches in file order.
    # A buffer covering the whole split gives a full reshuffle each epoch.
    .shuffle(buffer_size=len(X_train))
    .batch(batch_size=batch_size)
)
# Validation order does not affect the metrics, so it is left unshuffled.
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, df_val["Edema"]))
    .batch(batch_size=batch_size)
)

In [47]:
# Basic model: a small CNN with one output unit for a single binary label.
num_classes = 1
model = Sequential([
  # Multiply inputs by 1/255 (assumes 0-255 pixel intensities -- TODO confirm).
  layers.Rescaling(1./255, input_shape=(img_height, img_width, 1)),
  # Three conv + max-pool stages: channels 16 -> 32 -> 64, spatial size halved
  # by each pooling layer.
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # Sigmoid keeps the score inside (0, 1) so that the 0.5 cut-off applied to
  # the predictions is a threshold on a bounded value; this also matches the
  # output layer of the multi-label model defined later in the notebook.
  layers.Dense(num_classes, activation='sigmoid')
  ])

In [48]:
# Configure training for the basic model: Adam optimiser, mean-squared-error
# loss against the 0/1 labels, accuracy reported per epoch.
# NOTE(review): binary cross-entropy is the more common loss for a 0/1 target;
# only switch if the model's output is a probability -- confirm before changing.
model.compile(
    loss="mean_squared_error",
    optimizer='adam',
    metrics=['accuracy'],
)

In [49]:
# Print the layer-by-layer output shapes and parameter counts of the basic model.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 128, 128, 1)       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 128, 128, 16)      160       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 64, 64, 16)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 64, 64, 32)        4640      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 32, 32, 32)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 32, 32, 64)       

In [50]:
# Train the basic model for a fixed number of passes over train_dataset,
# evaluating on valid_dataset after each one; per-epoch metrics are kept in
# `history`.
epochs = 5
history = model.fit(train_dataset, epochs=epochs, validation_data=valid_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [51]:
# Score the test images with the basic model and binarise the scores:
# strictly above 0.5 becomes 1, everything else 0.
predictions = model.predict(X_test)
new_prediction = (predictions > 0.5).astype(int)

In [52]:
from sklearn.metrics import f1_score

In [53]:
# Disabled: new_prediction is computed from X_test, not from the validation
# images, so it cannot be scored against df_val["Edema"]. To re-enable this
# check, pass predictions made on X_val as y_pred.
#f1_score(y_true=df_val["Edema"],y_pred=new_prediction)

In [54]:
# Save the binarised test predictions as a CSV with a single "Edema" column;
# the row index is written as the first column.
ans_path = '108060033_basic_prediction.csv'
basic_submission = pd.DataFrame(new_prediction, columns=["Edema"])
basic_submission.to_csv(ans_path, index=True)

In [55]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D

In [56]:
# Multi-label targets: every label-table column except Age and Gender.
non_label_cols = ['Age', 'Gender']
y_train = df_train.drop(columns=non_label_cols).to_numpy()
y_val = df_val.drop(columns=non_label_cols).to_numpy()

In [57]:
# Pipelines for the multi-label task: each image is paired with its row of
# y_train / y_val. These rebind train_dataset / valid_dataset.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # Keras ignores fit(shuffle=...) when given a tf.data.Dataset, so without
    # this step every epoch would replay the same batches in file order.
    # A buffer covering the whole split gives a full reshuffle each epoch.
    .shuffle(buffer_size=len(X_train))
    .batch(batch_size=batch_size)
)
# Validation order does not affect the metrics, so it is left unshuffled.
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(batch_size=batch_size)
)

In [58]:
# Advanced model: same CNN backbone as the basic model, with seven independent
# sigmoid outputs (one per label column).
num_classes = 7
# Build with tf.keras.Sequential explicitly: the bare name `Sequential` is
# re-imported from the standalone `keras` package earlier in the notebook,
# whereas `layers` comes from tensorflow.keras. Naming the container this way
# guarantees that it and its layers come from the same package.
model_adv = tf.keras.Sequential([
  # Multiply inputs by 1/255 (assumes 0-255 pixel intensities -- TODO confirm).
  layers.Rescaling(1./255, input_shape=(img_height, img_width, 1)),
  # Three conv + max-pool stages: channels 16 -> 32 -> 64, spatial size halved
  # by each pooling layer.
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # One sigmoid per label, so each output is scored independently of the others.
  layers.Dense(num_classes, activation='sigmoid')
  ])

In [59]:
# Configure training for the multi-label model: Adam optimiser, binary
# cross-entropy on the sigmoid outputs, accuracy reported per epoch.
model_adv.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [60]:
# Train the multi-label model for a fixed number of passes over train_dataset,
# evaluating on valid_dataset after each one. `history` is rebound to this run.
epochs = 5
history = model_adv.fit(train_dataset, epochs=epochs, validation_data=valid_dataset)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [61]:
# Score the test images with the multi-label model and binarise each output:
# strictly above 0.2 becomes 1, everything else 0.
# NOTE(review): 0.2 is lower than the 0.5 cut-off used for the basic model --
# presumably chosen deliberately; confirm.
predictions = model_adv.predict(X_test)
new_prediction_adv = (predictions > 0.2).astype(int)

In [62]:
# Save the binarised multi-label test predictions as CSV, one column per
# finding; the row index is written as the first column.
# NOTE(review): the column names are hard-coded and assumed to follow the same
# order as the label columns the model was trained on -- verify against the CSV.
ans_path = '108060033_advanced_prediction.csv'
finding_names = [
    "Atelectasis",
    "Cardiomegaly",
    "Edema",
    "Lung Opacity",
    "No Finding",
    "Pleural Effusion",
    "Support Devices",
]
adv_submission = pd.DataFrame(new_prediction_adv, columns=finding_names)
adv_submission.to_csv(ans_path, index=True)

In [63]:
# Disabled: new_prediction_adv is computed from X_test, whereas y_val holds the
# validation labels. To re-enable this check, score predictions made on X_val.
#f1_score(y_val, new_prediction_adv, average='weighted')