# Class Challenge: Image Classification of COVID-19 X-rays
# Task 2 [Total points: 30]

## Setup

* This assignment involves the following packages: 'matplotlib', 'numpy', and 'sklearn'. 

* If you are using conda, use the following commands to install the above packages:<br>
```shell
conda install matplotlib
conda install numpy
conda install -c anaconda scikit-learn
```

* If you are using pip, use the following commands to install the above packages: <br> 
```shell
pip install matplotlib
pip install numpy
pip install scikit-learn
```

## Data

Please download the data using the following link: [COVID-19](https://drive.google.com/file/d/1Y88tgqpQ1Pjko_7rntcPowOJs_QNOrJ-/view). 

* After downloading 'Covid_Data_GradientCrescent.zip', unzip the file and you should see the following data structure:


|--all<br>
|--------train<br>
|--------test<br>
|--two<br>
|--------train<br>
|--------test<br>


* Put the 'all' folder, the 'two' folder and this python notebook in the **same directory** so that the following code can correctly locate the data.  



## [20 points] Multi-class Classification

In [1]:
import os

# Configure the native runtimes BEFORE importing TensorFlow/NumPy so the
# settings are already in the environment when those libraries load.
os.environ['OMP_NUM_THREADS'] = '1'        # limit OpenMP to a single thread
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # hide all GPUs -> run on CPU only

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Last expression of the cell: displays the TensorFlow version in use.
tf.__version__

'2.1.0'

#### Load Image Data

In [2]:
# Paths are relative to the notebook's directory (see the "Data" section).
DATASET_PATH  = 'all/train'
TEST_DIR      = 'all/test'

# One sub-directory per class. Only directories are counted so that stray
# files (e.g. .DS_Store, Thumbs.db) do not inflate NUM_CLASSES.
DATA_LIST     = sorted(
    entry for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)

IMAGE_SIZE    = (224, 224)  # (height, width) passed to the generators as target_size
NUM_CLASSES   = len(DATA_LIST)
BATCH_SIZE    = 10          # try reducing batch size or freezing more layers if you run out of memory
NUM_EPOCHS    = 100
LEARNING_RATE = 0.0001      # start with a higher rate (e.g. 0.001) and experiment with reducing it gradually

#### Generate Training and Validation Batches

In [3]:
# Training generator: rescale to [0, 1] plus random augmentation.
# NOTE: featurewise_center / featurewise_std_normalization / zca_whitening were
# removed. They rely on statistics computed by `train_datagen.fit(...)`, which
# this notebook never calls, so they only produced warnings and were not applied.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=50,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.25,
                                   zoom_range=0.1,
                                   channel_shift_range=20,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='constant',
                                   validation_split=0.2)

# Validation generator: same rescaling and same split fraction, but NO
# augmentation, so validation metrics are measured on undistorted images.
valid_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_batches = train_datagen.flow_from_directory(DATASET_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=BATCH_SIZE,
                                                  class_mode="categorical",
                                                  subset="training",
                                                  shuffle=True,
                                                  seed=42)

# Shuffling is unnecessary for validation; a fixed order keeps runs comparable.
valid_batches = valid_datagen.flow_from_directory(DATASET_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=BATCH_SIZE,
                                                  class_mode="categorical",
                                                  subset="validation",
                                                  shuffle=False,
                                                  seed=42)



Found 216 images belonging to 4 classes.
Found 54 images belonging to 4 classes.


#### [10 points] Build Model
Hint: Starting from a pre-trained model typically helps performance on a new task, e.g. starting with weights obtained by training on ImageNet. 

In [4]:
# Transfer learning: a frozen ImageNet-pretrained VGG16 convolutional base
# followed by a small dense classification head.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Sequential

# include_top=False drops VGG16's own classifier, so its `classes` argument is
# irrelevant and omitted. pooling=None (the Python value, not the string 'None')
# keeps the raw 4D feature map, which is flattened below.
vgg_16 = VGG16(include_top=False, weights='imagenet',
               input_shape=IMAGE_SIZE + (3,), pooling=None)

# Freeze the convolutional base: only the dense head is trained.
vgg_16.trainable = False

covid_model = Sequential()
covid_model.add(vgg_16)
covid_model.add(Flatten())
# Named so the t-SNE section can retrieve it with get_layer('feature_dense').
covid_model.add(Dense(256, activation='relu', name='feature_dense'))
# Raw logits (no activation); the loss is configured with from_logits=True.
# The unit count follows the classes actually found by the data generator.
covid_model.add(Dense(train_batches.num_classes, activation=None))

# The leading None is the (variable) batch dimension.
covid_model.build(input_shape=(None,) + IMAGE_SIZE + (3,))
covid_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 256)               6422784   
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 1028      
Total params: 21,138,500
Trainable params: 6,423,812
Non-trainable params: 14,714,688
_________________________________________________________________


#### [5 points] Train Model

In [None]:
# Fit the model.
# Number of batches per epoch for each generator (includes the last partial batch).
print(len(train_batches))
print(len(valid_batches))

# Full-batch step counts (floor division drops the last partial batch).
# They are kept for reference only: fit() below is not given explicit step
# counts, so Keras uses len(generator) and every image is seen each epoch.
STEP_SIZE_TRAIN = train_batches.n // train_batches.batch_size
STEP_SIZE_VALID = valid_batches.n // valid_batches.batch_size

# Use the configured LEARNING_RATE instead of Adam's default. The model's
# last layer emits logits, hence from_logits=True.
covid_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured NUM_EPOCHS rather than a hard-coded value.
history = covid_model.fit(train_batches,
                          epochs=NUM_EPOCHS,
                          validation_data=valid_batches)

22
6




  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 22 steps, validate for 6 steps
Epoch 1/100




Epoch 2/100

#### [5 points] Plot Accuracy and Loss During Training

In [None]:
# Plot accuracy and loss recorded by `history` during training.
# The epoch count is read from the history itself so the titles always match
# the run that was actually performed.
epochs_run = len(history.history['loss'])

# Accuracy per epoch
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history.history['accuracy'], label='train accuracy')
ax_acc.plot(history.history['val_accuracy'], label='validation accuracy')
ax_acc.set_title('Accuracy over {} epochs'.format(epochs_run))
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
# Full [0, 1] range: early-epoch accuracy of a 4-class model can be below 0.4.
ax_acc.set_ylim([0, 1.05])
ax_acc.legend(loc='lower right')

# Loss per epoch
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history.history['loss'], label='train loss')
ax_loss.plot(history.history['val_loss'], label='validation loss')
ax_loss.set_title('Loss over {} epochs'.format(epochs_run))
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
# Only pin the lower bound; the upper bound auto-scales so large losses are not clipped.
ax_loss.set_ylim(bottom=0)
ax_loss.legend(loc='upper right');

#### Testing Model

In [None]:
# Test-time preprocessing: rescale only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# batch_size=1 so every test image is evaluated exactly once; order is
# irrelevant for aggregate metrics, so shuffling is disabled.
eval_generator = test_datagen.flow_from_directory(TEST_DIR,
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  seed=42,
                                                  class_mode="categorical")
eval_generator.reset()
print(len(eval_generator))

# Evaluate the trained `covid_model` (the notebook never defines `model`).
# `steps` must be an integer; len(eval_generator) is the number of batches.
test_loss, test_accuracy = covid_model.evaluate(eval_generator,
                                                steps=len(eval_generator),
                                                verbose=1)
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

## [10 points] TSNE Plot
t-Distributed Stochastic Neighbor Embedding (t-SNE) is a widely used technique for dimensionality reduction that is particularly well suited for the visualization of high-dimensional datasets. After training is complete, extract features from a specific deep layer of your choice, use t-SNE to reduce the dimensionality of your extracted features to 2 dimensions and plot the resulting 2D features.

In [None]:
from sklearn.manifold import TSNE

# Feature extractor: maps an input image to the activations of the layer just
# before the output layer of `covid_model` (its penultimate layer).
feature_layer = covid_model.layers[-2]
intermediate_layer_model = tf.keras.Model(inputs=covid_model.input,
                                          outputs=feature_layer.output)

# shuffle=False keeps the predictions aligned with `generator.classes`.
tsne_eval_generator = test_datagen.flow_from_directory(DATASET_PATH,
                                                       target_size=IMAGE_SIZE,
                                                       batch_size=1,
                                                       shuffle=False,
                                                       seed=42,
                                                       class_mode="categorical")

# Run over the WHOLE generator (one step per image with batch_size=1) so that
# every class is represented and `outputs` lines up with the labels.
outputs = intermediate_layer_model.predict(tsne_eval_generator,
                                           steps=len(tsne_eval_generator),
                                           verbose=1)
print(outputs.shape)

# Integer class index of each image, in generator order.
label = tsne_eval_generator.classes

# These are the layer's output activations (not its weights), reduced to 2D.
# random_state makes the embedding reproducible across runs.
features = TSNE(n_components=2, random_state=42).fit_transform(outputs)
print(features.shape)

# Invert {class name -> index} so the legend shows the real folder names.
index_to_name = {index: name
                 for name, index in tsne_eval_generator.class_indices.items()}
class_colors = ['b', 'y', 'g', 'r']  # same colour order as before: index 0..3

fig, ax = plt.subplots()
for class_index in sorted(index_to_name):
    in_class = label == class_index
    ax.plot(features[in_class, 0], features[in_class, 1],
            linestyle='none', marker='o',
            color=class_colors[class_index % len(class_colors)],
            label=index_to_name[class_index])
ax.set_title('2D features')
ax.legend();

## AlexNet Architecture

In [None]:
# (1) Dependencies: use tf.keras, consistent with the rest of the notebook
#     (the data generators come from tensorflow.keras).
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv2D,
                                     Dense, Dropout, Flatten, MaxPooling2D)

np.random.seed(1000)
tf.random.set_seed(1000)  # tf.keras weight initialisation draws from TensorFlow's RNG


# (2) Model definition
def build_alexnet(input_shape, num_classes):
    """Build an AlexNet-style convolutional classifier.

    Args:
        input_shape: shape of one input image as (height, width, channels).
        num_classes: number of units in the softmax output layer.

    Returns:
        An uncompiled `Sequential` model.
    """
    net = Sequential()

    # 1st convolutional layer -> pooling -> batch normalisation
    net.add(Conv2D(filters=96, input_shape=input_shape, kernel_size=(11, 11),
                   strides=(4, 4), padding='valid'))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    net.add(BatchNormalization())

    # 2nd convolutional layer -> pooling -> batch normalisation
    net.add(Conv2D(filters=256, kernel_size=(11, 11), strides=(1, 1), padding='valid'))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    net.add(BatchNormalization())

    # 3rd and 4th convolutional layers (no pooling) -> batch normalisation
    for _ in range(2):
        net.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='valid'))
        net.add(Activation('relu'))
        net.add(BatchNormalization())

    # 5th convolutional layer -> pooling -> batch normalisation
    net.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='valid'))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))
    net.add(BatchNormalization())

    # Dense head: three fully connected layers, each followed by dropout
    # (to limit overfitting) and batch normalisation.
    net.add(Flatten())
    for units in (4096, 4096, 1000):
        net.add(Dense(units))
        net.add(Activation('relu'))
        net.add(Dropout(0.4))
        net.add(BatchNormalization())

    # Output layer: one probability per class.
    net.add(Dense(num_classes))
    net.add(Activation('softmax'))
    return net


# (3) Instantiate for this notebook's data: same image size as the generators
#     and as many outputs as classes found on disk (the tutorial's 17-way
#     oxflower17 output did not match this dataset).
alex_net_model = build_alexnet(input_shape=IMAGE_SIZE + (3,),
                               num_classes=train_batches.num_classes)
alex_net_model.summary()

# (4) Compile. The model ends in softmax, so the loss takes probabilities.
alex_net_model.compile(loss='categorical_crossentropy', optimizer='adam',
                       metrics=['accuracy'])

# (5) Train on the notebook's generators. Batch size and the validation split
#     are already defined by `train_batches` / `valid_batches`.
alex_net_history = alex_net_model.fit(train_batches,
                                      epochs=1,
                                      verbose=1,
                                      validation_data=valid_batches)

