[tutorial](https://www.analyticsvidhya.com/blog/2019/01/build-image-classification-model-10-minutes/)

In [None]:
import keras 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D 
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras.preprocessing import image 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import os

In [None]:
# Read the image metadata table; the 'Unnamed: 0' column is dropped right after loading.
df = pd.read_csv('CoronaImages/Chest_xray_Corona_Metadata.csv').drop('Unnamed: 0', axis=1)

# Missing values in the two category columns are filled with 'Normal', and any
# cell equal to 'bacteria' is rewritten as 'Bacteria' so both spellings form one class.
na_fill = {'VirusCategory1': 'Normal', 'VirusCategory2': 'Normal'}
df = df.fillna(value=na_fill).replace('bacteria', 'Bacteria')

# Append one indicator column per VirusCategory1 value, named 'type_<value>'.
# The dtype is pinned so the label columns are numeric whatever the installed
# pandas uses as its get_dummies default (newer releases default to bool).
df = df.join(pd.get_dummies(df.VirusCategory1.values, prefix='type', dtype='uint8'))

In [None]:
# Display the first two rows of df as this cell's output.
df.head(2)

In [None]:
def get_image_value(path, target_size=(28, 28)):
    """Load one image file and return its pixel array divided by 255.

    Args:
        path: Location of the image file, handed to ``image.load_img``.
        target_size: ``(height, width)`` the image is resized to while loading.
            Defaults to ``(28, 28)``.

    Returns:
        The array produced by ``image.img_to_array`` with every value divided
        by 255.
    """
    # load_img expects a two-element (height, width) size. The number of
    # channels is controlled by its color_mode argument, not by target_size,
    # so no channel entry belongs in this tuple.
    img = image.load_img(path, target_size=target_size)
    return image.img_to_array(img) / 255

def _load_split(df, data_type, folder):
    """Load the images and indicator labels for the rows of one DataType value.

    Args:
        df: Metadata frame with ``DataType``, ``VirusCategory1``, ``ImagePath``
            and the ``type_<category>`` indicator columns.
        data_type: Value of ``df.DataType`` selecting the rows to load.
        folder: Sub-directory of ``CoronaImages/`` that holds the image files.

    Returns:
        dict with ``Images`` (array stacking one ``get_image_value`` result per
        selected row) and ``Labels`` (values of the ``type_*`` columns for the
        same rows, in the same order).
    """
    split_df = df[df.DataType == data_type]
    # Column order is taken from the FULL frame, not the split, so every split
    # produces label matrices with the same column layout.
    label_cols = [f'type_{i}' for i in df.VirusCategory1.unique()]
    labels = split_df[label_cols].values
    paths = [f'CoronaImages/{folder}/{name}' for name in split_df.ImagePath.values]
    images = [get_image_value(path) for path in tqdm(paths)]
    # Deliberately no .squeeze(): it would drop the leading batch axis whenever
    # a split contains exactly one image.
    return dict(Images=np.array(images), Labels=labels)


def get_train(df):
    """Return the ``Images``/``Labels`` dict for rows with ``DataType == 'TRAIN'``.

    Image files are read from ``CoronaImages/train/``.
    """
    return _load_split(df, 'TRAIN', 'train')


def get_test(df):
    """Return the ``Images``/``Labels`` dict for rows with ``DataType == 'TEST'``.

    Image files are read from ``CoronaImages/test/``.
    """
    return _load_split(df, 'TEST', 'test')
    
# Load both splits up front; each call returns a dict with 'Images' and 'Labels' entries.
train_dict = get_train(df)
test_dict = get_test(df)

In [None]:
# Pull the image arrays and label matrices out of the two split dicts.
x_train, y_train = train_dict['Images'], train_dict['Labels']
x_test, y_test = test_dict['Images'], test_dict['Labels']

# Shape report: single-image and whole-array shapes for the inputs on the
# first line, label-matrix shapes on the second.
print(*(arr.shape for arr in (x_train[0], x_train, x_test[0], x_test)))
print(*(arr.shape for arr in (y_train, y_test)))

### NN Experimentation

In [None]:
def get_conv_model(x, y, drop=.2):
    """Build and compile a small convolutional classifier sized to the data.

    Args:
        x: Image array whose first axis indexes samples; ``x.shape[1:]`` is
            used as the network's input shape.
        y: 2-D label matrix with one column per class; ``y.shape[1]`` sets the
            width of the softmax output layer.
        drop: Dropout rate applied after the pooling layer and after the
            hidden dense layer. Defaults to 0.2.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric. The model is not trained.
    """
    # Take both sizes from the arguments instead of hard-coding (28, 28, 3)
    # and 5, so the model always matches the arrays it will be fitted on.
    input_shape = tuple(x.shape[1:])
    n_classes = y.shape[1]

    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(drop),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(drop),
        Dense(n_classes, activation='softmax'),
    ])

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# NOTE(review): this always raises AssertionError. Presumably a deliberate stop
# so a "Run All" halts before the training cell that follows -- confirm, and
# remove once that cell is meant to run.
assert False

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

epochs = 5
batch_size = 128
# Early-stopping patience must be defined before it is used here, otherwise a
# fresh kernel raises NameError. 5 is the value the later split-training cell assigns.
pat = 5

# Stop when val_loss has not improved for `pat` epochs, and keep only the best
# weights seen so far on disk.
early_stopping = EarlyStopping(monitor='val_loss', patience=pat, verbose=1)
model_checkpoint = ModelCheckpoint('ModelCheckpointWeights.h5', verbose=1, save_best_only=True)

model = get_conv_model(x_train, y_train)
# NOTE(review): the TEST split doubles as validation data here, so early
# stopping and checkpoint selection both see it -- confirm this is intended.
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1,
          callbacks=[early_stopping, model_checkpoint], validation_data=(x_test, y_test))

In [None]:
# Rows of the TEST split whose VirusCategory1 is 'COVID-19'.
df.loc[df.VirusCategory1.eq('COVID-19') & df.DataType.eq('TEST')]

# Unfinished sketch for predicting on a single external image:
# test_img = get_image_value('Test.jpeg')
# test_predict = model.predict(test_img.reshape(28,28,3))

In [None]:
# NOTE(review): this always raises AssertionError. Presumably a deliberate stop
# so a "Run All" halts before the cells that follow -- confirm, and remove once
# those cells are meant to run.
assert False

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

epochs = 5
n_folds = 1
batch_size = 128
pat = 5      # early-stopping patience, in epochs
SEED = 42    # base seed; round i splits with SEED + i so reruns reproduce the same splits

model_history = []
for i in range(n_folds):
    # Hold out 15% of the training data as this round's validation set.
    train_x, test_x, train_y, test_y = train_test_split(
        x_train, y_train, train_size=.85, random_state=SEED + i)

    # Build fresh callbacks every round so no callback state (for example the
    # checkpoint's running best) can carry over from a previous round.
    # Every round writes to the same checkpoint file.
    early_stopping = EarlyStopping(monitor='val_loss', patience=pat, verbose=1)
    model_checkpoint = ModelCheckpoint('fas_mnist_1.h5', verbose=1, save_best_only=True)

    model = get_conv_model(train_x, train_y)
    results = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,
                        validation_data=(test_x, test_y),
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)
    model_history.append(results)
    print(f'Finished {i}')

In [None]:
from sklearn.model_selection import cross_val_score, KFold

# cross_val_score needs a scikit-learn estimator it can clone and score; the
# compiled Keras model with one-hot targets is not one, so the folds are run by
# hand with the KFold splitter instead.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

fold_scores = []
for fit_idx, eval_idx in kfold.split(x_test):
    # A fresh, untrained model per fold keeps the folds independent.
    fold_model = get_conv_model(x_test[fit_idx], y_test[fit_idx])
    fold_model.fit(x_test[fit_idx], y_test[fit_idx],
                   epochs=epochs, batch_size=batch_size, verbose=0)
    # evaluate() returns [loss, accuracy] because the model is compiled with
    # metrics=['accuracy'].
    _, fold_accuracy = fold_model.evaluate(x_test[eval_idx], y_test[eval_idx], verbose=0)
    fold_scores.append(fold_accuracy)

# One accuracy per fold, as an array like the one cross_val_score returns.
results = np.array(fold_scores)