In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from os import listdir
from glob import glob
from PIL import Image
import os
import cv2
import scipy.ndimage as ndimage
from skimage import io

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,precision_score,recall_score
from sklearn.metrics import roc_curve,auc

In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.utils import load_img, img_to_array
from keras.losses import binary_crossentropy
from keras.models import Sequential,load_model
from keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,Dropout,BatchNormalization 
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras import backend as K
from keras import activations
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers


from numpy import expand_dims


In [4]:
# Read the per-split CSV manifests in one pass. Later cells read the `path`
# column of the train and validation frames to locate the image files.
train_df, val_df, test_df = map(
    pd.read_csv,
    (
        "../data/processed/trainpath_df1.csv",
        "../data/processed/valpath_df1.csv",
        "../data/processed/testpath_df1.csv",
    ),
)

In [5]:
def load_data(files, target_size=(50, 50)):
    """Load images and their binary labels from a sequence of file paths.

    Each image is read with ``load_img`` at ``target_size``, converted with
    ``img_to_array`` and divided by 255. The label is taken from the character
    five positions from the end of the path (the digit immediately before a
    four-character extension such as ``.png``): ``'1'`` -> 1, ``'0'`` -> 0.

    Parameters
    ----------
    files : iterable of str
        Image file paths whose character at index -5 is ``'0'`` or ``'1'``.
    target_size : tuple of int, optional
        ``(height, width)`` passed to ``load_img``. Defaults to ``(50, 50)``.

    Returns
    -------
    X : numpy.ndarray
        The images stacked along a new first axis, one entry per path.
    y : list of int
        The 0/1 label for each image, in the same order as ``X``.

    Raises
    ------
    ValueError
        If a path has no ``'0'``/``'1'`` label at index -5. Without this check
        the image would be kept with no matching label, leaving ``X`` and ``y``
        with different lengths.
    """
    label_of = {'0': 0, '1': 1}
    X = []
    y = []
    for file in files:
        # Resolve the label before loading the image so an unlabeled path can
        # never contribute an image without a label. Slicing (rather than
        # indexing) yields '' for paths shorter than five characters.
        marker = file[-5:-4]
        if marker not in label_of:
            raise ValueError(
                "cannot read a 0/1 class label from path {!r}".format(file)
            )
        img = load_img(file, target_size=target_size)
        pix = img_to_array(img)
        pix /= 255
        X.append(pix)
        y.append(label_of[marker])
    if not X:
        # np.stack rejects an empty list; return an empty batch instead.
        # Assumes 3-channel float32 images (load_img / img_to_array defaults)
        # -- TODO confirm if a different color mode is ever used.
        return np.empty((0, *target_size, 3), dtype="float32"), y
    return np.stack(X), y

In [6]:
# Load the training images into numpy arrays.
X, y = load_data(train_df.path)
# Stack the per-image labels so y has one row per image.
y = np.vstack(y)
X.shape, y.shape

((188300, 50, 50, 3), (188300, 1))

In [7]:
# Load the validation images into numpy arrays.
X_val, y_val = load_data(val_df.path)
# Stack the per-image labels so y_val has one row per image.
y_val = np.vstack(y_val)
X_val.shape, y_val.shape

((41625, 50, 50, 3), (41625, 1))

### Training the model with imbalanced data

In [8]:
# Adam optimizer passed to model.compile.
# NOTE(review): 0.01 is ten times the documented Keras default for Adam
# (0.001), and the recorded run never improves val_acc after epoch 1 --
# confirm this learning rate is intended.
opt = optimizers.Adam(learning_rate=0.01)

In [9]:
# CNN for binary classification of 50x50 RGB patches.
# Four 3x3 "same"-padded ReLU convolutions, each followed by 2x2 max pooling;
# the first three stages also apply batch normalization. A 128-unit dense
# layer with dropout feeds a single sigmoid output unit.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', padding="same", input_shape=(50, 50, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    Conv2D(64, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    Conv2D(128, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    Conv2D(128, kernel_size=(3, 3), activation='relu', padding="same"),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    # Dense(256, activation='relu'),  # currently disabled
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss=keras.losses.binary_crossentropy,
    optimizer=opt,
    metrics=['acc'],
)

# Write the model to disk only on epochs where validation accuracy improves.
checkpoint = ModelCheckpoint(
    "../models/final_imbalance_best_adam1.hdf5",
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Halt training after 10 consecutive epochs without a lower validation loss.
es = EarlyStopping(monitor='val_loss', mode='min', patience=10, verbose=1)

In [10]:
# Train on the in-memory arrays, validating on the held-out split each epoch.
# `M` holds the value returned by fit (the Keras training history).
M = model.fit(
    X,
    y,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=256,
    callbacks=[checkpoint, es],
)

Epoch 1/100
Epoch 1: val_acc improved from -inf to 0.68983, saving model to ../models/final_imbalance_best_adam1.hdf5
Epoch 2/100
Epoch 2: val_acc did not improve from 0.68983
Epoch 3/100
Epoch 3: val_acc did not improve from 0.68983
Epoch 4/100
Epoch 4: val_acc did not improve from 0.68983
Epoch 5/100
Epoch 5: val_acc did not improve from 0.68983
Epoch 6/100
Epoch 6: val_acc did not improve from 0.68983
Epoch 7/100
Epoch 7: val_acc did not improve from 0.68983
Epoch 8/100
Epoch 8: val_acc did not improve from 0.68983
Epoch 9/100
Epoch 9: val_acc did not improve from 0.68983
Epoch 10/100
Epoch 10: val_acc did not improve from 0.68983
Epoch 11/100
Epoch 11: val_acc did not improve from 0.68983
Epoch 11: early stopping
