In [101]:
import glob
import os

# TF_CPP_MIN_LOG_LEVEL has to be in the environment *before* TensorFlow is
# imported; setting it afterwards does not silence the messages emitted while
# the library loads. '2' hides INFO and WARNING output from the C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, Adamax

In [102]:
def list_labelled_images(root_dir, pattern="**/*.png"):
    """Find image files under ``root_dir`` and label each one by its parent folder.

    Parameters
    ----------
    root_dir : str
        Directory that contains one sub-folder per class.
    pattern : str, optional
        Glob pattern (relative to ``root_dir``) selecting the image files.

    Returns
    -------
    tuple[list[str], pandas.Series]
        The matching file paths in sorted order, and a Series holding the name
        of the directory that directly contains each file (used as its label).
    """
    # glob returns files in filesystem order, which is not stable between
    # machines; sorting makes the later seeded shuffle and the validation
    # split reproducible.
    paths = sorted(glob.glob(os.path.join(root_dir, pattern), recursive=True))
    # os.path handles the platform's separator, unlike splitting on "/".
    labels = pd.Series([os.path.basename(os.path.dirname(p)) for p in paths])
    return paths, labels


# Training + validation images and their labels.
image_paths, image_labels = list_labelled_images('../input/hostadadafinala/HOSTDATAFINAL/train')

# Test images and their labels.
image_paths_test, image_labels_test = list_labelled_images('../input/hostadadafinala/HOSTDATAFINAL/test')

In [103]:
# One row per image: its file path and its class label.
df = pd.DataFrame(dict(Paths=image_paths, Labels=image_labels))
df_test = pd.DataFrame(dict(Paths=image_paths_test, Labels=image_labels_test))

In [104]:
# Preview the first rows of the test frame (columns: Paths, Labels).
df_test.head()

In [105]:
# Class balance of the training + validation frame.
df["Labels"].value_counts()

In [106]:
# Report how many images ended up in each frame.
n_trainval, n_test = len(df), len(df_test)
print("Number of training and validation images:", n_trainval)
print("Number of testing images:", n_test)

In [107]:
# Shuffle every row with a fixed seed, then renumber the index from 0.
# Note: this rebinds `df`, so re-running the cell shuffles the already
# shuffled frame again.
shuffled = df.sample(frac=1, random_state=4567)
df = shuffled.reset_index(drop=True)

In [108]:
# Show the first six rows of the shuffled frame in a 2x3 grid, each image
# titled with its label; axis ticks are removed.
fig, axes = plt.subplots(
    nrows=2,
    ncols=3,
    figsize=(12, 7),
    subplot_kw={'xticks': [], 'yticks': []},
)
for position, panel in enumerate(axes.ravel()):
    sample = df.iloc[position]
    panel.imshow(plt.imread(sample["Paths"]))
    panel.set_title(sample["Labels"])
plt.show()

Example images from the training set, each titled with its class label.

# Generators definition

In [109]:
# Augmentation helps against overfitting, which matters here because the
# training and validation sets are small.
xception_preprocess = tf.keras.applications.xception.preprocess_input

# Training generator: random shifts, zoom and brightness changes on top of the
# Xception preprocessing; validation_split=0.1 reserves 10% of the rows for
# the 'validation' subset.
train_datagen = ImageDataGenerator(
    preprocessing_function=xception_preprocess,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    brightness_range=[0.2, 1.0],
    validation_split=0.1,
)

# Test generator: Xception preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=xception_preprocess)

In [110]:
BATCH_SIZE = 64
IMAGE_SIZE = (224, 224)

# Validation images must not be augmented, otherwise val_loss/val_accuracy are
# measured on randomly distorted pictures. This generator only applies the
# Xception preprocessing. Its validation_split has to equal the one passed to
# train_datagen so that the 'training' and 'validation' subsets remain
# complementary slices of `df`.
val_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.xception.preprocess_input,
    validation_split=0.1,
)

# Arguments shared by all three iterators.
flow_kwargs = dict(
    x_col='Paths',
    y_col='Labels',
    color_mode='rgb',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Augmented, shuffled training batches.
train_images = train_datagen.flow_from_dataframe(
    dataframe=df, shuffle=True, seed=1234, subset='training', **flow_kwargs
)

# Un-augmented validation batches in a fixed order.
val_images = val_datagen.flow_from_dataframe(
    dataframe=df, shuffle=False, subset='validation', **flow_kwargs
)

# Test batches in frame order (shuffle=False) so predictions line up with
# test_images.classes.
test_images = test_datagen.flow_from_dataframe(
    dataframe=df_test, shuffle=False, **flow_kwargs
)

# Model definition

In [111]:
# Transfer-learning classifier: a frozen ImageNet-pretrained Xception backbone
# (global max pooling, no top) followed by a small dense head.
inputs = layers.Input(shape=(224, 224, 3))
base_model = tf.keras.applications.xception.Xception(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
    pooling='max',
)
base_model.trainable = False  # only the head below is trained

x = base_model(inputs)
# Three ReLU layers of decreasing width.
for units in (512, 256, 128):
    x = layers.Dense(units, activation='relu')(x)
x = layers.Dropout(rate=.5)(x)
# Two-way softmax output, matching class_mode='categorical' labels.
output = layers.Dense(2, activation='softmax')(x)
model = tf.keras.models.Model(inputs=inputs, outputs=output)

In [112]:
EPOCHS = 2

# ExponentialDecay computes
#     lr = initial_learning_rate * decay_rate ** (step / decay_steps).
# The previous decay_rate of 1e-6 shrank the learning rate by a factor of one
# million every 10000 steps, which stalls any longer training run (it looks
# like a carry-over of the legacy `decay=1e-6` optimizer argument, which has
# different semantics). 0.96 gives a gentle 4% reduction per 10000 steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=0.96,
)
model.compile(
    optimizer=Adamax(learning_rate=lr_schedule),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# `history.history` holds the per-epoch metrics plotted further down.
history = model.fit(train_images, validation_data=val_images, epochs=EPOCHS)

# Model evaluation

In [113]:
# Training vs. validation accuracy, one point per epoch.
for metric_key, legend_name in (("accuracy", "train_acc"), ("val_accuracy", "val_acc")):
    plt.plot(history.history[metric_key], label=legend_name)
plt.title('Accuracy Plot')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend()
plt.show()

In [114]:
# Training vs. validation loss, one point per epoch.
for metric_key, legend_name in (("loss", "train_loss"), ("val_loss", "val_loss")):
    plt.plot(history.history[metric_key], label=legend_name)
plt.title('Loss Plot')
plt.xlabel("epochs")
plt.ylabel("loss")
plt.legend()
plt.show()

In [115]:
# Predict class probabilities for every test image, then take the most likely
# class index per image. argmax already yields the class index, so no further
# thresholding is needed (comparing it with 0.5 only turned it into booleans).
pred_probs = model.predict(test_images)
pred = np.argmax(pred_probs, axis=1)

# Ground truth comes from the iterator itself: `classes` uses the same class
# indices as the model outputs and covers exactly the files the iterator
# yields, in order (it was created with shuffle=False). Rebuilding it from
# df_test with a hard-coded "0" label can silently misalign with either.
gt = list(test_images.classes)

In [116]:
# Debug inspection: display the repr of the test iterator.
test_images

In [117]:
# Debug inspection: show which iterator class flow_from_dataframe returned.
type(test_images)

In [118]:
# Display the predicted labels for the test set.
pred

In [119]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision / recall / F1 on the test set.
report = classification_report(gt, pred)
print(report)

In [120]:
# Confusion matrix of the test predictions: rows are true classes, columns
# are predicted classes.
conf_matrix = confusion_matrix(gt, pred)
sns.heatmap(
    conf_matrix,
    xticklabels=["0", "1"],
    yticklabels=["0", "1"],
    annot=True,
    fmt="d",  # integer counts; the default '.2g' prints e.g. 1.2e+02
)
plt.title('Confusion Matrix')
plt.xlabel("predicted label")
plt.ylabel("true label")
plt.show()

In [121]:
from keras.models import save_model
# Save through the model's own method so the tf.keras model is written by the
# same Keras implementation that built it, rather than by the standalone
# `keras` package. The '.h5' suffix selects the HDF5 format.
model.save('model.h5')

In [122]:
from tensorflow import keras

In [123]:
# Replace the in-memory model with one loaded from disk.
# NOTE(review): this path is not where the notebook saved 'model.h5' above —
# presumably a previously uploaded copy; confirm it is the intended model.
SAVED_MODEL_PATH = '../input/savedmodel/model.h5'
model = keras.models.load_model(SAVED_MODEL_PATH)

In [124]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

In [125]:
import cv2

In [126]:
from keras.preprocessing import image

# Load one hold-out image at the network's input resolution and turn it into
# a batch of one: shape (1, 224, 224, 3).
test_image = image.load_img('../input/holdout/sample_10.png', target_size=(224, 224))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis=0)

# Apply the same Xception preprocessing the data generators used for training
# and testing; feeding raw 0-255 pixel values gives the network inputs in a
# range it never saw.
# NOTE(review): assumes the loaded model was trained with this preprocessing.
test_image = tf.keras.applications.xception.preprocess_input(test_image)

#predict the result
result = model.predict(test_image)

In [127]:
# Most likely class index for the single image, then its boolean form
# (True for any class index above 0).
# NOTE(review): the `> 0.5` comparison is applied to a class index, not a
# probability; it only distinguishes class 0 from the rest.
predicted_index = np.argmax(result, axis=1)
pred = predicted_index > 0.5

In [128]:
# Display the predicted label for the hold-out image.
pred