In [None]:
import os
import random
import zipfile
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Unpack the image archive stored on Google Drive into a local folder.
data_zip = "/content/drive/MyDrive/🌍PUBLIC SHARED/archive.zip"
output = "data"
with zipfile.ZipFile(data_zip, mode="r") as archive:
    archive.extractall(path=output)

# Prefer the graphics card when one is available.
gpus = tf.config.list_physical_devices('GPU')
print("nb gpus", len(gpus))
if len(gpus) > 0:
    first_gpu = gpus[0]
    try:
        # Expose only the first GPU to TensorFlow.
        tf.config.set_visible_devices(first_gpu, 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
    except RuntimeError as err:
        # Visible devices must be set before GPUs have been initialized
        print(err)
    else:
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")

In [None]:
def convert_img_to_df(dataset, pattern='**/*.jpg'):
  """Index the image files under ``dataset`` as a DataFrame.

  The label of each image is the name of the directory that directly
  contains it.

  Args:
    dataset: Root directory to search (str or path-like).
    pattern: Glob pattern, relative to ``dataset``, selecting the image
      files. Defaults to every ``.jpg`` file at any depth.

  Returns:
    A DataFrame with two columns: ``Filepath`` (the file path as a string)
    and ``Label`` (the parent directory name). Rows are sorted by path so
    the result does not depend on filesystem enumeration order. The frame
    is empty, but still has both columns, when nothing matches.
  """
  # glob() yields files in an OS-dependent order; sorting makes the frame
  # (and any seeded shuffle applied to it later) reproducible.
  img_paths = sorted(Path(dataset).glob(pattern))

  records = {
      'Filepath': [str(path) for path in img_paths],
      'Label': [path.parent.name for path in img_paths],
  }
  return pd.DataFrame(records, columns=['Filepath', 'Label'])

In [None]:
# Index every extracted image (file path + class label) into a DataFrame.
df = convert_img_to_df(dataset=output)
# Print the first five rows as a quick sanity check.
print(df.head(n=5))

In [None]:

# Tunable settings for the split and the generators.
SEED = 42
BATCH_SIZE = 32
TRAIN_FRACTION = 0.8

# Shuffle the dataset. The fixed seed makes the split reproducible, and writing
# to a new name keeps this cell idempotent (re-running it never reshuffles an
# already shuffled frame).
shuffled_df = df.sample(frac=1, random_state=SEED)

# Split into train and test sets (0.8 / 0.2).
train_size = int(len(shuffled_df) * TRAIN_FRACTION)
train_df = shuffled_df.iloc[:train_size]
test_df = shuffled_df.iloc[train_size:]
assert len(train_df) + len(test_df) == len(shuffled_df)

# Pin the label -> index mapping from the full dataset so both generators
# produce one-hot vectors of the same width and order, even if a class
# happens to be missing from one of the splits.
class_names = sorted(shuffled_df['Label'].unique())

# Training images are rescaled to [0, 1] and randomly shifted / flipped.
datagen = ImageDataGenerator(rescale=1./255,
                             width_shift_range=0.15,
                             height_shift_range=0.15,
                             horizontal_flip=True,
                             vertical_flip=True)

# Test images are only rescaled: evaluation must see the data unaugmented.
test_datagen = ImageDataGenerator(rescale=1./255)

target_image_size = (224, 224)

train_set = datagen.flow_from_dataframe(dataframe=train_df,
                                        x_col='Filepath',
                                        y_col='Label',
                                        classes=class_names,
                                        batch_size=BATCH_SIZE,
                                        target_size=target_image_size,
                                        shuffle=True,
                                        seed=SEED,
                                        class_mode='categorical'
                                        )

# This generator must be bound to `test_set` (it used to overwrite `train_set`).
# shuffle=False keeps batch order stable so predictions line up with samples.
test_set = test_datagen.flow_from_dataframe(dataframe=test_df,
                                            x_col='Filepath',
                                            y_col='Label',
                                            classes=class_names,
                                            batch_size=BATCH_SIZE,
                                            target_size=target_image_size,
                                            shuffle=False,
                                            class_mode='categorical'
                                            )

# Pull a single batch (images, one-hot labels) from the training generator.
x_train, y_train = next(train_set)


In [None]:
# NOTE: `model` is not created in this cell; it must already be defined by an
# earlier cell before this one runs.
EPOCHS = 15

# Keep only the best model seen so far, judged by the highest validation accuracy.
# NOTE(review): newer Keras releases may reject the `.hdf5` suffix for
# checkpoints — confirm against the installed version.
checkpoint = ModelCheckpoint("best_model_epoch.hdf5",
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')
callbacks_list = [checkpoint]

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train on the training generator and validate on the test generator.
history = model.fit(train_set,
                    validation_data=test_set,
                    validation_steps=len(test_set),
                    epochs=EPOCHS,
                    callbacks=callbacks_list,
                    verbose=1)

In [None]:
# Save the current model under the relative path "model.ckpt".
model.save("model.ckpt")
# Zip the saved model into /content/model.zip.
# NOTE(review): the zip command uses the absolute path /content/model.ckpt, so it
# assumes the notebook's working directory is /content — TODO confirm.
!zip -r /content/model.zip /content/model.ckpt

In [None]:
# Restore the best checkpoint written during training.
model = load_model('best_model_epoch.hdf5')

# Class probabilities for the whole test set.
predictions = model.predict(test_set, verbose=1)

# The model is compiled with a single metric ('accuracy'), so evaluate()
# returns [loss, accuracy]; unpack it so the printed number really is accuracy.
test_loss, acc = model.evaluate(test_set, verbose=1)
print("Loss on test set is: ", test_loss)
print("Accuracy on test set is: ", acc)

# Pick a random batch from the test set and look at its first image.
# len(test_set) counts batches, so indexing `predictions` (one row per sample)
# with that number would point at an unrelated image. Predict on the exact
# array that is displayed instead.
batch_index = random.randint(0, len(test_set) - 1)
batch_images, batch_labels = test_set[batch_index]
sample_image = batch_images[0]
sample_probs = model.predict(batch_images[:1], verbose=0)[0]

# Translate class indices back to the folder names used as labels.
index_to_class = {index: name for name, index in test_set.class_indices.items()}
predicted_label = index_to_class[int(np.argmax(sample_probs))]
actual_label = index_to_class[int(np.argmax(batch_labels[0]))]

fig, ax = plt.subplots()
ax.imshow(sample_image)
ax.set_title(f"predicted: {predicted_label} / actual: {actual_label}")
ax.axis("off")
plt.show()

print("The prediction for this image is: ", predicted_label)
print("The true label for this image is: ", actual_label)

