# Import libraries and data

In [None]:
import numpy as np, pandas as pd, seaborn as sns
import random, os, zipfile, sys, time, re
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras

In [None]:
from PIL import Image
from keras.layers import Input, Conv2D, Dense, Flatten, MaxPooling2D, Input, GlobalAveragePooling2D, Dropout
from keras.models import Model, Sequential
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.layers import Lambda
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
from google.colab import drive

# Mount Google Drive so files stored under it are reachable from this runtime.
drive_mount_point = '/content/drive/'
drive.mount(drive_mount_point)

In [None]:
# Create the dataset directory; exist_ok=True keeps this cell re-runnable
# instead of raising FileExistsError once the directory already exists.
os.makedirs('garbage', exist_ok=True)

In [None]:
# Extract the dataset archive stored on Google Drive into /content/garbage.
!unzip /content/drive/MyDrive/capstone/unsplitted.zip -d /content/garbage

# Categorize data into a dataframe

In [None]:
# Root directory containing one sub-folder of images per class.
base_path = "/content/garbage/"

# Integer class id -> name of the class sub-folder under base_path.
categories = dict(enumerate(['O_', 'cardboard', 'glass', 'metal', 'paper', 'plastic']))

In [None]:
def classname(df, col):
    """Prefix every filename in ``df[col]`` with its class folder.

    The class name is everything that precedes the first digit of the
    filename, so ``"paper104.jpg"`` becomes ``"paper/paper104.jpg"``.

    Args:
        df: DataFrame holding the filenames; the column is rewritten in place.
        col: Name of the column that contains the bare filenames.

    Returns:
        The same DataFrame object, with ``df[col]`` updated.

    Raises:
        ValueError: If a filename contains no digit, so no class prefix can
            be derived from it.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape.
    first_digit = re.compile(r"\d")

    def add_prefix(filename):
        match = first_digit.search(filename)
        if match is None:
            raise ValueError(
                f"Cannot derive a class prefix from filename without digits: {filename!r}"
            )
        return f"{filename[:match.start()]}/{filename}"

    df[col] = df[col].apply(add_prefix)
    return df

# Collect every image filename together with the integer id of its class.
filenames_list = []
categories_list = []

for category, folder in categories.items():
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    filenames = sorted(os.listdir(os.path.join(base_path, folder)))

    filenames_list.extend(filenames)
    categories_list.extend([category] * len(filenames))

df = pd.DataFrame({
    'filename': filenames_list,
    'category': categories_list
})

# Turn bare filenames into paths relative to base_path ("<class>/<file>").
df = classname(df, 'filename')
# Shuffle with a fixed seed: an unseeded shuffle would make the seeded
# train/val/test split further down non-reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [None]:
# Report the dataset size and class count, then preview the first rows.
image_count = df.shape[0]
category_count = len(categories)
print('Number of images:', image_count)
print('Number of categories:', category_count)
df.head()

# Create model

In [None]:
import keras.applications.mobilenet_v2 as mobilenetv2

# ImageNet-pretrained MobileNetV2 without its classification head,
# used as the feature extractor for 224x224 RGB inputs.
mobilenetv2_layer = mobilenetv2.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze the backbone so only the layers added on top of it are trained.
mobilenetv2_layer.trainable = False

In [None]:
def mobilenetv2_preprocessing(img):
    """Return ``img`` passed through MobileNetV2's ``preprocess_input``."""
    preprocessed = mobilenetv2.preprocess_input(img)
    return preprocessed

In [None]:
# Classifier stack: in-model preprocessing -> MobileNetV2 backbone ->
# global average pooling -> dropout -> softmax over the categories.
classifier_layers = [
    tf.keras.Input(shape=(224, 224, 3)),
    Lambda(mobilenetv2_preprocessing),
    mobilenetv2_layer,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(len(categories), activation='softmax'),
]
model = tf.keras.Sequential(classifier_layers)

In [None]:
# Training configuration: categorical cross-entropy loss, Adam optimizer,
# and categorical accuracy as the reported metric.
compile_options = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['categorical_accuracy'],
}
model.compile(**compile_options)
model.summary()

In [None]:
# Early stopping on validation accuracy (higher is better): an improvement
# must be at least 0.001 to count, patience is 2 epochs, and the best
# weights are restored when training stops.
early_stop = EarlyStopping(
    monitor='val_categorical_accuracy',
    mode='max',
    min_delta=0.001,
    patience=2,
    restore_best_weights=True,
    verbose=1,
)
callbacks = [early_stop]

# Split data into train, validation, and test sets

In [None]:
# Replace the integer class ids with their class-name strings.
df['category'] = df['category'].replace(categories)

# 80% train; the remaining 20% is split 70/30 into validation and test.
# Both splits are stratified on the label so that every subset keeps the
# class proportions of the full dataset.
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['category'])
val_df, test_df = train_test_split(
    val_df, test_size=0.3, random_state=42, stratify=val_df['category'])

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

print('Train data size:', train_df.shape[0])
print('Validation data size:', val_df.shape[0])
print('Test data size:', test_df.shape[0])

# Train model

In [None]:
# Random augmentations applied to the training images.
augmentation_options = dict(
    rotation_range=30,
    shear_range=0.1,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
train_datagen = image.ImageDataGenerator(**augmentation_options)

# Stream 224x224 training batches of 64 images, resolving the paths in
# train_df['filename'] against base_path.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=base_path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=64,
)

In [None]:
# Validation images are not augmented.
val_datagen = image.ImageDataGenerator()

# Stream 224x224 validation batches of 64 images, resolving the paths in
# val_df['filename'] against base_path.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=base_path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=64,
)

In [None]:
# Take the step counts from the generators themselves instead of repeating
# the batch size as "// 64": len(generator) is the number of batches needed
# to cover the data, so the final partial batch is included and a validation
# set smaller than one batch no longer yields zero validation steps.
history = model.fit(train_generator,
                    epochs=15,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    steps_per_epoch=len(train_generator),
                    callbacks=callbacks)

In [None]:
# Save the trained model to "model.h5" (relative to the working directory).
# NOTE(review): the model contains a Lambda layer wrapping a Python function;
# reloading the file presumably needs that function to be available — confirm.
model.save("model.h5")

# Visualize train and val loss and accuracy

In [None]:
# Two stacked panels: loss curves on top, accuracy curves below.
fig, (ax1, ax2) = plt.subplots(2, 1)

# (axis, training key, validation key, training label, validation label)
panels = (
    (ax1, 'loss', 'val_loss', 'Training loss', 'Validation loss'),
    (ax2, 'categorical_accuracy', 'val_categorical_accuracy',
     'Training accuracy', 'Validation accuracy'),
)
for axis, train_key, val_key, train_label, val_label in panels:
    axis.plot(history.history[train_key], color='b', label=train_label)
    axis.plot(history.history[val_key], color='r', label=val_label)
    axis.legend()

legend = plt.legend(loc='best')
plt.tight_layout()
plt.show()

# Evaluate model

In [None]:
# Test images are not augmented.
test_datagen = image.ImageDataGenerator()

# One image per batch with shuffling disabled, so the generator yields the
# rows of test_df in their original order.
test_flow_options = dict(
    dataframe=test_df,
    directory=base_path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=1,
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(**test_flow_options)

In [None]:
filenames = test_generator.filenames
num_sample = len(filenames)

# Model.evaluate accepts generators directly; evaluate_generator is
# deprecated and absent from newer Keras releases. The generator yields one
# image per batch, so num_sample steps cover the whole test set once.
_, accuracy = model.evaluate(test_generator, steps=num_sample)

print('Accuracy on test set: ',  (accuracy * 100), '%')

In [None]:
# Invert class_indices (class name -> index) into index -> class name,
# which is what decoding argmax predictions needs.
label_map = {index: name for name, index in test_generator.class_indices.items()}
print(label_map)

In [None]:
# Pass the step count by keyword: the second positional parameter of
# Model.predict is batch_size, not steps, and batch_size must not be given
# for generator input.
predicts = model.predict(test_generator, steps=num_sample)
# Highest-probability class index per image, decoded to its class name.
predicts = predicts.argmax(axis=1)
predicts = [label_map[item] for item in predicts]
# The test generator is not shuffled, so predictions line up with test_df rows.
labels = test_df['category'].to_numpy()

print(classification_report(labels, predicts))

# Trial prediction

In [None]:
from google.colab import files
import cv2

In [None]:
def load_image(image_path):
    """Load an image file as a one-image RGB batch sized for the model.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Array of shape (1, 224, 224, 3) with channels in RGB order.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None,
        # which would otherwise surface as an obscure error in cv2.resize.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV decodes to BGR, while the rest of the pipeline (the Keras image
    # generators and matplotlib's imshow) works with RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    # Add the leading batch dimension expected by model.predict.
    return np.expand_dims(img, axis=0)

In [None]:
def predict_image(model, image_path):
    """Classify one image file, display it, and print the predicted label.

    Args:
        model: Object whose ``predict`` method returns per-class scores for
            a batch of images.
        image_path: Path of the image file to classify.
    """
    batch = load_image(image_path)
    class_indices = model.predict(batch).argmax(1)
    predicted_labels = [label_map[index] for index in class_indices]
    plt.imshow(batch[0])
    plt.show()
    print(predicted_labels)

In [None]:
# Try the model on a single image from the 'O_' class folder.
predict_image(model, '/content/garbage/O_/O_13585.jpg')

In [None]:
# Try the model on a single image from the 'metal' class folder.
predict_image(model, '/content/garbage/metal/metal105.jpg')