In [1]:
# Initiate Python in Colab environment
!pip install PyDrive

Collecting PyDrive
[?25l  Downloading https://files.pythonhosted.org/packages/52/e0/0e64788e5dd58ce2d6934549676243dc69d982f198524be9b99e9c2a4fd5/PyDrive-1.3.1.tar.gz (987kB)
[K     |████████████████████████████████| 993kB 2.8MB/s 
Building wheels for collected packages: PyDrive
  Building wheel for PyDrive (setup.py) ... [?25l[?25hdone
  Created wheel for PyDrive: filename=PyDrive-1.3.1-cp36-none-any.whl size=27437 sha256=cbd5e8291f02954593fc994a6f55d929c556fe4d5990b2906b13bb874c0f1a58
  Stored in directory: /root/.cache/pip/wheels/fa/d2/9a/d3b6b506c2da98289e5d417215ce34b696db856643bad779f4
Successfully built PyDrive
Installing collected packages: PyDrive
Successfully installed PyDrive-1.3.1


In [0]:
# Import essentials

import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Enable Google drive authentication and authorization to read from the google drive
# The steps below are order-dependent: authenticate first, then build the PyDrive client.

# Authenticate the current Colab user.
auth.authenticate_user()
# Create a PyDrive auth object and hand it the application-default credentials.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the download cells use to fetch files by their Drive ID.
drive = GoogleDrive(gauth)

In [0]:
# Link the training set, download and unzip it
download = drive.CreateFile({'id': '1gvdIbq3G4JwHvRUGvW7KnWA9hP2bpUSy'})
download.GetContentFile('Full_Set_Proc_Comb.zip')
!unzip Full_Set_Proc_Comb.zip

In [0]:
# Link the training set
download = drive.CreateFile({'id': '1mM5e0SRKuTklmVoKtqKx8dT_mxezrhxo'})
download.GetContentFile('Test_Set_Proc_Comb.zip')
!unzip Test_Set_Proc_Comb.zip


In [6]:
# Import the modelling, data-handling and plotting libraries
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from tqdm import tqdm

Using TensorFlow backend.


In [0]:
# Load the label tables from the two unpacked archives
train_csv_path = 'Full_Set_Proc_Comb/train.csv'
test_csv_path = 'Test_Set_Proc_Comb/test.csv'

train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)


In [35]:
# Get full file names and split off a validation subset
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Build a new frame instead of aliasing `train`. Writing the '.jpg' suffix back
# into `train` made this cell non-idempotent: a second run produced names such
# as '383.jpg.jpg'.
df = train.assign(
    id=train['id'].astype(str) + '.jpg',   # image file name for each row
    label=train['label'].astype(str),      # labels as strings
)

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
train_set.head()

Unnamed: 0,id,label
382,383.jpg,1
538,539.jpg,1
1493,1494.jpg,4
1112,1113.jpg,3
324,325.jpg,1


In [36]:
# Get full file names for test set

# Build a new frame instead of aliasing `test`, so re-running this cell does
# not append '.jpg' a second time to already-suffixed ids.
df_test = test.assign(
    id=test['id'].astype(str) + '.jpg',   # image file name for each row
    label=test['label'].astype(str),      # labels as strings
)

df_test.head()

Unnamed: 0,id,label
0,1501.jpg,0
1,1502.jpg,0
2,1503.jpg,0
3,1504.jpg,0
4,1505.jpg,0


In [37]:
# Count rows per label in the training split to see how balanced it is
label_counts = train_set['label'].value_counts()
label_counts

2    251
0    244
4    236
1    235
3    234
Name: label, dtype: int64

In [42]:
# Define Data Generators
# Training images are randomly augmented; every image is rescaled by 1/255.
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
)

# Validation and test images are only rescaled.
datagentest = ImageDataGenerator(rescale=1./255)

# Keyword arguments shared by all three flows.
flow_options = dict(
    x_col="id",
    y_col="label",
    class_mode="categorical",
    target_size=(100, 100),
    batch_size=32,
)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_set,
    directory="Full_Set_Proc_Comb",
    shuffle=True,
    **flow_options)

# The validation and test flows are not shuffled.
valid_generator = datagentest.flow_from_dataframe(
    dataframe=test_set,
    directory="Full_Set_Proc_Comb",
    shuffle=False,
    **flow_options)

test_generator = datagentest.flow_from_dataframe(
    dataframe=df_test,
    directory="Test_Set_Proc_Comb",
    shuffle=False,
    **flow_options)

Found 1200 validated image filenames belonging to 5 classes.
Found 300 validated image filenames belonging to 5 classes.
Found 50 validated image filenames belonging to 5 classes.


In [0]:
# Best model with basic CNN
# Two conv/conv/pool/dropout stages feed a dense classifier with 5 softmax outputs.
model = Sequential([
    # Stage 1: 32 filters
    Conv2D(32, kernel_size=(3, 3), activation='relu',
           input_shape=(100, 100, 3), padding='same'),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: 64 filters
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),
])

In [0]:
# Save to disk only when the monitored validation loss improves
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(
    filepath='weights_cap2_3.hdf5',
    monitor='val_loss',
    save_best_only=True,
)
callbacks_list = [checkpoint]
    

In [0]:
# Compile the model
from keras import optimizers
# Use the canonical RMSprop class; the lowercase `rmsprop` name is a legacy
# alias that is not available in every Keras release.
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss='categorical_crossentropy',
              metrics=["accuracy"])

In [50]:
%%time
### TRAIN THE MODEL
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=250,
                    callbacks=callbacks_list)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

In [51]:
# Score the model as it stands after the last epoch on the validation and test sets
valid_scores = model.evaluate_generator(valid_generator)
print(valid_scores)
test_scores = model.evaluate_generator(test_generator)
test_scores

[0.8309193852904718, 0.9]


[0.7641155040264129, 0.8599999904632568]

In [52]:
# Restore the model saved by the checkpoint callback so it can be compared
# with the model left in memory after the final epoch
checkpoint_path = 'weights_cap2_3.hdf5'
new_model = keras.models.load_model(checkpoint_path)
new_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 100, 100, 32)      896       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 98, 98, 32)        9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 49, 49, 32)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 49, 49, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 49, 49, 64)        18496     
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 47, 47, 64)        36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 23, 23, 64)       

In [53]:
# Score the restored checkpoint on the validation set, then on the test set
for eval_generator in (valid_generator, test_generator):
    print(new_model.evaluate_generator(eval_generator))

[0.2686952355752389, 0.97]
[0.2596906507015228, 0.9000000023841858]


In [54]:
# Confusion matrix of the restored checkpoint on the test set
from sklearn.metrics import classification_report, confusion_matrix
# Rewind the generator before predicting.
test_generator.reset()
Y_pred = new_model.predict_generator(test_generator)
# Most probable class index for each row of predictions.
y_pred = Y_pred.argmax(axis=1)
true_classes = test_generator.classes
print('Confusion Matrix')
print(confusion_matrix(true_classes, y_pred))


Confusion Matrix
[[ 9  0  0  0  1]
 [ 0 10  0  0  0]
 [ 0  1  8  1  0]
 [ 1  0  0  9  0]
 [ 1  0  0  0  9]]


In [55]:
# Per-class precision / recall / F1 on the test set
# NOTE(review): the names are assumed to follow the generator's class-index order - confirm.
target_names = ['Apple', 'Blackberry', 'Green Grapes', 'Kiwi', 'Strawberry']
print('Classification Report')
report = classification_report(test_generator.classes, y_pred,
                               target_names=target_names)
print(report)

Classification Report
              precision    recall  f1-score   support

       Apple       0.82      0.90      0.86        10
  Blackberry       0.91      1.00      0.95        10
Green Grapes       1.00      0.80      0.89        10
        Kiwi       0.90      0.90      0.90        10
  Strawberry       0.90      0.90      0.90        10

    accuracy                           0.90        50
   macro avg       0.91      0.90      0.90        50
weighted avg       0.91      0.90      0.90        50

