In [1]:
from zipfile import ZipFile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from PIL import Image
import os
import keras
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
#zip = ZipFile('train.zip')
#zip.extractall()

#zip = ZipFile('test.zip')
#zip.extractall()

In [2]:
train_path = 'train'
test_path = 'test'

height = 224
width = 224
channels = 3

In [3]:
X_train = []
Y_train = []
X_test = []
Y_test = []
NUM_CATEGORIES = len(os.listdir(train_path))
NUM_CATEGORIES

9285

In [4]:
images = os.listdir(train_path)
for a in images:
    image = Image.open(train_path + '/' + a)
    image = image.resize((height,width))
    image = np.array(image)
    X_train.append(image)
    Y_train.append(a.split(' (')[0].lower())

X_train = np.array(X_train)
Y_train = np.array(Y_train)

In [6]:
set_labels = set(Y_train)
labels = {}
idx = 0
for i in set_labels:
    idx += 1
    labels[i] = idx


In [7]:
Y_train = [labels[i] for i in Y_train]

In [8]:
images = os.listdir(test_path)
for a in images:
    image = Image.open(test_path + '/' + a)
    image = image.resize((height,width))
    image = np.array(image)
    X_test.append(image)
    Y_test.append(a.split(' (')[0].lower())

X_test = np.array(X_test)
Y_test = np.array(Y_test)

In [9]:
Y_test = [labels['green called cattleheart'] if i == 'green celled cattleheart' else labels[i] for i in Y_test]

In [11]:
X_train = np.array(X_train) / 255
X_test = np.array(X_test) / 255
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

In [12]:
print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape)

(9285, 224, 224, 3) (750, 224, 224, 3) (9285,) (750,)


In [13]:
Y_train=keras.utils.to_categorical(Y_train,NUM_CATEGORIES)
Y_test= keras.utils.to_categorical(Y_test,NUM_CATEGORIES)

print(Y_train.shape)
print(Y_test.shape)

(9285, 9285)
(750, 9285)


In [14]:
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(height,width,3)))
model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(NUM_CATEGORIES, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 220, 220, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 110, 110, 32)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 110, 110, 32)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 108, 108, 64)      18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 106, 106, 64)      36928     
                                                        

In [15]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [16]:
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    vertical_flip=False,
    fill_mode="nearest")

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X_train,Y_train,test_size=0.2,random_state=42, shuffle=True)

In [18]:
batch_size = 32
epochs = 15

history = model.fit(aug.flow(X_train,Y_train,batch_size=batch_size),
                    epochs=epochs, validation_data=(X_test, Y_test) )

: 

In [None]:
plt.figure(0)
plt.plot(history.history['accuracy'], label='training accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

plt.figure(1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
pred = model.predict_classes(X_test)

In [None]:
from sklearn.metrics import accuracy_score

print('Test Data accuracy: ',accuracy_score(labels, pred)*100)