In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import math
import os
import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
data_dir = "C:\\Users\\M1050683\\Documents\\OpenVINO_training\\Neural_networks\\Image_classification\\Data_augmentation_ver2\\train"
train_labels = pd.read_csv(r"C:\Users\M1050683\Documents\OpenVINO_training\Neural_networks\Image_classification\trainLabels.csv",dtype=str)

id_new = []
for i in train_labels["id"]:
    id_new.append(str(i)+".png")

id_new = np.array(id_new)

only_labels = np.array(train_labels["label"])

final_data = pd.DataFrame({"id":id_new,"label":only_labels})

final_data.head()


Unnamed: 0,id,label
0,1.png,frog
1,2.png,truck
2,3.png,truck
3,4.png,deer
4,5.png,automobile


In [3]:
catagorie = list(set(final_data["label"]))
datagen=ImageDataGenerator(rescale=1./255.)
test_datagen=ImageDataGenerator(rescale=1./255.)


train_generator=datagen.flow_from_dataframe(
    dataframe=final_data[:40000],
    directory=data_dir,
    x_col="id",
    y_col="label",
    batch_size=80,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    classes=catagorie,
    target_size=(32,32))

valid_generator=test_datagen.flow_from_dataframe(
    dataframe=final_data[40000:45000],
    directory=data_dir,
    x_col="id",
    y_col="label",
    batch_size=80,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    classes=catagorie,
    target_size=(32,32))

test_generator=test_datagen.flow_from_dataframe(
    dataframe=final_data[45000:],
    directory=data_dir,
    x_col="id",
    batch_size=1,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(32,32))

Found 40000 validated image filenames belonging to 10 classes.
Found 5000 validated image filenames belonging to 10 classes.
Found 5000 validated image filenames.


In [4]:
model = Sequential()

model.add(Conv2D(64, (3, 3), padding='same', input_shape=(32,32,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# model.add(Conv2D(32, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# model.add(Conv2D(64, (3, 3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))


model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(Dense(10, activation='softmax'))
model.compile(optimizers.RMSprop(lr=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 64)        1792      
_________________________________________________________________
activation (Activation)      (None, 32, 32, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
activation_1 (Activation)    (None, 16, 16, 128)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128)         0

In [6]:
# fit 

STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
print("No. of dataset: ",train_generator.n)
print("Batch size: ",train_generator.batch_size)
print(STEP_SIZE_TRAIN)
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

print("\nShape of img:",train_generator.image_shape)

model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=15
)

No. of dataset:  40000
Batch size:  80
500

Shape of img: (32, 32, 3)
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x22a55e3d940>

In [23]:
#Save the model
model.save("image_classification_augmentation.model")

INFO:tensorflow:Assets written to: image_classification_augmentation.model\assets


In [7]:
# Evaluate
model.evaluate_generator(generator=valid_generator,steps=STEP_SIZE_TEST)

[0.9279013316273689, 0.67996424]

In [8]:
# Predict the output
test_generator.reset()
pred=model.predict_generator(test_generator,steps=STEP_SIZE_TEST,verbose=1)



In [9]:
print(pred)

[[1.2869880e-05 7.9745405e-06 1.2557449e-04 ... 9.8688716e-01
  5.8514013e-07 2.9880081e-05]
 [9.9062745e-04 9.5525050e-01 9.8656584e-04 ... 8.6568652e-05
  4.1795266e-04 4.1256923e-02]
 [2.2564158e-03 1.4382422e-03 1.3115683e-01 ... 6.2502041e-02
  1.4852079e-03 4.7334954e-03]
 ...
 [1.3708910e-01 4.9262602e-02 2.2671539e-03 ... 2.1193593e-03
  5.7542812e-02 7.4252945e-01]
 [1.9067569e-02 9.2680341e-01 5.4934825e-04 ... 3.1326601e-04
  3.0034514e-02 2.2173405e-02]
 [4.1504446e-01 1.7884819e-01 1.3670329e-02 ... 2.2262016e-02
  2.5103468e-01 8.8763595e-02]]


In [10]:
# creating a reverse catagory
reverse_catagory = {i:catagorie[i] for i in range(len(catagorie))}
reverse_catagory[10] = "other"
print(reverse_catagory)

{0: 'truck', 1: 'deer', 2: 'dog', 3: 'cat', 4: 'bird', 5: 'horse', 6: 'automobile', 7: 'frog', 8: 'airplane', 9: 'ship', 10: 'other'}


In [11]:
predicted_class_indices=np.argmax(pred,axis=1)

In [12]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [13]:
Actual = final_data["label"][45000:]
print(Actual)

45000         horse
45001    automobile
45002          deer
45003    automobile
45004      airplane
            ...    
49995          bird
49996          frog
49997         truck
49998    automobile
49999    automobile
Name: label, Length: 5000, dtype: object


In [14]:
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                      "Actual":Actual,
                     "Predictions":predictions,})
results

Unnamed: 0,Filename,Actual,Predictions
45000,45001.png,horse,horse
45001,45002.png,automobile,automobile
45002,45003.png,deer,deer
45003,45004.png,automobile,automobile
45004,45005.png,airplane,airplane
...,...,...,...
49995,49996.png,bird,ship
49996,49997.png,frog,cat
49997,49998.png,truck,truck
49998,49999.png,automobile,automobile


## Finally with the actual test dataset

In [15]:
test_dir = "C:\\Users\\M1050683\\Documents\\OpenVINO_training\\Neural_networks\\Image_classification\\Data_augmentation_ver2\\test"
test_data = pd.read_csv(r"C:\Users\M1050683\Documents\OpenVINO_training\Neural_networks\Image_classification\testLabels.csv",dtype=str)


In [16]:
id_new = []
for i in test_data["id"]:
    id_new.append(str(i)+".jpg")

id_new = np.array(id_new)

# only_labels = np.array([])

test_data = pd.DataFrame({"id":id_new})

test_data.head()

Unnamed: 0,id
0,1.jpg
1,2.jpg
2,3.jpg
3,4.jpg
4,5.jpg


In [17]:
final_test_datagen=ImageDataGenerator(rescale=1./255.)

final_test_generator=final_test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=test_dir,
    x_col="id",
    batch_size=1,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(32,32))

Found 1365 validated image filenames.


In [18]:
STEP_SIZE_TEST=final_test_generator.n//final_test_generator.batch_size

final_test_generator.reset()
pred=model.predict_generator(final_test_generator,steps=STEP_SIZE_TEST,verbose=1)



In [19]:
# predicted_class_indices_test =np.argmax(pred,axis=1)
output = []

def location(arr):
    max_ele = 0
    location = 0
    for i in range(len(arr)):
        if arr[i] > max_ele:
            max_ele = arr[i]
            location = i
    if max_ele < 0.5:
        return 10
    return location


for img in pred:
    type_img = location(img)
    output.append(reverse_catagory[type_img])

In [20]:
output[:10]

['other',
 'other',
 'other',
 'other',
 'airplane',
 'ship',
 'other',
 'bird',
 'other',
 'other']

In [21]:
filenames=final_test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                     "Predictions":output})
results

Unnamed: 0,Filename,Predictions
0,1.jpg,other
1,2.jpg,other
2,3.jpg,other
3,4.jpg,other
4,5.jpg,airplane
...,...,...
1360,1361.jpg,automobile
1361,1362.jpg,truck
1362,1363.jpg,other
1363,1364.jpg,other
