In [1]:
!git clone https://github.com/Calvinwilson99/HackerEarth-DeepLearning.git


Cloning into 'HackerEarth-DeepLearning'...
remote: Enumerating objects: 9128, done.[K
remote: Total 9128 (delta 0), reused 0 (delta 0), pack-reused 9128[K
Receiving objects: 100% (9128/9128), 74.18 MiB | 34.62 MiB/s, done.
Resolving deltas: 100% (1927/1927), done.


In [2]:
# Import necessary header files 

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import VGG16
import random

In [0]:
# Import the training dataset
# y is list of target values

train = pd.read_csv("/content/HackerEarth-DeepLearning/dataset/train.csv")
y = train.iloc[:,1].values
y = LabelEncoder().fit_transform(y)

In [0]:
# Read each image and add matrix to X (list of training values)

X = []
for i in range(len(train)):
    image = cv2.imread("/content/HackerEarth-DeepLearning/dataset/Train Images/" + train.Image[i])
    resized = cv2.resize(image, (224,224))
    X.append(resized)

In [5]:
# Split into training and test set

X = np.array(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
print(X_train.dtype)

uint8


In [6]:
# Import VGG16 architecture to help in learning - Expects input shape to be (224,224,3) (remove output layer)
# Add our final layer for output

trained_model = VGG16(weights="imagenet",
    include_top=False, 
    input_shape=(224, 224, 3), 
    pooling='avg')
trained_model.trainable = False

model = Sequential()

model.add(trained_model)
model.add(Dropout(0.2))
model.add(Dense(4, activation = "softmax"))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
# callbacks to save model weights at checkpoints, change learning rate dynamically

callbacks = [
    EarlyStopping(patience = 10, verbose = 1),
    ReduceLROnPlateau(factor = 0.1, patience = 3,
    min_lr = 0.00001, verbose = 1),
    ModelCheckpoint('/content/HackerEarth-DeepLearning/models/model.h5',verbose = 1, save_best_only = True,
    save_weights_only = True)
]

In [0]:
# Compile the model

model.compile(optimizer = "Adam", metrics = ['accuracy'], loss = 'sparse_categorical_crossentropy')

In [0]:
# Train the model

model.fit(X_train, y_train, epochs = 50, validation_data = (X_test,y_test), callbacks = callbacks)

Train on 4786 samples, validate on 1197 samples
Epoch 1/50
Epoch 00001: val_loss improved from inf to 1.77396, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 2/50
Epoch 00002: val_loss improved from 1.77396 to 1.18174, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 3/50
Epoch 00003: val_loss improved from 1.18174 to 1.00108, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 4/50
Epoch 00004: val_loss improved from 1.00108 to 0.96883, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 5/50
Epoch 00005: val_loss improved from 0.96883 to 0.84928, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 6/50
Epoch 00006: val_loss improved from 0.84928 to 0.79709, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 7/50
Epoch 00007: val_loss improved from 0.79709 to 0.75873, saving model to /content/HackerEarth-DeepLearning/model.h5
Epoch 8/50
Epoch 00008: val_loss did not improve from 0.75873
Epoch 9/50
E

<tensorflow.python.keras.callbacks.History at 0x7f35f007f240>

In [0]:
# Load saved weights for prediction

pred_model = Sequential()

pred_model.add(trained_model)
pred_model.add(Dropout(0.2))
pred_model.add(Dense(4, activation = "softmax"))
pred_model.load_weights('/content/HackerEarth-DeepLearning/models/best_model.h5')

In [0]:
# Predicting output on X_test

y_pred = pred_model.predict(X_test)

In [0]:
# COnverting output to text labels

y_pre = [np.argmax(i) for i in y_pred]
output = ["Attire", "Decorationandsignage", "Food", "misc"]

pred_labels = [output[i] for i in y_pre]
corr_labels = [output[i] for i in y_test]

In [13]:
# Print accuracy

print("ACCURACY SCORE")
print(accuracy_score(y_test, y_pre))

print("CONFUSION MATRIX")
print(confusion_matrix(y_test, y_pre))

ACCURACY SCORE
0.7677527151211362
CONFUSION MATRIX
[[259   4  37  30]
 [  6 114  12  11]
 [ 38  18 388  16]
 [ 37  19  50 158]]


In [15]:
# testing the results (run cell again for different outputs)

for i in range(4):
  ind = random.randint(0, len(y_test))
  print("predicted: ", pred_labels[ind], "Correct: ", corr_labels[ind])

predicted:  Food Correct:  Food
predicted:  Food Correct:  misc
predicted:  Food Correct:  Food
predicted:  misc Correct:  misc
