In [0]:
from keras.layers.convolutional import ZeroPadding2D
from keras.utils import np_utils
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, Activation, BatchNormalization
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras import optimizers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
from PIL import Image, ImageDraw
import cv2
from sklearn.utils import shuffle
from tensorflow.keras.applications import MobileNet
%matplotlib inline
from tensorflow.keras.optimizers import Adam
#! pip install pydrive

# Loading Data

In [0]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
# choose a local (colab) directory to store the data.
local_download_path = os.path.expanduser('~/data')
try:
  os.makedirs(local_download_path)
except: pass


file_list = drive.ListFile(
    {'q': "'1GiITmsHYBfeGth-LTfnXpd6524D4-uDm' in parents"}).GetList()


In [5]:
# Testing for 5 classes
i =1
for f in file_list:
  i = i+1
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  # print('downloading to {}'.format(fname))
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)
  if i == 6:
    break


title: bench.csv, id: 1dYztgciO88Z_kI12edZdLCtrQR9VLfbI
title: shovel.csv, id: 1zvhytLDcfqY7uA3y_Ot5YKxBica9Dw8D
title: screwdriver.csv, id: 1VQNK5KowUhRtujJfwW7BvO91F-waYW2i
title: piano.csv, id: 1Guz55mbo1cDM3ZWt3u-Mlw0FGyfKKZZQ
title: crayon.csv, id: 1Daw8Jvx7yG7JN3pa29sI_eZqzHTLaKOQ


#Helper Functions


In [0]:
def plot_metrics_primary(acc, val_acc, loss, val_loss):
    fig, (ax1, ax2) = plt.subplots(1, 2, sharex='col', figsize=(20,7))

    ax1.plot(acc, label='Train Accuracy')
    ax1.plot(val_acc, label='Validation accuracy')
    ax1.legend(loc='best')
    ax1.set_title('Accuracy')

    ax2.plot(loss, label='Train loss')
    ax2.plot(val_loss, label='Validation loss')
    ax2.legend(loc='best')
    ax2.set_title('Loss')

    plt.xlabel('Epochs')
   

def plot_confusion_matrix(cnf_matrix, labels): 
    cnf_matrix_norm = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1)[:, np.newaxis]
    df_cm = pd.DataFrame(cnf_matrix_norm, index=labels, columns=labels)
    plt.figure(figsize=(20,7))
    sns.heatmap(df_cm, annot=True, fmt='.2f', cmap="Blues")
    plt.show()
    
# Function to plot images.
def plot_image(image_array):
    fig2 = plt.figure()
    ax2 = fig2.add_subplot(111, frameon=False)
    ax2.imshow(image_array)
    plt.axis('off')
    plt.show()

#sample_1 = test['drawing_np'].values[0]
#plot_image(sample_1)

#Params


In [0]:
# Model parameters
BATCH_SIZE = 64
EPOCHS = 15
LEARNING_RATE = 0.001
HEIGHT = 32
WIDTH = 32
CHANNEL = 1

##Reading Data


In [8]:
TRAIN_PATH = '/root/data/'
TEST_PATH = '../input/test_simplified.csv'
SUBMISSION_NAME = 'submission.csv'
n_cat = 5

train = pd.DataFrame()
for file in os.listdir(TRAIN_PATH):
    train = train.append(pd.read_csv(TRAIN_PATH + file, usecols=[1, 5], nrows = 2000))
# Shuffle dat
train = shuffle(train, random_state=123)
print(len(train))
print(list(train))

10000
['drawing', 'word']


In [9]:
#Fixing labels.
train['word'] = train['word'].replace(' ', '_', regex=True)
# Get labels and one-hot encode them.
classes_names = train['word'].unique()
print(classes_names)
labels = pd.get_dummies(train['word']).values
train.drop(['word'], axis=1, inplace=True)
N_CLASSES = len(classes_names)

['shovel' 'crayon' 'piano' 'bench' 'screwdriver']


In [0]:
# third work
def drawing_to_np(drawing, shape=(32, 32)):
    # evaluates the drawing array
    drawing = eval(drawing)
    fig, ax = plt.subplots()
    for x,y in drawing:
        ax.plot(x, y, marker='.')
        ax.axis('off')        
    fig.canvas.draw()
    # Close figure so it won't get displayed while transforming the set
    plt.close(fig)
    # Convert images to numpy array
    np_drawing = np.array(fig.canvas.renderer._renderer)
    # Take only one channel
    np_drawing =np_drawing[:, :, 1]    
    # Normalize data
    np_drawing = np_drawing / 255.
    return cv2.resize(np_drawing, shape) # Resize array


In [0]:
# Transform drawing into numpy arrays
train['drawing'] = train['drawing'].apply(drawing_to_np)

In [12]:
print(train['drawing'].shape)

(10000,)


In [13]:
# Reshape arrays
train_drawings = np.asarray([x.reshape(HEIGHT, WIDTH, CHANNEL) for x in train['drawing'].values])
print(train_drawings.shape)

(10000, 32, 32, 1)


In [14]:
x_train, x_val, y_train, y_val = train_test_split(train_drawings, labels, test_size= 0.2, random_state=1)
print(y_train.shape)

(8000, 5)


# Mobile Net

In [0]:
STEPS = 800
EPOCHS = 10
size = 32
batchsize = 680
optimizer = optimizers.adam(lr=LEARNING_RATE)

In [19]:
model_mn = MobileNet(input_shape=(size, size, 1), alpha=1., weights=None, classes=N_CLASSES)
model_mn.compile(optimizer= Adam(lr=0.002), loss='categorical_crossentropy',
              metrics=["accuracy"])
print(model_mn.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 32, 32, 1)         0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 33, 33, 1)         0         
_________________________________________________________________
conv1 (Conv2D)               (None, 16, 16, 32)        288       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 16, 16, 32)        128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 16, 16, 32)        0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 16, 16, 32)        288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 16, 16, 32)        128       
__________

In [21]:
history = model_mn.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=15, validation_data=(x_val, y_val))

Train on 8000 samples, validate on 2000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [0]:
plot_metrics_primary(history.history['acc'], history.history['val_acc'], history.history['loss'], history.history['val_loss'])

# Model CNN


In [0]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(5,5),padding='Same', activation='relu', input_shape=(HEIGHT, WIDTH, CHANNEL)))

model.add(Conv2D(32, kernel_size=(5,5),padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, kernel_size=(3,3),padding='Same', activation='relu'))
model.add(Conv2D(64, kernel_size=(3,3),padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())

model.add(Dense(50, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(N_CLASSES, activation = "softmax"))
optimizer = optimizers.adam(lr=LEARNING_RATE)
model.compile(optimizer=optimizer , loss="categorical_crossentropy", metrics=["accuracy"])
# model.summary()

In [26]:
history = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(x_val, y_val))
# history = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

Train on 8000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
# print(np.argmax(y_val, axis=1))
# print(model.predict_classes(x_val))
#cnf_matrix = confusion_matrix(np.argmax(y_val, axis=1), model.predict_classes(x_val))
# print(cnf_matrix)
#print(classes_name)
# plot_confusion_matrix(cnf_matrix, classes_names)

#Test


In [0]:
# Transform drawing into numpy arrays.
#test['drawing_np'] = test['drawing'].apply(drawing_to_np)
# Reshape arrays.
#test_drawings = np.asarray([x.reshape(HEIGHT, WIDTH, CHANNEL) for x in test['drawing_np'].values])

In [0]:
# predictions = model.predict(test_drawings)
# top_3_predictions = np.asarray([np.argpartition(pred, -3)[-3:] for pred in predictions])
# top_3_predictions = ['%s %s %s' % (classes_names[pred[0]], classes_names[pred[1]], classes_names[pred[2]]) for pred in top_3_predictions]
# test['word'] = top_3_predictions

In [0]:
# submission = test[['key_id', 'word']]
# submission.to_csv(SUBMISSION_NAME, index=False)
# submission.head()

# Image to Array


In [0]:
# one way
# examples = [ast.literal_eval(e) for e in data['drawing'][:15].values]
# fig, ax = plt.subplots(1,15,figsize=(20,2))
# for i, example in enumerate(examples[:15]):
#     for x, y in example:
#         ax[i].plot(x, y, marker='.', markersize=1, lw=3)
#         ax[i].invert_yaxis()
#         ax[i].axis('off')
# #plt.close('all')
#plt.gcf()


In [0]:
# second way
# def draw_it(raw_strokes):
#     image = Image.new("P", (255,255), color=255)
#     image_draw = ImageDraw.Draw(image)

#     for stroke in eval(raw_strokes):
#         for i in range(len(stroke[0])-1):

#             image_draw.line([stroke[0][i], 
#                              stroke[1][i],
#                              stroke[0][i+1], 
#                              stroke[1][i+1]],
#                             fill=0, width=6)
#             image.show()
#     return np.array(image)
# img = draw_it(data_mat[0])