In [31]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.utils.vis_utils import plot_model
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from keras import backend as K
from myModel.model import VGGNet
import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os

In [32]:
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", # --dataset : The path to our dataset. add required=True if you want
	help="path to input dataset (i.e., directory of images)")
ap.add_argument("-m", "--model", # --model : The path to our output serialized Keras model.
	help="path to output model")
ap.add_argument("-l", "--labelbin", # --labelbin : The path to our output multi-label binarizer object.
	help="path to output label binarizer")
ap.add_argument("-p", "--plot", type=str, default="plot.png", # --plot : The path to our output plot of training loss and accuracy.
	help="path to output accuracy/loss plot")
args = vars(ap.parse_args())

usage: ipykernel_launcher.py [-h] [-d DATASET] [-m MODEL] [-l LABELBIN]
                             [-p PLOT]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\Outhm\AppData\Roaming\jupyter\runtime\kernel-802876e1-7d4d-48e4-b85a-008a0a726acc.json


SystemExit: 2

In [39]:
img_dim = (45,45,3)
EPOCHS = 12
train_data_dir = 'splited_dataset/train'
test_data_dir = 'splited_dataset/test'
BS = 32
LR = 1e-3
labels = []
train_samples_nbr = file_count = sum(len(files) for _, _, files in os.walk(r'splited_dataset/train'))
test_samples_nbr = file_count = sum(len(files) for _, _, files in os.walk(r'splited_dataset/test'))
print("Number of Training images : ",train_samples_nbr)
print("Number of Testing images : ",test_samples_nbr)

Number of Training images :  300779
Number of Testing images :  75194


In [40]:
if K.image_data_format() == 'channels_first':
    input_shape = (img_dim[2], img_dim[0], img_dim[1])
else:
    input_shape = (img_dim[0], img_dim[1], img_dim[2])

In [41]:
# grab the image paths and randomly shuffle them
print("[INFO] loading images...")
#imagePaths = sorted(list(paths.list_images(args["dataset"])))
imagePaths = sorted(list(paths.list_images(train_data_dir)))
#imagePaths = sorted(list(paths.list_images("data/extracted_images")))
random.seed(42)
random.shuffle(imagePaths)

[INFO] loading images...


In [42]:
l = label = [ item for item in os.listdir(train_data_dir) if os.path.isdir(os.path.join(train_data_dir, item)) ]
labels.append(l)
print("Classes : ",labels[0]) #labels

Classes :  ['!', '(', ')', '+', ',', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '=', 'A', 'alpha', 'ascii_124', 'b', 'beta', 'C', 'cos', 'd', 'Delta', 'div', 'e', 'exists', 'f', 'forall', 'forward_slash', 'G', 'gamma', 'geq', 'gt', 'H', 'i', 'in', 'infty', 'int', 'j', 'k', 'l', 'lambda', 'ldots', 'leq', 'lim', 'log', 'lt', 'M', 'mu', 'N', 'neq', 'o', 'p', 'phi', 'pi', 'pm', 'prime', 'q', 'R', 'rightarrow', 'S', 'sigma', 'sin', 'sqrt', 'sum', 'T', 'tan', 'theta', 'times', 'u', 'v', 'w', 'X', 'y', 'z', '[', ']', '{', '}']


In [43]:
# binarize the labels using scikit-learn's special multi-label
# binarizer implementation
print("[INFO] class labels:")
mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(labels)
 
# loop over each of the possible class labels and show them
for (i, label) in enumerate(mlb.classes_):
	print("{}. {}".format(i + 1, label))

[INFO] class labels:
1. !
2. (
3. )
4. +
5. ,
6. -
7. 0
8. 1
9. 2
10. 3
11. 4
12. 5
13. 6
14. 7
15. 8
16. 9
17. =
18. A
19. C
20. Delta
21. G
22. H
23. M
24. N
25. R
26. S
27. T
28. X
29. [
30. ]
31. alpha
32. ascii_124
33. b
34. beta
35. cos
36. d
37. div
38. e
39. exists
40. f
41. forall
42. forward_slash
43. gamma
44. geq
45. gt
46. i
47. in
48. infty
49. int
50. j
51. k
52. l
53. lambda
54. ldots
55. leq
56. lim
57. log
58. lt
59. mu
60. neq
61. o
62. p
63. phi
64. pi
65. pm
66. prime
67. q
68. rightarrow
69. sigma
70. sin
71. sqrt
72. sum
73. tan
74. theta
75. times
76. u
77. v
78. w
79. y
80. z
81. {
82. }


In [44]:
print("[INFO] compiling model...")
model = VGGNet.build(
	width=img_dim[1], height=img_dim[0],
	depth=img_dim[2], classes=82,
    activFct="softmax") #for multi-class classification

[INFO] compiling model...


In [45]:
opt = Adam(lr=LR, decay=LR / EPOCHS)
#opt = RMSprop(lr=LR, rho=0.9, epsilon=None, decay=0.0)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy']) #binary_crossentropy training 99% acc 

In [46]:
# data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    featurewise_center=False,# set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=5.0,  # randomly rotate images in the range (deg 0 to 180)
    width_shift_range=0.0,  # randomly shift images horizontally
    height_shift_range=0.0,  # randomly shift images vertically
    horizontal_flip=False,  # randomly flip images
    vertical_flip=False
    )

# data augmentation for testing
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [47]:
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_dim[0], img_dim[1]),
    batch_size=BS,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_dim[0], img_dim[1]),
    batch_size=BS,
    class_mode='categorical')

Found 300779 images belonging to 82 classes.
Found 75194 images belonging to 82 classes.


In [None]:
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_samples_nbr // BS,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=test_samples_nbr // BS)

Epoch 1/12
   2/9399 [..............................] - ETA: 4:16:56 - loss: 4.8791 - acc: 0.0156    

  % delta_t_median)


  39/9399 [..............................] - ETA: 2:32:51 - loss: 4.2431 - acc: 0.0545

In [None]:
# save the model to disk
print("[INFO] serializing network...")
#model.save(args["model"])
model.save("trained_model.model")
model.save_weights("weights.h5")
#save the multi-label binarizer to disk
print("[INFO] serializing label binarizer...")
# f = open(args["labelbin"], "wb")
f = open("mlb.pickle", "wb")
f.write(pickle.dumps(mlb))
f.close()

In [None]:
probabilities = model.predict_generator(validation_generator,2000)

scores = model.evaluate_generator(validation_generator,test_samples_nbr) #1514 testing images
print("Accuracy = ", scores[1])

In [None]:
fig1, ax_acc = plt.subplots()
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model - Accuracy')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.savefig("plotting.png")