In [1]:
## MobileNet and VGG16 were trained to classify 1000 different categories.
## Among these: dogs and cats.
## Let's now test transfer learning on categories NEVER seen by MobileNet (dog vs cat was the easier
## case, because those classes were already in the training set)

## Sign language DIGITS!

In [2]:
## 1. Download the data (one-time setup, run from a shell)
# https://github.com/ardamavi/Sign-Language-Digits-Dataset
# The rest of the notebook reads the dataset from data/Sign-Language-Digits-Dataset/
#git clone https://github.com/ardamavi/Sign-Language-Digits-Dataset.git


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.applications import imagenet_utils
from sklearn.metrics import confusion_matrix
import itertools
import os
import shutil
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
## Organize data into train, valid, test dirs (just the first time)
## The script below is kept commented out so that re-running the notebook does not move files again.
## For each digit folder 0-9 it moves the folder under train/, then moves 30 randomly chosen
## images to valid/<digit> and 5 more to test/<digit>.
## NOTE(review): no random seed is set before random.sample, so the split differs on every run.
# cwd = os.getcwd()
# print(cwd)
# os.chdir("data/Sign-Language-Digits-Dataset/")
# if os.path.isdir("train/0/") is False:
#     os.mkdir("train")
#     os.mkdir("valid")
#     os.mkdir("test")
    
#     for i in range(0,10):
#         print(i)
#         shutil.move(f'{i}','train')
#         os.mkdir(f'valid/{i}')
#         os.mkdir(f'test/{i}')
        
#         valid_samples = random.sample(os.listdir(f'train/{i}'),30)
#         for j in valid_samples:
#             shutil.move(f'train/{i}/{j}',f'valid/{i}')
            
#         test_samples = random.sample(os.listdir(f'train/{i}'),5)
#         for k in test_samples:
#             shutil.move(f'train/{i}/{k}',f'test/{i}')
            
# os.chdir("../..")

/home/gfasanel/Keras_notebooks
0
1
2
3
4
5
6
7
8
9


In [6]:
## Preprocess input
train_path = "data/Sign-Language-Digits-Dataset/train"
valid_path = "data/Sign-Language-Digits-Dataset/valid"
test_path = "data/Sign-Language-Digits-Dataset/test"

# A single generator serves all three splits: it only applies MobileNet's
# preprocessing function and is configured with no augmentation.
mobilenet_gen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet.preprocess_input
)
# Settings shared by every split: MobileNet-sized images, 10 per batch.
flow_kwargs = dict(target_size=(224, 224), batch_size=10)

train_batches = mobilenet_gen.flow_from_directory(directory=train_path, **flow_kwargs)

valid_batches = mobilenet_gen.flow_from_directory(directory=valid_path, **flow_kwargs)

# The test iterator is not shuffled, so its order matches `test_batches.classes`.
test_batches = mobilenet_gen.flow_from_directory(directory=test_path, shuffle=False, **flow_kwargs)

# Sanity check: every split holds the expected number of images.
for batches, expected_n in ((train_batches, 1712), (valid_batches, 300), (test_batches, 50)):
    assert batches.n == expected_n

Found 1712 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 50 images belonging to 10 classes.


In [7]:
## Load MobileNet: the full ImageNet-pretrained classifier, original head included
mobile = tf.keras.applications.mobilenet.MobileNet(include_top=True, weights="imagenet")
mobile.summary()

Model: "mobilenet_1.00_224"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32

In [8]:
## Keep MobileNet up to its global-average-pooling layer and drop the original classification head.
## Why did we discard only the last layer before, and the whole head now?
## Because cats and dogs were already VGG16 classes, whereas sign-language digits are NOT classes the network has seen.
## We have to step back from MobileNet's output layer, otherwise the features are "too specialised" on ImageNet.

## Select the pooling layer by type instead of a hard-coded `mobile.layers[-6]`:
## the number of layers that follow the pooling layer is not the same in every Keras release,
## so a fixed negative index only lands on the pooling layer on some of them.
pooling_layer = next(
    layer for layer in reversed(mobile.layers)
    if isinstance(layer, tf.keras.layers.GlobalAveragePooling2D)
)
x = pooling_layer.output
## If the pooling output still carries spatial axes (rank > 2), flatten it so that
## Dense yields one 10-way prediction per image rather than one per spatial position.
if len(x.shape) > 2:
    x = tf.keras.layers.Flatten()(x)
output = Dense(units=10, activation="softmax")(x)  ## 10 outputs, because there are 10 digits to classify

## What is this (x)?
## Answer: MobileNet does NOT use the Sequential API, it uses the functional API.
## Either you build your own Sequential model and copy it layer by layer (as done in the CNN transfer-learning notebook),
## or you learn the functional-model syntax.

## It is simple: x is the model's output up to the pooling layer.
## A Dense layer is then stacked on top of it. That is what Dense(...)(x) means.

In [10]:
model = Model(inputs=mobile.input, outputs=output)
## Freeze SOME layers: everything except the last 23 stays fixed
## (23 came out of a long fight and a lot of experimentation)
trainable_tail = 23
frozen_layers = model.layers[:-trainable_tail]
for frozen in frozen_layers:
    frozen.trainable = False
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128   

In [12]:
## Fit
## Use `learning_rate`: the `lr` keyword is a deprecated alias that newer Keras optimizers no longer accept.
model.compile(optimizer=Adam(learning_rate=0.0001), loss="categorical_crossentropy", metrics=["accuracy"])
## Run for more epochs (~30) to see better results
model.fit(x=train_batches, validation_data=valid_batches, epochs=10, verbose=2)

Epoch 1/10
172/172 - 45s - loss: 0.6462 - accuracy: 0.7973 - val_loss: 0.3215 - val_accuracy: 0.8967
Epoch 2/10
172/172 - 40s - loss: 0.1194 - accuracy: 0.9790 - val_loss: 0.1430 - val_accuracy: 0.9700
Epoch 3/10
172/172 - 35s - loss: 0.0629 - accuracy: 0.9918 - val_loss: 0.1023 - val_accuracy: 0.9800
Epoch 4/10
172/172 - 34s - loss: 0.0342 - accuracy: 0.9971 - val_loss: 0.0881 - val_accuracy: 0.9867
Epoch 5/10
172/172 - 34s - loss: 0.0248 - accuracy: 0.9965 - val_loss: 0.0846 - val_accuracy: 0.9867
Epoch 6/10
172/172 - 34s - loss: 0.0177 - accuracy: 0.9988 - val_loss: 0.0705 - val_accuracy: 0.9900
Epoch 7/10
172/172 - 35s - loss: 0.0133 - accuracy: 0.9994 - val_loss: 0.0815 - val_accuracy: 0.9867
Epoch 8/10
172/172 - 34s - loss: 0.0181 - accuracy: 0.9965 - val_loss: 0.0671 - val_accuracy: 0.9933
Epoch 9/10
172/172 - 34s - loss: 0.0101 - accuracy: 0.9994 - val_loss: 0.0721 - val_accuracy: 0.9900
Epoch 10/10
172/172 - 34s - loss: 0.0083 - accuracy: 1.0000 - val_loss: 0.0653 - val_accura

<tensorflow.python.keras.callbacks.History at 0x7fc251204790>

In [14]:
## Predict on the (unshuffled) test set and compare against the directory labels
predictions = model.predict(x=test_batches, verbose=0)
predicted_labels = np.argmax(predictions, axis=1)  # most probable class per image
true_labels = test_batches.classes
cm = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)
print(cm)
## All good, my friend!

[[5 0 0 0 0 0 0 0 0 0]
 [0 5 0 0 0 0 0 0 0 0]
 [0 0 5 0 0 0 0 0 0 0]
 [0 0 0 5 0 0 0 0 0 0]
 [0 0 0 0 5 0 0 0 0 0]
 [0 0 0 0 0 5 0 0 0 0]
 [0 0 0 0 0 0 5 0 0 0]
 [0 0 0 0 0 0 0 5 0 0]
 [0 0 0 0 0 0 0 0 5 0]
 [0 0 0 0 0 0 0 0 0 5]]


In [15]:
## Check that the class-name -> index mapping is what you think it is
test_batches.class_indices

{'0': 0,
 '1': 1,
 '2': 2,
 '3': 3,
 '4': 4,
 '5': 5,
 '6': 6,
 '7': 7,
 '8': 8,
 '9': 9}