# Comparação entre modelos para definir publicação ou não em produção
## Comparar:
* Accuracy geral
* Recall em vazios
* Recall em não vazios
* Velocidade
* Uso de memória

In [6]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from time import time
from sklearn.metrics import classification_report, confusion_matrix


# Carregar imagens (para modelos 2 e 3)

In [7]:
caminho_basest = os.path.join('..', 'bases', 'vazios')
caminho_traint = os.path.join(caminho_basest, 'train')
caminho_testt = os.path.join(caminho_basest, 'test')

In [8]:
SIZE = (224, 224)

validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    caminho_testt,
    target_size=SIZE,
    batch_size=128,
    class_mode='binary',
    shuffle=False)

train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    caminho_traint,
    target_size=SIZE,
    batch_size=128,
    class_mode='binary',
    shuffle=False)

Found 2317 images belonging to 2 classes.
Found 20845 images belonging to 2 classes.


# Carregar modelo scvvazios.pkl

In [4]:
import numpy as np
from PIL import Image
from sklearn.externals import joblib

SIZE = (128, 128)
MODEL_FILE = 'vaziossvc.pkl'

validation_generator_svc = validation_datagen.flow_from_directory(
    caminho_testt,
    target_size=SIZE,
    batch_size=128,
    class_mode='binary',
    shuffle=False)


model1 = joblib.load(MODEL_FILE)


Found 2317 images belonging to 2 classes.


In [5]:
%%time
y_pred1 = []
print(len(validation_generator_svc))
for ind in range(len(validation_generator_svc)):
    images, labels = next(validation_generator_svc)
    print(ind, images.shape)
    batch = images[:, :, :, 0]
    image_array = np.reshape(batch, (images.shape[0], images.shape[1] * images.shape[2]))
    %time predictions = model1.predict(image_array)
    y_pred1.extend(predictions)

19
0 (128, 128, 128, 3)
CPU times: user 5.13 s, sys: 0 ns, total: 5.13 s
Wall time: 5.13 s
1 (128, 128, 128, 3)
CPU times: user 5.02 s, sys: 0 ns, total: 5.02 s
Wall time: 5.01 s
2 (128, 128, 128, 3)
CPU times: user 5.12 s, sys: 3.66 ms, total: 5.12 s
Wall time: 5.12 s
3 (128, 128, 128, 3)
CPU times: user 5.24 s, sys: 0 ns, total: 5.24 s
Wall time: 5.24 s
4 (128, 128, 128, 3)
CPU times: user 5.12 s, sys: 0 ns, total: 5.12 s
Wall time: 5.12 s
5 (128, 128, 128, 3)
CPU times: user 5.11 s, sys: 0 ns, total: 5.11 s
Wall time: 5.11 s
6 (128, 128, 128, 3)
CPU times: user 5.2 s, sys: 0 ns, total: 5.2 s
Wall time: 5.2 s
7 (128, 128, 128, 3)
CPU times: user 5.12 s, sys: 0 ns, total: 5.12 s
Wall time: 5.12 s
8 (128, 128, 128, 3)
CPU times: user 5.11 s, sys: 0 ns, total: 5.11 s
Wall time: 5.11 s
9 (128, 128, 128, 3)
CPU times: user 5.06 s, sys: 0 ns, total: 5.06 s
Wall time: 5.06 s
10 (128, 128, 128, 3)
CPU times: user 5.1 s, sys: 0 ns, total: 5.1 s
Wall time: 5.1 s
11 (128, 128, 128, 3)
CPU times

In [6]:
y_pred1 = [0 if item == 1.0 else 1 for item in y_pred1]

In [7]:
y_pred1[:20]

[0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1]

In [8]:
validation_generator.labels[:20]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int32)

In [9]:
sum(y_pred1 == validation_generator.labels) / len(y_pred1)

0.9555459646094088

In [12]:
print(classification_report(validation_generator.labels, y_pred1, digits=4))

             precision    recall  f1-score   support

          0     1.0000    0.9117    0.9538      1166
          1     0.9179    1.0000    0.9572      1151

avg / total     0.9592    0.9555    0.9555      2317



In [13]:
print(confusion_matrix(validation_generator.labels, y_pred1))

[[1063  103]
 [   0 1151]]


# Carregar melhor modelo do notebook 01b3

In [4]:
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
SIZE = (224, 224)

model2 = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(16, (3, 3),
                         padding='same',
                         activation='relu',
                         input_shape=(*SIZE, 3)),
  MaxPooling2D(pool_size=(2, 2)),
  Conv2D(32, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.2),
  Conv2D(64, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(128, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(128, (3, 3), activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(256, (3, 3), activation='relu'),
  Flatten(),
  Dense(256, activation='relu'),
  Dropout(0.4),
  Dense(1, activation='sigmoid')
 
])


In [5]:
model2.load_weights(os.path.join('..', 'models', 'B3modelweights.17-0.08.hdf5'))

In [6]:
%time y_pred2 = model2.predict(validation_generator)

CPU times: user 1min 2s, sys: 8.05 s, total: 1min 10s
Wall time: 22.2 s


In [54]:
y_pred2_labels = y_pred2.reshape(-1) > 0.5

sum(y_pred2_labels == validation_generator.labels) / len(y_pred2_labels)

0.9620198532585239

In [55]:
print(classification_report(validation_generator.labels, y_pred2_labels, digits=4))

             precision    recall  f1-score   support

          0     1.0000    0.9245    0.9608      1166
          1     0.9290    1.0000    0.9632      1151

avg / total     0.9647    0.9620    0.9620      2317



In [56]:
print(confusion_matrix(validation_generator.labels, y_pred2_labels))

[[1078   88]
 [   0 1151]]


# Carregar melhor modelo do notebook 02c3

In [9]:
from tensorflow.keras.applications.densenet import DenseNet121

SIZE = (224, 224)
base_model = DenseNet121(weights='imagenet',
                         input_shape=(*SIZE, 3), 
                         include_top=False,
                         pooling='max')

In [10]:
def extract_features(generator, model):
    generator.reset()
    n_images = len(generator.filenames)
    batch_size = generator.batch_size
    m = batch_size *  (n_images // batch_size)  # Arredondar para não ficar espaço vazio
    m = n_images
    n = model.output.shape[1]
    features = np.zeros((m, n), np.float32)
    y_ = np.zeros((m, 1), np.float32)
    print(m, batch_size, m // batch_size)
    for ind in range(0, m + 1, batch_size):
        batch, y_batch = next(generator)
        features_batch = base_model.predict(batch)
        posi = ind * batch_size
        features[ind: ind + len(y_batch), :] = features_batch
        y_[ind: ind + len(y_batch), 0] = y_batch
    print('last batch setted elements %s:%s' % (ind, ind + len(y_batch)))
    return features, y_

%time features_test, y_test = extract_features(validation_generator, base_model)

2317 128 18
last batch setted elements 2304:2317
CPU times: user 14min 10s, sys: 56.1 s, total: 15min 6s
Wall time: 2min 35s


In [11]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=128,
                       activation='relu',
                       input_shape=(1024,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(units=128,
                       activation='relu',
                       input_shape=(1024,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))


In [12]:
MODEL_DIR = '../models_featureextraction'

epoch = 40
val_loss = 0.08
model.load_weights(
    os.path.join(MODEL_DIR,
                 'Transfermodelweights02c3.{:02d}-{:.2f}.hdf5'.format(epoch, val_loss)
                ))


In [13]:
%time y_pred3a = model.predict(features_test)

CPU times: user 163 ms, sys: 6.28 ms, total: 169 ms
Wall time: 126 ms


In [99]:
y_pred3a_labels = y_pred3a.reshape(-1) > 0.5

sum(y_pred3a_labels == validation_generator.labels) / len(y_pred3)

0.9598618903754855

In [100]:
print(classification_report(validation_generator.labels, y_pred3a_labels, digits=4))

             precision    recall  f1-score   support

          0     0.9900    0.9297    0.9589      1166
          1     0.9329    0.9904    0.9608      1151

avg / total     0.9616    0.9599    0.9598      2317



In [101]:
print(confusion_matrix(validation_generator.labels, y_pred3a_labels))

[[1084   82]
 [  11 1140]]


In [102]:
MODEL_DIR = '../models_featureextraction'

epoch = 13
val_loss = 0.10
model.load_weights(
    os.path.join(MODEL_DIR,
                 'Transfermodelweights02c3-classweights.{:02d}-{:.2f}.hdf5'.format(epoch, val_loss)
                ))


In [103]:
%time y_pred3 = model.predict(features_test)

CPU times: user 63.9 ms, sys: 5.31 ms, total: 69.2 ms
Wall time: 43 ms


In [104]:
y_pred3.shape

(2317, 1)

In [105]:
y_pred3_labels = y_pred3.reshape(-1) > 0.5

sum(y_pred3_labels == validation_generator.labels) / len(y_pred3)

0.9529564091497627

In [106]:
print(classification_report(validation_generator.labels[:len(y_pred3)], y_pred3_labels, digits=4))

             precision    recall  f1-score   support

          0     0.9656    0.9400    0.9526      1166
          1     0.9408    0.9661    0.9533      1151

avg / total     0.9533    0.9530    0.9530      2317



In [107]:
print(confusion_matrix(validation_generator.labels[:len(y_pred3)], y_pred3_labels))

[[1096   70]
 [  39 1112]]


In [108]:
1099 / (1099 + 70)  # Não Vazios classificados "erradamente" como vazios

0.9401197604790419

In [109]:
1112 / (1112 + 39)  # Vazios classificados "erradamente" como não vazios

0.9661164205039097

In [110]:
acertos1 = y_pred1 == validation_generator.labels

In [57]:
acertos2 = y_pred2_labels == validation_generator.labels

In [112]:
acertos3 = y_pred3_labels == validation_generator.labels

In [113]:
acertos3a = y_pred3a_labels == validation_generator.labels

In [114]:
sum(acertos1 == acertos2) / len(acertos1)

0.9935261113508848

In [115]:
sum(acertos2 == acertos3) / len(acertos2)

0.9753992231333621

In [116]:
sum(acertos1 == acertos3) / len(acertos1)

0.9689253344842469

In [117]:
sum(acertos3 == acertos3a) / len(acertos3)

0.9827362969356928

In [118]:
sum(acertos2 == acertos3a) / len(acertos2)

0.9909365558912386

# Carregar melhor modelo do notebook 04

In [16]:
import tensorflow as tf
from tensorflow.keras.layers import Activation, Conv2D, \
    BatchNormalization, concatenate, Dense, Dropout, Flatten, Input, MaxPooling2D
from PIL import Image

SIZE = (224, 224)

nuclear_model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(16, (3, 3),
                         padding='same',
                         activation='relu',
                         input_shape=(*SIZE, 3)),
  MaxPooling2D(pool_size=(2, 2)),
  Conv2D(32, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.2),
  Conv2D(64, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(128, (3, 3), padding='same', activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(128, (3, 3), activation='relu'),
  MaxPooling2D(pool_size=(2, 2)),
  Dropout(0.25),
  Conv2D(256, (3, 3), activation='relu'),
  Flatten(),
  Dense(128, activation='relu'),
])

a_in = Input(shape=(*SIZE, 3))
b_in = Input(shape=(*SIZE, 3))
a_feat = nuclear_model(a_in)
b_feat = nuclear_model(b_in)
combined_features = concatenate([a_feat, b_feat], name = 'merge_features')
combined_features = Dense(16, activation = 'linear')(combined_features)
combined_features = BatchNormalization()(combined_features)
combined_features = Activation('relu')(combined_features)
combined_features = Dense(4, activation = 'linear')(combined_features)
combined_features = BatchNormalization()(combined_features)
combined_features = Activation('relu')(combined_features)
combined_features = Dense(1, activation = 'sigmoid')(combined_features)
similarity_model = tf.keras.Model(inputs = [a_in, b_in], outputs = [combined_features], name = 'Similarity_Model')

MODEL_DIR = '../models/siamese'
epoch = 6
val_loss = 0.24

similarity_model.load_weights(os.path.join(
    MODEL_DIR, 
    '04modelweights.{:02d}-{:.2f}.hdf5'.format(epoch, val_loss)
    )
)


In [17]:
vazio1 = Image.open(os.path.join(caminho_traint, 'vazio/5aa1a06e2a87954aafdda3bc.jpg'))
vazio2 = Image.open(os.path.join(caminho_traint, 'vazio/5aa1cc192a87957e60f49482.jpg'))
nvazio1 = Image.open(os.path.join(caminho_traint, 'nvazio/5aa1a5222a87954aaedda574.jpg'))
nvazio2 = Image.open(os.path.join(caminho_traint, 'nvazio/5aa1cc732a87957e63f4ba9f.jpg'))

In [30]:
def monta_x1_batch(pil_image, batch_size):
    x1 = np.zeros((batch_size, *SIZE, 3), np.float32)
    img_array = np.array(pil_image.resize(SIZE, Image.ANTIALIAS)) / 255
    for ind in range(batch_size):
        x1[ind, :, :, :] = img_array
    return x1

def get_preds(generator, X_batch):
    generator.reset()
    y = []
    y_pred = []
    for r in range(len(validation_generator)):
        x2_batch, y_batch = next(validation_generator)
        pred_batch = similarity_model.predict([X_batch[:len(x2_batch)], x2_batch])
        y.extend(y_batch)
        y_pred.extend(list(pred_batch.reshape(-1)))
    return y, y_pred

def evaluate(generator, pil_image, inverso=False):
    X_batch = monta_x1_batch(pil_image, len(generator.filenames))
    y_test, y_pred = get_preds(generator, X_batch)
    if inverso:
        y_labels = np.array(y_pred) > 0.5
    else:
        y_labels = np.array(y_pred) <= 0.5
    print(sum(y_labels == y_test) / len(y_labels))
    return y_test, y_pred, y_labels 

In [31]:
%time y_test, y_pred, y_labels = evaluate(validation_generator, vazio1)

0.9602934829520933
CPU times: user 10.5 s, sys: 2.6 s, total: 13.1 s
Wall time: 15.4 s


In [34]:
%time y_test, y_predn, y_labelsn = evaluate(validation_generator, nvazio1, inverso=True)

0.9602934829520933
CPU times: user 9.61 s, sys: 2.07 s, total: 11.7 s
Wall time: 14 s


In [35]:
print(classification_report(y_test, y_labels, digits=4))

             precision    recall  f1-score   support

        0.0     1.0000    0.9211    0.9589      1166
        1.0     0.9260    1.0000    0.9616      1151

avg / total     0.9632    0.9603    0.9602      2317



In [36]:
print(confusion_matrix(validation_generator.labels, y_labels))

[[1074   92]
 [   0 1151]]


In [37]:
print(classification_report(y_test, y_labelsn, digits=4))

             precision    recall  f1-score   support

        0.0     0.9963    0.9245    0.9591      1166
        1.0     0.9287    0.9965    0.9614      1151

avg / total     0.9627    0.9603    0.9603      2317



In [38]:
print(confusion_matrix(validation_generator.labels, y_labelsn))

[[1078   88]
 [   4 1147]]


In [47]:
y_labels_media = np.array(y_pred) < np.array(y_predn)

In [48]:
print(classification_report(y_test, y_labels_media, digits=4))

             precision    recall  f1-score   support

        0.0     0.9972    0.9228    0.9586      1166
        1.0     0.9273    0.9974    0.9611      1151

avg / total     0.9625    0.9599    0.9598      2317



In [49]:
print(confusion_matrix(validation_generator.labels, y_labels_media))

[[1076   90]
 [   3 1148]]
