# 1. Данные


Подключаем библиотеки


In [None]:
import shutil # High-level operations on files, groups of files and directories
import os # File-system access
import zipfile # Working with zip archives
from google.colab import drive # Google Drive integration for Colab
from PIL import Image # Image handling

Получаем ссылку для аутентификации в Google Drive и монтируем диск


In [None]:
# Mount Google Drive into the Colab file system at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


Находим нужный архив с датасетом и извлекаем все

In [None]:
# Dataset archive on the mounted Google Drive.
zip_file = '/content/drive/MyDrive/CNN_mango/mango_dataset.zip'

# The context manager closes the archive handle as soon as extraction is done;
# with no target path given, members are extracted into the current working
# directory.
with zipfile.ZipFile(zip_file, 'r') as z:
    z.extractall()

In [None]:
# Root directory of the extracted dataset
data_dir = '/content/mango_dataset'

# Output directories for the training / validation / test splits
train_dir, val_dir, test_dir = 'train', 'val', 'test'

# Fractions of the dataset reserved for validation and for testing
val_data_portion, test_data_portion = 0.2, 0.1

# Number of images per class
nb_images = 500

Функция для создания train, test, val директорий

In [None]:
def create_directory(dir_name):
    """Create an empty split directory with one sub-folder per mango class.

    If ``dir_name`` already exists it is removed together with everything
    inside it before being recreated.

    Args:
        dir_name: Path of the split directory to (re)create.
    """
    class_folders = (
        "Anthracnose",
        "Bacterial Canker",
        "Cutting Weevil",
        "Die Back",
        "Gall Midge",
        "Healthy",
        "Powdery Mildew",
        "Sooty Mould",
    )

    # Start from a clean slate so files from a previous run cannot leak in.
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)

    for folder in class_folders:
        os.makedirs(os.path.join(dir_name, folder))

In [None]:
# (Re)create the train, validation and test directories, each with its
# per-class sub-folders.
for split_dir in (train_dir, val_dir, test_dir):
    create_directory(split_dir)

Функция для копирования данных в нужную директорию

In [None]:
def copy_images(start_index, end_index, source_dir, dest_dir):
    """Copy one numbered slice of every class into the class folders of ``dest_dir``.

    Source files are expected in ``source_dir`` under the name
    ``"<Class> (<i>).jpg"``. For every ``i`` from ``start_index + 1`` up to
    and including ``end_index``, the file of each class is copied (with
    metadata, via ``shutil.copy2``) into ``dest_dir/<Class>``.

    Args:
        start_index: Number just before the first image to copy.
        end_index: Number of the last image to copy.
        source_dir: Directory holding the numbered source images.
        dest_dir: Split directory that already contains the class folders.
    """
    class_names = (
        "Anthracnose",
        "Bacterial Canker",
        "Cutting Weevil",
        "Die Back",
        "Gall Midge",
        "Healthy",
        "Powdery Mildew",
        "Sooty Mould",
    )

    for number in range(start_index + 1, end_index + 1):
        for class_name in class_names:
            file_name = f"{class_name} ({number}).jpg"
            shutil.copy2(os.path.join(source_dir, file_name),
                         os.path.join(dest_dir, class_name))

Высчитываем вспомогательные данные

In [None]:
# Per-class image numbers at which the validation and the test slices begin;
# everything before the validation boundary is used for training.
start_val_data_idx = int((1 - val_data_portion - test_data_portion) * nb_images)
start_test_data_idx = int((1 - test_data_portion) * nb_images)
print(start_val_data_idx, start_test_data_idx, sep='\n')

350
450


In [None]:
# Distribute the numbered images of every class over the three splits:
# (first, last, destination) for training, validation and testing.
split_ranges = (
    (0, start_val_data_idx, train_dir),
    (start_val_data_idx, start_test_data_idx, val_dir),
    (start_test_data_idx, nb_images, test_dir),
)
for first, last, split_dir in split_ranges:
    copy_images(first, last, data_dir, split_dir)

# 2. Создание модели InceptionV3. Загрузка изображений в модель. Сохранение признаков.

> Блок с отступами



In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD


import numpy as np
import pandas as pd
import h5py

import matplotlib.pyplot as plt

In [None]:
# Directories holding the training / validation / test splits
train_dir, val_dir, test_dir = 'train', 'val', 'test'

# Image size fed to the network
img_width = img_height = 224

# Shape of the input tensor built from one image
# (TensorFlow backend, channels_last)
input_shape = (img_width, img_height, 3)

# Mini-batch size
batch_size = 16

# Number of images in the training / validation / test splits
nb_train_samples = 2800
nb_validation_samples = 800
nb_test_samples = 400


Создание предобученной сети InceptionV3


In [None]:
# Pre-trained InceptionV3 convolutional base: ImageNet weights, no
# classification top, input shape taken from the notebook settings.
InceptionV3_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Image generator that only multiplies pixel values by 1/255 (no augmentation).
datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Training images for bottleneck-feature extraction.
# shuffle=False is essential: the features predicted from this generator are
# later paired with labels that are generated class by class, which is only
# valid when the images are yielded in directory (class-sorted) order.
# flow_from_directory shuffles by default.
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    classes=['Anthracnose', 'Bacterial Canker', 'Cutting Weevil', 'Die Back',
             'Gall Midge', 'Healthy', 'Powdery Mildew', 'Sooty Mould'],
    class_mode='categorical',
    shuffle=False)

Found 2800 images belonging to 8 classes.


In [None]:
# Validation images for bottleneck-feature extraction.
# shuffle=False is essential: the features predicted from this generator are
# later paired with labels that are generated class by class, which is only
# valid when the images are yielded in directory (class-sorted) order.
# flow_from_directory shuffles by default.
val_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    classes=['Anthracnose', 'Bacterial Canker', 'Cutting Weevil', 'Die Back',
             'Gall Midge', 'Healthy', 'Powdery Mildew', 'Sooty Mould'],
    class_mode='categorical',
    shuffle=False)

Found 800 images belonging to 8 classes.


In [None]:
# Run the pre-trained convolutional base once over each split and cache the
# resulting "bottleneck" features on disk for training the classifier head.
#
# `steps` counts batches, not images, so it is derived from the sample count
# and the batch size (rounded up). Model.predict accepts generators directly
# and replaces the deprecated predict_generator.
# The labels built for these features assume class-sorted order, so the
# generators must not shuffle.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
bottleneck_features_train = InceptionV3_model.predict(
    train_generator, steps=train_steps, verbose=0)
# Passing the file name lets NumPy open and close the file itself.
np.save('bn_features_train.npy', bottleneck_features_train)

validation_steps = (nb_validation_samples + batch_size - 1) // batch_size
bottleneck_features_validation = InceptionV3_model.predict(
    val_generator, steps=validation_steps, verbose=0)
np.save('bn_features_validation.npy', bottleneck_features_validation)

  bottleneck_features_train = InceptionV3_model.predict_generator(train_generator, nb_train_samples)
  bottleneck_features_validation = InceptionV3_model.predict_generator(val_generator, nb_validation_samples)


# 3. Создание верхней части модели. Загрузка данных в модель. Сохранение данных.

In [None]:
# Number of mango classes; every class contributes the same number of images
# to each split.
nb_classes = 8

# Load the cached bottleneck features. Passing the file name lets NumPy open
# and close the file itself instead of leaking an open handle.
train_data = np.load('bn_features_train.npy')
# Integer labels 0..7, one run per class. This assumes the features were
# extracted in class-sorted order (generators created without shuffling).
train_labels = np.repeat(np.arange(nb_classes), nb_train_samples // nb_classes)

validation_data = np.load('bn_features_validation.npy')
validation_labels = np.repeat(np.arange(nb_classes),
                              nb_validation_samples // nb_classes)

Создание модели FFN-сети и её компиляция

In [None]:
# Fully connected classifier head trained on the cached bottleneck features.
fc_model = Sequential([
    Flatten(input_shape=train_data.shape[1:]),
    Dense(64, activation='relu'),        # first hidden layer
    Dropout(0.5, name='dropout_one'),    # first dropout layer
    Dense(64, activation='relu'),        # second hidden layer
    Dropout(0.5, name='dropout_two'),    # second dropout layer
    Dense(8, activation='softmax'),      # output layer, one unit per class
])

# Labels are integer class indices, hence the sparse loss.
fc_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train the classifier head on the bottleneck features, validating on the
# validation features after every epoch.
fc_model.fit(
    x=train_data,
    y=train_labels,
    batch_size=16,
    epochs=50,
    validation_data=(validation_data, validation_labels),
)

# Persist the trained head weights
fc_model.save_weights('fc_InceptionV3_mango.hdf5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Loss and accuracy of the trained head on the validation features.
fc_model.evaluate(x=validation_data, y=validation_labels)



[2.0794429779052734, 0.125]

# 4. Создание итоговой модели, загрузка в неё аугментированных данных, сохранение весов

In [None]:
weights_filename = 'fc_InceptionV3_mango.hdf5'

# Rebuild the classifier head with the same layer sequence that produced
# `weights_filename` and load the weights by layer order.
# Loading with by_name=True only restores layers whose names match the saved
# ones; auto-generated names (dense, dense_1, ...) depend on how many layers
# were created earlier in the session, so a by-name load can silently leave
# the head randomly initialised.
fc_head = Sequential([
    Flatten(input_shape=InceptionV3_model.output_shape[1:]),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(8, activation='softmax'),
])
fc_head.load_weights(weights_filename)

# Full network: InceptionV3 convolutional base followed by the trained head.
top_model = fc_head(InceptionV3_model.output)
model = Model(InceptionV3_model.input, top_model)

In [None]:
# Freeze the first 205 layers of the convolutional base; only the remaining
# layers (and the classifier head) stay trainable during fine-tuning.
frozen_layers = InceptionV3_model.layers[:205]
for frozen_layer in frozen_layers:
    frozen_layer.trainable = False

In [None]:
# Fine-tune with a small learning rate and momentum so the pre-trained
# weights are only adjusted gently. `learning_rate` replaces the deprecated
# `lr` argument of the optimizer.
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(learning_rate=1e-4, momentum=0.9),
              metrics=['accuracy'])

  super().__init__(name, **kwargs)


Сделаем так, чтобы в процессе обучения сохранялась только модель с наибольшей точностью на проверочной (валидационной) выборке:

In [None]:
# Checkpoint callback: the file is overwritten only when validation accuracy
# reaches a new maximum, so it always holds the best state seen so far.
filepath = "weights-improvement.h5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [None]:
# Training images are augmented on the fly (shear, zoom, horizontal flip) in
# addition to rescaling; validation/test images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# One-hot labels are inferred from the class sub-folders of each split.
train_generator2 = train_datagen.flow_from_directory(
    train_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)
validation_generator = test_datagen.flow_from_directory(
    val_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_width, img_height),
)


Found 2800 images belonging to 8 classes.
Found 800 images belonging to 8 classes.


In [None]:
  # Fine-tune the full network on augmented training images, validating on
  # the validation split after every epoch and checkpointing the best model.
  # Model.fit accepts generators directly; it replaces the deprecated
  # fit_generator. Step counts are whole batches per epoch.
  model.fit(
        train_generator2,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size,
        callbacks=callbacks_list)

  model.fit_generator(


Epoch 1/50
Epoch 1: val_accuracy improved from -inf to 0.46125, saving model to weights-improvement.h5
Epoch 2/50
Epoch 2: val_accuracy improved from 0.46125 to 0.56875, saving model to weights-improvement.h5
Epoch 3/50
Epoch 3: val_accuracy improved from 0.56875 to 0.68875, saving model to weights-improvement.h5
Epoch 4/50
Epoch 4: val_accuracy improved from 0.68875 to 0.80625, saving model to weights-improvement.h5
Epoch 5/50
Epoch 5: val_accuracy improved from 0.80625 to 0.84250, saving model to weights-improvement.h5
Epoch 6/50
Epoch 6: val_accuracy improved from 0.84250 to 0.84750, saving model to weights-improvement.h5
Epoch 7/50
Epoch 7: val_accuracy improved from 0.84750 to 0.88125, saving model to weights-improvement.h5
Epoch 8/50
Epoch 8: val_accuracy did not improve from 0.88125
Epoch 9/50
Epoch 9: val_accuracy improved from 0.88125 to 0.89750, saving model to weights-improvement.h5
Epoch 10/50
Epoch 10: val_accuracy did not improve from 0.89750
Epoch 11/50
Epoch 11: val_acc

<keras.callbacks.History at 0x7f69f10e83a0>

Сохранение модели в h5 формате

In [None]:
# Save the complete model to a single file; the .h5 suffix requests the HDF5 format.
model.save('Mango_InceptionV3.h5')

Сохранение модели в SavedModel формате

In [None]:
# Export the model to a directory (path without a file extension).
model.save('Mango_InceptionV3')
path = 'Mango_InceptionV3'
file_dir = os.listdir(path)  # top-level entries of the export directory

# Pack the whole export directory into one archive.
# The tree is walked recursively: writing only the top-level entries would
# store sub-directories as empty folders and drop the files inside them.
with zipfile.ZipFile('Mango_InceptionV3.zip', mode='w',
                     compression=zipfile.ZIP_DEFLATED) as zf:
    for folder, _subfolders, file_names in os.walk(path):
        # Directory entries are written too, so empty folders are preserved.
        zf.write(folder)
        for file_name in file_names:
            zf.write(os.path.join(folder, file_name))



# 5. Оценка точности модели

In [None]:
# Test images: rescaled only, labels inferred from the class sub-folders.
pred_generator = test_datagen.flow_from_directory(
    test_dir, target_size=(224, 224), batch_size=16, class_mode='categorical')

# NOTE(review): in TF 2.x Keras, evaluate() appears to attach a fresh, empty
# History object to the model. The training history is kept and restored so
# that later cells reading model.history still see the results of fit().
training_history = getattr(model, 'history', None)

# Model.evaluate accepts generators directly (evaluate_generator is
# deprecated). len(generator) is the number of batches covering the whole
# test split, which replaces the hard-coded step count.
scores = model.evaluate(pred_generator, steps=len(pred_generator), verbose=0)

if training_history is not None:
    model.history = training_history

# scores is [loss, accuracy], matching the metrics given to compile().
print("Аккуратность на тестовых данных: %.2f%%" % (scores[1]*100))

Found 400 images belonging to 8 classes.


  scores = model.evaluate_generator(pred_generator, 25)


Аккуратность на тестовых данных: 97.00%


In [None]:
# `params` lives on the History callback itself; `History.history` is a plain
# dict of per-epoch metrics and has no such attribute.
print(model.history.params)

AttributeError: ignored

In [None]:
# Per-epoch metrics recorded on the model's History object.
# NOTE(review): this must still be the history of the fine-tuning run; if it
# was replaced after training (e.g. by a later evaluate call), the keys below
# will be missing — confirm the cell order.
history_log = model.history.history
tr_acc = history_log['accuracy']
tr_loss = history_log['loss']
val_acc = history_log['val_accuracy']
val_loss = history_log['val_loss']

# Best epochs: lowest validation loss and highest validation accuracy.
index_loss = np.argmin(val_loss)
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)
acc_highest = val_acc[index_acc]

# Epoch numbers start at 1 for display.
Epochs = list(range(1, len(tr_acc) + 1))
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

# Plot training history.
# The style is selected before the figure is created so it applies to the
# whole figure.
plt.style.use('fivethirtyeight')
plt.figure(figsize= (20, 8))

# Left panel: loss curves with the best validation epoch highlighted.
plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy curves with the best validation epoch highlighted.
plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute access did nothing.
plt.tight_layout()
plt.show()

KeyError: ignored

Проиллюстрируем работу модели