In [None]:
import os
import shutil
import random
import numpy as np
from io import BytesIO
from PIL import Image
import requests
import urllib
from IPython.display import display

from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow import keras
import tensorflow.lite as tflite

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

The dataset is taken from https://www.kaggle.com/alxmamaev/flowers-recognition

In [None]:
# Root directory of the flowers dataset. Keep the trailing slash: later cells build
# paths by plain string interpolation, e.g. f'{path}train/'.
path = '../data/flowers/'

In [None]:
# Size every image is resized to; also defines the first two dimensions of the
# model input shape (target_size[0], target_size[1], 3).
target_size = (150, 150)

In [None]:
# Load one sample daisy image, resized to target_size, and show it as the cell output.
img = load_img(f'{path}daisy/100080576_f52e8ee070_n.jpg', target_size=target_size)
img

# Split data into train, validation, and test sets

In [None]:
# Class names; each one is the name of a sub-directory of `path` holding that class's images.
flower_types = ['daisy', 'rose', 'tulip', 'dandelion', 'sunflower']

In [None]:
# Report how many raw images exist for each flower type.
for flower_type in flower_types:
    class_dir = os.path.join(path, flower_type)
    images = os.listdir(class_dir)
    num_images = len(images)
    print(f'flower: {flower_type:>10} - number images: {num_images}')

In [None]:
# Create <path>/<split>/<flower_type> for every split and class that does not exist yet.
split_dirs = [
    os.path.join(path, folder, flower_type)
    for folder in ['train', 'validation', 'test']
    for flower_type in flower_types
]
for directory in split_dirs:
    if os.path.exists(directory):
        continue
    os.makedirs(directory)
    print(f'created dir {directory}')

In [None]:
# Fractions of each class copied to the test and validation sets; the remainder
# becomes the training set.
test_size = 0.15
val_size = 0.15

for flower_type in flower_types:
    flower_type_path = os.path.join(path, flower_type)
    # os.listdir returns entries in arbitrary order. Sort first so the seeded
    # shuffle below produces the same split on every run and every machine;
    # otherwise a re-run could copy the same image into two different splits.
    images = sorted(os.listdir(flower_type_path))

    random.seed(1)
    random.shuffle(images)

    num_test_images = int(len(images) * test_size)
    num_val_images = int(len(images) * val_size)
    val_end = num_test_images + num_val_images

    # Disjoint slices of the shuffled list, one per split.
    splits = {
        'test': images[:num_test_images],
        'validation': images[num_test_images:val_end],
        'train': images[val_end:],
    }

    for folder, split_images in splits.items():
        for image_name in split_images:
            shutil.copyfile(
                os.path.join(flower_type_path, image_name),
                os.path.join(path, folder, flower_type, image_name),
            )

# EDA

## check imbalance

In [None]:
# flower_dict[flower_type][split] -> number of images in that split directory.
flower_dict = {flower_type: {} for flower_type in flower_types}
for flower_type, split_counts in flower_dict.items():
    for folder in ['test', 'train', 'validation']:
        flower_path = os.path.join(path, folder, flower_type)
        # Splits whose directory is missing are simply left out of the dict.
        if not os.path.isdir(flower_path):
            continue
        images = os.listdir(flower_path)
        split_counts[folder] = len(images)
        print(f'flower: {flower_type:>10} - {folder} - number images: {len(images)}')

In [None]:
# Bar chart of the number of training images per flower type.
plt.bar(flower_types, [flower_dict[flower]['train'] for flower in flower_types], width=.5)
plt.title('training data')

the data set seems to be balanced

## investigate image sizes

In [None]:
def get_dims(img_path):
    """Return the ``(height, width)`` in pixels of the image at ``img_path``.

    The size is read from ``Image.size`` instead of decoding the whole image
    into a float32 array, which is much cheaper when scanning the full
    dataset. The ``with`` block closes the underlying file promptly.

    Args:
        img_path: Path of the image file.

    Returns:
        Tuple ``(height, width)``.
    """
    with Image.open(img_path) as img:
        # PIL reports size as (width, height); callers expect (height, width).
        width, height = img.size
    return height, width

In [None]:
# size_dict[flower_type] -> list of {'hight': ..., 'width': ...} records, one per image
# across all splits. The key spelling 'hight' is what the plotting cell reads.
size_dict = {}
for flower_type in flower_types:
    sizes = []
    for folder in ['test', 'train', 'validation']:
        flower_path = os.path.join(path, folder, flower_type)
        if not os.path.isdir(flower_path):
            continue
        images = os.listdir(flower_path)
        for image in images:
            h, w = get_dims(f'{flower_path}/{image}')
            sizes.append({'hight': h, 'width': w})
    size_dict[flower_type] = sizes

In [None]:
# One scatter plot of image width vs. height per flower type.
for flower in flower_types:
    heights = [pic['hight'] for pic in size_dict[flower]]
    widths = [pic['width'] for pic in size_dict[flower]]

    # A dedicated figure per class, rendered with plt.show() rather than the
    # plt.pause(0.1) workaround, so each class gets its own plot.
    fig, ax = plt.subplots()
    ax.scatter(widths, heights)
    ax.set_xlabel('width')
    ax.set_ylabel('height')
    ax.set_title(f'{flower} pic sizes (pixels)')
    plt.show()

size distribution between the different flower types seems similar

## show random images

In [None]:
# Show up to four sample images for every flower type and split.
for flower_type in flower_types:
    for folder in ['test', 'train', 'validation']:
        flower_path = os.path.join(path, folder, flower_type)
        if not os.path.isdir(flower_path):
            continue
        print(flower_path)

        # Sort before the seeded shuffle: os.listdir order is arbitrary, so
        # without sorting the "random" sample would differ between machines.
        images = sorted(os.listdir(flower_path))
        random.seed(1)
        random.shuffle(images)

        fig, axarr = plt.subplots(1, 4, figsize=(10, 20))
        # Hide all axes up front so unused panels stay blank when a directory
        # holds fewer than four images.
        for panel in axarr:
            panel.axis('off')
        for panel, image_name in zip(axarr, images[:4]):
            panel.imshow(load_img(f'{flower_path}/{image_name}', target_size=target_size))
        # Render this row now (replaces the plt.pause(0.1) workaround).
        plt.show()

almost all images look good

# baseline model

# start with a simple model

In [None]:
def make_model():
    """Build and compile the baseline CNN classifier.

    Architecture: one 3x3 convolution (32 filters, ReLU) -> max pooling ->
    flatten -> Dense(64, ReLU) -> softmax over ``len(flower_types)`` classes.
    The input shape is ``(target_size[0], target_size[1], 3)``.

    Returns:
        A ``keras.Model`` compiled with SGD (learning rate 0.002, momentum 0.8),
        categorical cross-entropy on probabilities (``from_logits=False``,
        matching the softmax output) and the accuracy metric.
    """
    inputs = keras.Input(shape=(target_size[0], target_size[1], 3))
    conv = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
    pooling = keras.layers.MaxPool2D(strides=(2, 2))(conv)
    flatten = keras.layers.Flatten()(pooling)
    dense = keras.layers.Dense(64, activation='relu')(flatten)
    outputs = keras.layers.Dense(len(flower_types), activation='softmax')(dense)
    model = keras.Model(inputs, outputs)

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and newer Keras versions no longer honour it.
    optimizer = keras.optimizers.SGD(learning_rate=0.002, momentum=0.8)
    loss = keras.losses.CategoricalCrossentropy(from_logits=False)

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy']
    )
    return model

In [None]:
# Instantiate the baseline model.
initial_model = make_model()

In [None]:
# Training data pipeline: pixel values are rescaled from [0, 255] to [0, 1] and
# images are read from <path>train/ in batches of 20, resized to target_size.
train_generator = ImageDataGenerator(rescale=1./255)

train_data = train_generator.flow_from_directory(
    f'{path}train/',
    target_size=target_size,
    batch_size=20
)

In [None]:
# Validation data pipeline with the same rescaling and image size as the training data.
# NOTE(review): batches are shuffled and the fit calls evaluate only `validation_steps`
# batches per epoch, so each epoch appears to be scored on a random subset - confirm
# this is intended.
val_generator = ImageDataGenerator(rescale=1./255)

val_data = val_generator.flow_from_directory(
    f'{path}validation',
    target_size=target_size,
    batch_size=20,
    shuffle=True
)

In [None]:
# Inspect which label encoding the training iterator produces.
train_data.class_mode

In [None]:
# Invert class_indices (class name -> index) into index -> class name, used to decode predictions.
flower_types_pred_dict = {v:k for k, v in train_data.class_indices.items()}

In [None]:
# Train the baseline model for 10 epochs of 100 batches each, evaluating on
# 10 validation batches per epoch. The per-epoch metrics are kept in `history`.
history = initial_model.fit(
    train_data,
    steps_per_epoch=100,
    epochs=10,
    validation_data=val_data,
    validation_steps=10
)

In [None]:
# Accuracy per epoch; the curves are labelled so training and validation can be told apart.
plt.plot(history.history['val_accuracy'], label='validation')
plt.plot(history.history['accuracy'], label='train')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend();

# parameter tuning

In [None]:
def build_model(conv_layers=1, learning_rate=3e-3, dropout_rate=0.2):
    """Build and compile a configurable CNN classifier for hyperparameter tuning.

    Args:
        conv_layers: Number of *additional* convolution + max-pooling blocks
            stacked after the first block, which is always present. The model
            therefore contains ``conv_layers + 1`` convolution blocks.
        learning_rate: Learning rate of the Adam optimizer.
        dropout_rate: Dropout rate applied to the flattened features before
            the dense layers.

    Returns:
        A ``keras.Model`` with input shape ``(target_size[0], target_size[1], 3)``
        and a softmax output over ``len(flower_types)`` classes, compiled with
        Adam, categorical cross-entropy and the accuracy metric.
    """
    inputs = keras.Input(shape=(target_size[0], target_size[1], 3))

    # First convolution block, always present.
    conv = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
    pooling = keras.layers.MaxPool2D(strides=(2, 2))(conv)

    # Optional extra blocks, each fed by the previous block's pooled output.
    for _ in range(conv_layers):
        conv = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(pooling)
        pooling = keras.layers.MaxPool2D(strides=(2, 2))(conv)

    flatten = keras.layers.Flatten()(pooling)
    dropout = keras.layers.Dropout(rate=dropout_rate)(flatten)
    dense = keras.layers.Dense(64, activation='relu')(dropout)
    outputs = keras.layers.Dense(len(flower_types), activation='softmax')(dense)

    model = keras.Model(inputs, outputs)

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and newer Keras versions no longer honour it, which would
    # make the learning-rate sweep meaningless.
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999)
    loss = keras.losses.CategoricalCrossentropy(from_logits=False)

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy']
    )
    return model

In [None]:
# Callback that saves the model to an .h5 file (epoch number and validation accuracy
# in the file name) whenever the monitored validation accuracy improves.
# The variable name is spelled `chechpoint` because the fit cells refer to it by that name.
chechpoint = keras.callbacks.ModelCheckpoint(
    filepath='flower_model_{epoch:02d}_{val_accuracy:.3f}.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

In [None]:
# Model built with the default hyperparameters; this is the `model` that is later
# evaluated on the test set and exported to TFLite.
model = build_model()

In [None]:
# Train the default model with the same schedule as the baseline, additionally
# saving checkpoints through the `chechpoint` callback.
history = model.fit(
    train_data,
    steps_per_epoch=100,
    epochs=10,
    validation_data=val_data,
    validation_steps=10,
    callbacks=[chechpoint]
)

In [None]:
# Accuracy per epoch; the curves are labelled so training and validation can be told apart.
plt.plot(history.history['val_accuracy'], label='validation')
plt.plot(history.history['accuracy'], label='train')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend();

In [None]:
# Learning-rate sweep: train a fresh model per value and keep its training history.
scores = {}
for lr in [0.0001, 0.001, 0.01]:
    print(lr)
    # Use a fresh checkpoint callback per run. A ModelCheckpoint instance keeps
    # the best score it has seen, so sharing one across runs would compare
    # different models with each other, and its file names would not reveal
    # which setting produced a saved model.
    run_checkpoint = keras.callbacks.ModelCheckpoint(
        f'flower_model_lr_{lr}_' + '{epoch:02d}_{val_accuracy:.3f}.h5',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    m = build_model(learning_rate=lr)
    history = m.fit(
        train_data,
        steps_per_epoch=80,
        epochs=8,
        validation_data=val_data,
        validation_steps=10,
        callbacks=[run_checkpoint],
    )
    scores[lr] = history.history
    print()

In [None]:
# Validation accuracy per epoch, one curve per learning rate in `scores`.
for lr, hist in scores.items():
    plt.plot(hist['val_accuracy'], label=('val=%s' % lr))

plt.legend()

In [None]:
# Dropout sweep: train a fresh model per dropout rate and keep its training history.
scores = {}
for dr in [0.1, 0.2]:
    print(dr)
    # Use a fresh checkpoint callback per run. A ModelCheckpoint instance keeps
    # the best score it has seen, so sharing one across runs would compare
    # different models with each other, and its file names would not reveal
    # which setting produced a saved model.
    run_checkpoint = keras.callbacks.ModelCheckpoint(
        f'flower_model_dropout_{dr}_' + '{epoch:02d}_{val_accuracy:.3f}.h5',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    m = build_model(dropout_rate=dr)
    history = m.fit(
        train_data,
        steps_per_epoch=80,
        epochs=8,
        validation_data=val_data,
        validation_steps=10,
        callbacks=[run_checkpoint],
    )
    scores[dr] = history.history
    print()

In [None]:
# Validation accuracy per epoch, one curve per dropout rate in `scores`.
for dr, hist in scores.items():
    plt.plot(hist['val_accuracy'], label=('val=%s' % dr))

plt.legend()

In [None]:
# Depth sweep: train a fresh model per `conv_layers` value and keep its training history.
scores = {}
for layer in [1, 2]:
    print(layer)
    # Use a fresh checkpoint callback per run. A ModelCheckpoint instance keeps
    # the best score it has seen, so sharing one across runs would compare
    # different models with each other, and its file names would not reveal
    # which setting produced a saved model.
    run_checkpoint = keras.callbacks.ModelCheckpoint(
        f'flower_model_conv_{layer}_' + '{epoch:02d}_{val_accuracy:.3f}.h5',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
    )
    m = build_model(conv_layers=layer)
    history = m.fit(
        train_data,
        steps_per_epoch=80,
        epochs=8,
        validation_data=val_data,
        validation_steps=10,
        callbacks=[run_checkpoint],
    )
    scores[layer] = history.history
    print()

In [None]:
# Validation accuracy per epoch, one curve per `conv_layers` value in `scores`.
for layer, hist in scores.items():
    plt.plot(hist['val_accuracy'], label=('val=%s' % layer))

plt.legend()

# use test set to check the model

In [None]:
# Test data pipeline with the same rescaling and image size as training.
# shuffle=False keeps the batches in a fixed order, which the evaluation cells rely on
# when they compare predictions element-wise with test_data.labels.
test_generator = ImageDataGenerator(rescale=1./255)

test_data = test_generator.flow_from_directory(
    f'{path}test',
    target_size=target_size,
    batch_size=20,
    class_mode='categorical',
    shuffle=False
)

In [None]:
# Predict class probabilities for the whole test set. `Model.predict` accepts the
# directory iterator directly; `predict_generator` is deprecated and absent from
# newer Keras releases.
pred_test = model.predict(test_data)
# Index of the most probable class per image, then decoded to the class name.
predicted_class_indices_test = np.argmax(pred_test, axis=1)
predictions = [flower_types_pred_dict[p] for p in predicted_class_indices_test]

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the true classes and columns
# the predicted ones. Passing the true labels first makes the matrix agree with
# the 'True labels' (y) / 'Predicted labels' (x) axes of the heatmap cell.
cm = confusion_matrix(
    [flower_types_pred_dict[l] for l in test_data.labels],
    [flower_types_pred_dict[p] for p in predicted_class_indices_test],
    labels=list(flower_types_pred_dict.values()),
)

In [None]:
# Overall test accuracy: fraction of images whose predicted class index equals the true label.
print(f'accuracy: {sum(predicted_class_indices_test == test_data.labels) / len(predicted_class_indices_test)}')

In [None]:
# Draw `cm` as an annotated heatmap (rows of `cm` run along the y axis, columns along
# the x axis) and label the ticks with the class names.
class_names = list(flower_types_pred_dict.values())

f, ax = plt.subplots(1, 1, figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='g', ax=ax, linewidths=0.1)
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names)

# save model

In [None]:
# Where the converted TFLite model is written to and later loaded from.
model_path = '../models/flowers-model-v1.tflite'

In [None]:
# Convert the trained Keras model to the TFLite flatbuffer format.
converter = tflite.TFLiteConverter.from_keras_model(model)

tflite_model = converter.convert()

# Create the target directory if needed, so the write does not fail on a fresh checkout.
model_dir = os.path.dirname(model_path)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

with open(model_path, 'wb') as f_out:
    f_out.write(tflite_model)

# make a prediction

In [None]:
# Load the saved TFLite model and allocate its tensors.
interpreter = tflite.Interpreter(model_path=model_path)
interpreter.allocate_tensors()

# Tensor indices of the first input and the first output, used to feed an image
# and read back the prediction.
input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']

In [None]:
def prepare_image(img_path, target_size):
    """Load an image file and turn it into a model-ready batch of one.

    Args:
        img_path: Path of the image file.
        target_size: ``(height, width)`` of the output, i.e. the same
            convention as the model input shape
            ``(target_size[0], target_size[1], 3)`` used in this notebook.

    Returns:
        A float32 array of shape ``(1, height, width, 3)`` with pixel values
        scaled to the range [0, 1].
    """
    with open(img_path, 'rb') as file:
        # Read everything into memory so the file handle is released right away.
        stream = BytesIO(file.read())
    img = Image.open(stream)

    if img.mode != 'RGB':
        img = img.convert('RGB')

    # PIL's resize expects (width, height), the reverse of the (height, width)
    # convention of `target_size`. Swapping makes non-square sizes match the
    # model input; for square sizes the result is unchanged.
    height, width = target_size
    img = img.resize((width, height), Image.NEAREST)

    x = np.array(img, dtype='float32')
    X = np.array([x])  # add the batch dimension
    X /= 255.0  # same rescaling as the ImageDataGenerator(rescale=1./255) pipelines

    return X

In [None]:
# Pick one rose image from the test split and display it at the model's input size.
test_image = f'{path}test/rose/1461381091_aaaa663bbe_n.jpg'
load_img(test_image, target_size=target_size)

In [None]:
# Preprocess the test image into the batch array fed to the interpreter.
X = prepare_image(test_image, target_size)

In [None]:
# Run TFLite inference: write the input tensor, execute the model, read the output tensor.
interpreter.set_tensor(input_index, X)
interpreter.invoke()
preds = interpreter.get_tensor(output_index)

In [None]:
# Show the index -> class name mapping used to decode the prediction.
flower_types_pred_dict

In [None]:
# Class name of the highest-scoring output.
flower_types_pred_dict[preds.argmax()]

# test local API

In [None]:
def download_image(url, timeout=30):
    """Download an image over HTTP(S) and open it with PIL.

    Args:
        url: Address of the image.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A ``PIL.Image.Image`` backed by the downloaded bytes held in memory.
    """
    # A bare `import urllib` does not load the `urllib.request` submodule; it only
    # works when some other import happens to have loaded it. Import it explicitly.
    import urllib.request

    with urllib.request.urlopen(url, timeout=timeout) as resp:
        buffer = resp.read()
    stream = BytesIO(buffer)
    img = Image.open(stream)
    return img

def resize_image(image, target_size):
    """Return ``image`` as RGB, resized to ``target_size`` with nearest-neighbour sampling.

    ``target_size`` is handed to PIL's ``resize`` unchanged, so PIL interprets
    it as ``(width, height)``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    return rgb_image.resize(target_size, Image.NEAREST)

In [None]:
# Publicly hosted sample images used to exercise the prediction services below.
image_urls = [
    'https://upload.wikimedia.org/wikipedia/commons/c/cc/Hundsrose.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/a/a8/Tulipa_cinnabarina_subsp_cinnabarina.png',
    'https://upload.wikimedia.org/wikipedia/commons/8/85/Tulipa_praestans1.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/e/ea/Tulipa_suaveolens_floriade_to_Canberra.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/e/eb/Wild_Rosa_gallica_Romania.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/0/0c/Rosa_Ave_Maria_1.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/f/f7/2010_sonnenblume_%28Helianthus_annuus%29.JPG',
    'https://upload.wikimedia.org/wikipedia/commons/e/e9/Sonsbeck_-_agri_06_ies.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/d/d6/Taraxacum_officinale_focused.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/8/85/01_pusteblume.jpg',
    'https://upload.wikimedia.org/wikipedia/commons/5/5a/Bellis_perennis-fully_bloomed_flower.jpg'    
]

In [None]:
# Download every sample image and display it at the model's input size.
for url in image_urls:
    print(url)
    img = resize_image(download_image(url), target_size)
    display(img)

In [None]:
# Address of the locally running prediction service.
url = 'http://192.168.178.36:9696/predict'
image_url = 'https://upload.wikimedia.org/wikipedia/commons/a/a8/Tulipa_cinnabarina_subsp_cinnabarina.png'

print(image_url)
img = download_image(image_url)
img = resize_image(img, target_size)
display(img)
flower_pic_url = {'url': image_url}
# requests has no default timeout; set one so an unreachable service cannot hang
# the notebook, and fail loudly on HTTP errors instead of at the JSON/KeyError stage.
response = requests.post(url, json=flower_pic_url, timeout=60)
response.raise_for_status()
resp = response.json()
print(resp)
print(f"It is a {resp['flower']}")
print()

# test Heroku APP

In [None]:
# Address of the deployed prediction service; also used by the loop in the next cell.
url = 'https://flower-types.herokuapp.com/predict'
flower_pic_url = {'url': 'https://upload.wikimedia.org/wikipedia/commons/c/cc/Hundsrose.jpg'}

# requests has no default timeout; set one so an unresponsive service cannot hang
# the notebook, and fail loudly on HTTP errors before trying to parse JSON.
response = requests.post(url, json=flower_pic_url, timeout=60)
response.raise_for_status()
response.json()

In [None]:
# Send every sample image URL to the service at `url` and print the predicted flower.
for image_url in image_urls:
    print(image_url)
    img = download_image(image_url)
    img = resize_image(img, target_size)
    display(img)
    flower_pic_url = {'url': image_url}
    # requests has no default timeout; set one so an unresponsive service cannot
    # hang the loop, and fail loudly on HTTP errors before trying to parse JSON.
    response = requests.post(url, json=flower_pic_url, timeout=60)
    response.raise_for_status()
    resp = response.json()
    print(resp)
    print(f"It is a {resp['flower']}")
    print()