In [None]:
import os
import tensorflow
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
from tensorflow.keras import regularizers
from PIL import Image

MUSHROOMS_PATH = 'mushrooms_dataset'

# Root directory of the images; each sub-directory name is later used as the class label.
images_dir = os.path.join(MUSHROOMS_PATH, 'images_FasterRCNN', 'images_correct')

# os.listdir() returns entries in arbitrary, platform-dependent order. Sorting keeps the
# order of the class directories (and everything derived from it) stable between runs.
subdirs = sorted(
    path
    for path in (os.path.join(images_dir, entry) for entry in os.listdir(images_dir))
    if os.path.isdir(path)
)

In [None]:
# We have a few ideas for dividing the dataset into training and testing sets. The first idea is to use the train_test_split function from scikit-learn.
# For that we have to put every image into an array and then into a DataFrame.
# Then we have to use ImageDataGenerator and flow_from_dataframe to load the images from the DataFrame.

# The second idea is to create the test set manually by taking 20% of the images from each class and putting them into a separate directory.
# We would then use ImageDataGenerator and flow_from_directory to load the images from that directory.

# In both ideas we need to take stratification into consideration, so that the distribution of classes in the training and testing sets is similar.
# For example, if one class has 10 images and another has 8 images, we want both of them to have the same percentage of their images in the training and testing sets.

# The third idea is to use the splitfolders library to divide the dataset into training and testing sets.
# But again we have to stratify the dataset, which is not supported by that library.

# So the first idea might require a lot of memory, and the second idea requires us to do the split manually, which is not very efficient.
# The third idea does not support stratification.

# So for now we will use the first idea and divide the dataset into training and testing sets with scikit-learn's train_test_split function, which has the stratify parameter.


In [None]:
# The process for the first idea is as follows:
# 1. Load the image paths and their corresponding labels into a DataFrame.
# 2. Divide the dataset into training and testing sets using the train_test_split function from scikit-learn with stratification.
# 3. Use ImageDataGenerator and flow_from_dataframe to load the images listed in the DataFrame.

In [None]:
# Number of class sub-directories found under images_dir (181 according to the original note).
len(subdirs) # 181

In [None]:
# Collect one (image path, label) row per .jpg file; the label is the name of the
# directory that contains the image.
data = []
# Iterate in sorted order: os.listdir() gives no ordering guarantee, and the row order of
# data_df determines which rows a seeded train_test_split puts into each set.
for subdir in sorted(subdirs):
    label = os.path.basename(subdir)
    for filename in sorted(os.listdir(subdir)):
        if filename.endswith('.jpg'):
            # flow_from_dataframe reads the images from disk using the paths stored in the
            # DataFrame, so the path including the directory is stored, not just the file name.
            data.append((os.path.join(subdir, filename), label))
data_df = pd.DataFrame(data, columns=['filename', 'label'])

In [None]:
# Preview the first rows to check the 'filename' and 'label' columns.
data_df.head()

In [None]:
# Hold out 20% of the rows as the test set. Stratifying on the label column keeps the
# class proportions similar in both sets; the fixed random_state seeds the shuffle.
train_df, test_df = train_test_split(
    data_df,
    stratify=data_df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Validation takes 25% of the training frame: 0.25 * 0.80 = 0.20 of all images,
# which is the same share as the test set.
VALIDATION_SPLIT = 0.25
IMAGE_SIZE = (299, 299)  # matches the input_shape the InceptionV3 model is built with
BATCH_SIZE = 16

# Augmenting generator, used for the training subset only.
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    validation_split=VALIDATION_SPLIT
)

# Validation images must only be rescaled. Drawing them from the augmenting generator
# would randomly distort them, making val_loss noisy and not comparable with the test
# metrics. The same validation_split is used so both generators cut train_df at the
# same position and the training/validation subsets stay disjoint.
datagen_val = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

train_data = datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filename',
    y_col='label',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    subset='training'
)

val_data = datagen_val.flow_from_dataframe(
    dataframe=train_df,
    x_col='filename',
    y_col='label',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    subset='validation'
)

datagen_test = ImageDataGenerator(rescale=1./255)

test_data = datagen_test.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col='label',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    # Keep the test images in DataFrame order so per-image predictions can be matched
    # to their labels; the aggregate result of evaluate() does not depend on the order.
    shuffle=False
)

Since we will train a couple of models and compare their results, it is good to create a function for saving a model.

In [None]:
from tensorflow.saved_model import save
def saveModel(model, model_name):
    """Save ``model`` under the ``models`` directory in two formats.

    Args:
        model: Object providing ``save(path)`` and ``export(path)`` methods.
        model_name: Base name used for both outputs (without extension).

    Writes ``models/<model_name>.h5`` (h5 format) via ``model.save`` and
    ``models/<model_name>`` (saved_model format) via ``model.export``.
    """
    # exist_ok=True creates the directory only when needed, without a separate
    # existence check that could race with another process creating it.
    os.makedirs('models', exist_ok=True)
    model.save(f'models/{model_name}.h5')  # h5 format
    model.export(f'models/{model_name}')  # saved_model format

Training on InceptionV3 model

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# InceptionV3 backbone with ImageNet weights and without its original classification top.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# New classification head on top of the backbone features.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dropout(0.6)(x)
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.1))(x)
x = BatchNormalization()(x)
x = Dropout(0.6)(x)

# One output unit per class known to the training generator, instead of a hard-coded 181,
# so the head always matches the labels the generators actually produce.
num_classes = len(train_data.class_indices)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# First phase: freeze the whole backbone so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model.summary()

optimizer = Adam(learning_rate=0.0001)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) # if the validation loss does not improve for 10 epochs, the training will stop
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, min_lr=0.000001) # if the validation loss does not improve for 3 epochs, the learning rate will be reduced by a factor of 0.1
csv_logger = CSVLogger('model_training.log') # saving the training log to a file

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
# csv_logger was created but never passed to fit(), so no log file was written; it is
# now included. reduce_lr is defined here and used by the later fine-tuning cell.
model.fit(train_data, validation_data=val_data, epochs=20, callbacks=[early_stopping, csv_logger])

In [None]:
# Print the index and name of every backbone layer, to help choose where to start unfreezing.
for index, backbone_layer in enumerate(base_model.layers):
    print(index, backbone_layer.name)

In [None]:
# Fine-tuning setup: layers before index 289 stay frozen, layers from 289 onward become trainable.
first_trainable_index = 289
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable_index

In [None]:
# Compile again with a fresh Adam optimizer after the trainable flags were changed.
optimizer = Adam(learning_rate=0.0001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Fine-tuning run: stop early on stalled val_loss and lower the learning rate on plateaus.
fine_tune_callbacks = [early_stopping, reduce_lr]
model.fit(
    train_data,
    validation_data=val_data,
    epochs=60,
    callbacks=fine_tune_callbacks,
)

In [None]:
# Evaluate the model on the test generator; reports the loss and the accuracy metric
# the model was compiled with.
model.evaluate(test_data)

In [None]:
# Save the model through saveModel(), which writes both the .h5 file and the exported
# saved_model directory under models/.
# NOTE(review): the Docker command and the TFLite conversion elsewhere in this notebook
# reference other version suffixes (v1_5_5, v1_3_9) - confirm which model each should use.
saveModel(model, 'inception_v3_mushroomsv1_5_0')

In [None]:
# Commands for running the model with TensorFlow Serving (run them in a shell, not in the notebook):
# Pull the TensorFlow Serving image:
# $ docker pull tensorflow/serving:latest-gpu
# Start the server on port 8501, mounting the exported model directory as version 1 of the "inception" model:
# $ docker run --rm -p 8501:8501 --name tfserving_inception -v "C:\Users\Adam\Desktop\FungEye\FungEye\FungEyeAi\models\inception_v3_mushroomsv1_5_5\:/models/inception/1" -e MODEL_NAME=inception tensorflow/serving:latest-gpu

In [None]:
import json, requests

def predict_image(image_path,
                  endpoint='http://localhost:8501/v1/models/inception:predict',
                  class_names_path='mushrooms_dataset/final_mushroom_list.txt',
                  top_k=5,
                  timeout=30):
    """Classify one image through the TensorFlow Serving REST endpoint.

    Args:
        image_path: Path of the image file to classify.
        endpoint: URL of the model's ``:predict`` REST endpoint.
        class_names_path: Text file with one class name per line. Line ``i`` is paired
            with prediction index ``i``, so the file order has to match the class order
            the model was trained with (NOTE(review): not verifiable from this
            notebook - confirm the file was generated in that order).
        top_k: Number of highest-scoring classes to return.
        timeout: Seconds to wait for the server before the request fails.

    Returns:
        A list of at most ``top_k`` ``(class_name, score)`` tuples, sorted by score in
        descending order.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        IndexError: If the class-name file has fewer lines than the model has outputs.
    """
    # Preprocess the image. The context manager closes the file handle. convert('RGB')
    # forces three channels, so grayscale, palette and RGBA images do not break the
    # reshape to (1, 299, 299, 3) below.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB').resize((299, 299))
    batch = (np.array(image) / 255.0).reshape(1, 299, 299, 3)

    # The array is converted to nested lists because the request body is JSON and the
    # endpoint expects a list of instances.
    headers = {'Content-Type': 'application/json'}
    batch_json = {'signature_name': 'serving_default', 'instances': batch.tolist()}

    response = requests.post(endpoint, json=batch_json, headers=headers, timeout=timeout)
    # Fail with the HTTP error itself instead of a confusing KeyError on 'predictions'.
    response.raise_for_status()
    predictions = response.json()['predictions']

    # Make the predictions readable by pairing each score with its class name.
    with open(class_names_path, 'r') as file:
        class_names = file.read().splitlines()
    prediction_list = [(class_names[i], score) for i, score in enumerate(predictions[0])]

    # Sort the predictions by probability, highest first.
    prediction_list.sort(key=lambda item: item[1], reverse=True)
    return prediction_list[:top_k]

# Example request for a single image; the TensorFlow Serving container must already be
# running for this call to succeed.
predict_image('mushrooms_dataset/borowik_szlachetny.jpg')

In [None]:
# Convert an exported saved_model directory to a TensorFlow Lite flatbuffer and write it to disk.
# NOTE(review): this uses the v1_3_9 export, not the v1_5_0 model saved earlier in the
# notebook - confirm this is the intended version.
saved_model_dir = "models/inception_v3_mushroomsv1_3_9"
tflite_path = 'models/inception_v3_mushroomsv1_3_9.tflite'

converter = tensorflow.lite.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model = converter.convert()

with open(tflite_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)