In [70]:
import json
import os
import tensorflow as tf
import keras
from keras import layers
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

Matplotlib is building the font cache; this may take a moment.


In [17]:
# Read the Kaggle API credentials from the local JSON file.
with open('./.kaggle/kaggle.json') as credentials_file:
    credentials_dict = json.load(credentials_file)

# Export the credentials through the environment before the kaggle package is imported.
os.environ['KAGGLE_USERNAME'] = credentials_dict['username']
os.environ['KAGGLE_KEY'] = credentials_dict['key']

# NOTE(review): imported here rather than in the top import cell on purpose —
# presumably the kaggle package authenticates at import time; confirm.
import kaggle

In [None]:
# Dataset page: https://www.kaggle.com/datasets/moltean/fruits
dataset_name = 'moltean/fruits'

# Download the dataset archive into ./dataset and unzip it there.
kaggle.api.dataset_download_files(
    dataset_name,
    path='./dataset',
    force=False,
    quiet=True,
    unzip=True,
)
# Alternative dataset with apple variants and many more different fruits (~9 GB):
# https://www.kaggle.com/datasets/chrisfilo/fruit-recognition?select=Apple

## Prepare the dataset

In [92]:
image_size = (128,128) # Size the images are resized to. Ideally we would find the max or average image size and scale the images proportionally, to avoid too much cropping/resizing.
# Keep in mind that the image size is also needed when creating the model, since the input shape depends on it.
number_of_channels = 3 # Used exclusively for the input shape of the model.
batch_size = 64 # The batch size to use for training; it could be tuned.

def create_dataset(dataset_path) -> "tuple[tf.data.Dataset, list[str]]":
    """Load an image-classification dataset from a directory of class folders.

    Images are loaded as RGB, resized to the module-level ``image_size`` and
    grouped into batches of the module-level ``batch_size``. Labels are inferred
    from the directory structure and returned as integer class indices.

    Args:
        dataset_path: Root directory of the split to load.

    Returns:
        A ``(dataset, class_names)`` tuple: the cached and prefetched dataset,
        and the class names reported by the loader.
    """
    seed = 1  # Fixed shuffle seed so that runs of different models are comparable.

    dataset = keras.utils.image_dataset_from_directory(
        dataset_path,
        labels='inferred',  # Infer the labels from the directory structure.
        label_mode="int",  # Integer labels (pair with a sparse categorical loss); "categorical" would one-hot encode instead.
        color_mode='rgb',  # Images are converted to 3 channels if they are not already.
        batch_size=batch_size,
        image_size=image_size,  # Resize every image to the configured size.
        shuffle=True,
        seed=seed,
        crop_to_aspect_ratio=True,  # Crop instead of distorting when the aspect ratio differs.
        #validation_split=0.2, # Fraction of the data to reserve for validation.
        #subset="training", # Which subset of the split to return.
    )

    # Read the class names from the dataset returned by the loader; the
    # cache()/prefetch() calls below return new dataset objects.
    class_names = dataset.class_names

    return dataset.cache().prefetch(tf.data.AUTOTUNE), class_names

def save_class_names(class_names, filename):
    """Write ``class_names`` to ``filename`` as JSON, replacing any existing file."""
    with open(filename, 'w') as handle:
        handle.write(json.dumps(class_names))

In [94]:
dataset_images_base_path = './dataset/fruits-360-original-size/fruits-360-original-size/'
trainset, class_names = create_dataset(dataset_images_base_path+'Training')
testset, test_class_names = create_dataset(dataset_images_base_path+'Test')
valset, val_class_names = create_dataset(dataset_images_base_path+'Validation')

# Labels are integer indices derived from each split's own directory listing, so
# the three splits must expose the same classes in the same order; otherwise the
# evaluation would silently compare mismatched labels.
assert test_class_names == class_names, "Test split classes differ from the Training split"
assert val_class_names == class_names, "Validation split classes differ from the Training split"

Found 6231 files belonging to 24 classes.
Found 3110 files belonging to 24 classes.
Found 3114 files belonging to 24 classes.


In [97]:
# Write the training class names to a JSON file (presumably so predicted indices can be mapped back to names later — confirm).
save_class_names(class_names, 'class-labels.json')

In [63]:
# Derive the number of output units from the loaded data instead of hard-coding it,
# so the model stays consistent if the dataset changes.
num_classes = len(class_names)
# Model input shape: the resize target plus the channel dimension.
input_shape = (image_size[0], image_size[1], number_of_channels)
def create_data_augmentation_layer():
    """Build the random-augmentation pipeline as a single ``keras.Sequential``.

    The random transforms are followed by a ``Resizing`` layer so that the
    output has the configured ``image_size`` again.
    """
    # NOTE(review): RandomHeight/RandomWidth may not exist in Keras 3 — confirm
    # against the installed Keras version before enabling this pipeline.
    augmentations = [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.2),
        layers.RandomZoom(0.2),
        layers.RandomContrast(0.2),
        layers.RandomHeight(0.2),
        layers.RandomWidth(0.2),
        # Ensure the images have the same shape again after augmentation.
        layers.Resizing(image_size[0], image_size[1]),
    ]
    return keras.Sequential(augmentations)

def create_model(learning_rate=0.01):
    """Build and compile the CNN classifier.

    The network rescales pixel values by 1/255, applies four Conv2D + MaxPooling2D
    stages (16, 32, 64 and 128 filters), then dropout, a 128-unit dense layer and
    a softmax output with ``num_classes`` units. The input shape is the
    module-level ``input_shape``.

    Args:
        learning_rate: Initial learning rate for the Adam optimizer. Defaults to
            0.01, the value that was previously hard-coded.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Use the standalone `keras` namespace throughout, rather than mixing it
    # with `tf.keras`, to match the rest of the notebook.
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        # To enable augmentation, insert create_data_augmentation_layer() here,
        # after the Input layer.
        layers.Rescaling(1./255),
        layers.Conv2D(16, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(32, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, 3, padding='same', activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.4),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        # Sparse categorical cross-entropy expects integer class labels; with
        # one-hot encoded labels use CategoricalCrossentropy instead.
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    return model

def create_model_and_fit(epochs=50):
    """Create a fresh model and train it on the module-level ``trainset``.

    The module-level ``valset`` is used for validation. All three callbacks
    monitor ``val_loss``: early stopping (restoring the best weights), learning
    rate reduction on plateau, and a best-only checkpoint written to
    ``./best_model.h5``.

    Args:
        epochs: Maximum number of training epochs. Defaults to 50, the value
            that was previously hard-coded.

    Returns:
        A ``(model, history)`` tuple: the trained model and the object returned
        by ``model.fit``.
    """
    callbacks = [
        # min_delta may need tuning.
        keras.callbacks.EarlyStopping(patience=5, monitor='val_loss', restore_best_weights=True, min_delta=1e-4),
        # min_delta is the improvement threshold on the monitored val_loss, not
        # on the learning rate; min_lr is the lower bound for the learning rate.
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=1e-5, patience=2, mode='min', verbose=1, factor=0.1, min_delta=1e-4),
        keras.callbacks.ModelCheckpoint(monitor='val_loss', filepath='./best_model.h5', save_best_only=True),
    ]
    model = create_model()

    # No batch_size argument: trainset and valset are tf.data datasets that
    # already yield batches, and Keras asks not to specify it in that case.
    history = model.fit(trainset, epochs=epochs,
                        callbacks=callbacks, validation_data=valset)

    return model, history

In [64]:
# Build and train the model; `history` is the object returned by `model.fit`.
model, history = create_model_and_fit()

Epoch 1/100

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
Epoch 16/100
Epoch 17/100
Epoch 17: ReduceLROnPlateau reducing learning rate to 9.999999019782991e-06.
Epoch 18/100
Epoch 19/100
Epoch 19: ReduceLROnPlateau reducing learning rate to 9.99999883788405e-07.
Epoch 20/100
Epoch 21/100
Epoch 21: ReduceLROnPlateau reducing learning rate to 1e-07.
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
 2/98 [..............................] - ETA: 44s - los

KeyboardInterrupt: 

In [88]:
# Reload the model saved at ./best_model.h5 (this replaces the in-memory `model`).
model = tf.keras.models.load_model('./best_model.h5')
# Evaluate the model on the test set.
loss, accuracy = model.evaluate(testset)
print(f'Loss: {loss}, Accuracy: {accuracy}')

# To examine the cause of problems, collect per-sample predictions and labels instead (untested sketch):
#predictions = np.array([])
#labels =  np.array([])
#for x, y in testset:
#  predictions = np.concatenate([predictions, np.argmax(model.predict(x, verbose=0), axis=-1)])
#  labels = np.concatenate([labels, y.numpy()])  # labels are already integer class indices
# It may also be useful to map labels and predictions to their text class names here.

#tf.math.confusion_matrix(labels=labels, predictions=predictions).numpy()


Loss: 9.284500265493989e-05, Accuracy: 1.0
