In [1]:
import os
import shutil

# Source directories
train_dir = '/app/GroceryStoreDataset/dataset/train-1'
validation_dir = '/app/GroceryStoreDataset/dataset/test-1'  # The separate validation directory

# Destination directory
combined_dir = '/app/GroceryStoreDataset/dataset/combined'

# Ensure the combined directory exists
if not os.path.exists(combined_dir):
    os.makedirs(combined_dir)

# Function to copy images from source to destination
def copy_images(src_dir, dest_dir):
    for class_name in os.listdir(src_dir):
        class_src_path = os.path.join(src_dir, class_name)
        class_dest_path = os.path.join(dest_dir, class_name)

        # Create class directory in destination if it doesn't exist
        if not os.path.exists(class_dest_path):
            os.makedirs(class_dest_path)

        # Copy images
        for image_name in os.listdir(class_src_path):
            src_image_path = os.path.join(class_src_path, image_name)
            dest_image_path = os.path.join(class_dest_path, image_name)
            shutil.copyfile(src_image_path, dest_image_path)

# Copy images from training directory
copy_images(train_dir, combined_dir)

# Copy images from validation directory
copy_images(validation_dir, combined_dir)

print("Images combined successfully.")


Images combined successfully.


In [31]:
import tensorflow as tf
import os






base_dir = '/app/GroceryStoreDataset/dataset'
data_dir = os.path.join(base_dir, 'combined')

datagen =  tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)


train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training',
    seed=42
)
validation_generator =datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    seed=42
)




Found 2142 images belonging to 81 classes.
Found 498 images belonging to 81 classes.


In [5]:
import tensorflow as tf
import os






base_dir = '/app/GroceryStoreDataset/dataset'
data_dir = os.path.join(base_dir, 'train-1')

datagen =  tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)


train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)
validation_generator =datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)




Found 2142 images belonging to 81 classes.
Found 498 images belonging to 81 classes.


In [7]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os



# Load the VGG16 model pre-trained on ImageNet, without the top layer
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# Add a logistic layer for the number of classes
num_classes = 81 # Assuming each sub-directory in train_dir is a class
predictions = Dense(num_classes, activation='softmax')(x)

# Define the model
model = Model(inputs=base_model.input, outputs=predictions)


# Freeze the layers of the VGG16 base model
for layer in base_model.layers:
    layer.trainable = False


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

checkpoint_callback = ModelCheckpoint(
    filepath='/app/vg16.keras',
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    restore_best_weights=True
)

In [10]:
hist = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    epochs=20,
    callbacks=[checkpoint_callback, early_stopping_callback]
)

Epoch 1/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.7404 - loss: 1.0210
Epoch 1: val_accuracy did not improve from 0.88889
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m459s[0m 7s/step - accuracy: 0.7402 - loss: 1.0212 - val_accuracy: 0.7000 - val_loss: 1.0810
Epoch 2/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:08[0m 8s/step - accuracy: 0.7188 - loss: 0.9067

  self.gen.throw(value)



Epoch 2: val_accuracy did not improve from 0.88889
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 58ms/step - accuracy: 0.7188 - loss: 0.9067 - val_accuracy: 0.5000 - val_loss: 1.4648
Epoch 3/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.7505 - loss: 0.9808
Epoch 3: val_accuracy did not improve from 0.88889
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m531s[0m 8s/step - accuracy: 0.7504 - loss: 0.9807 - val_accuracy: 0.7167 - val_loss: 1.0441
Epoch 4/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:40[0m 6s/step - accuracy: 0.6562 - loss: 1.1227
Epoch 4: val_accuracy did not improve from 0.88889
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 56ms/step - accuracy: 0.6562 - loss: 1.1227 - val_accuracy: 0.7778 - val_loss: 0.9796
Epoch 5/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.7730 - loss: 0.8895
Epoch 5: val_accuracy did not improve from 0.88889

2024-08-05 00:33:54.428707: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67s/step - accuracy: 0.8393 - loss: 0.6221  
Epoch 13: val_accuracy did not improve from 0.94444
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4497s[0m 69s/step - accuracy: 0.8390 - loss: 0.6225 - val_accuracy: 0.7542 - val_loss: 0.8404
Epoch 14/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:32[0m 6s/step - accuracy: 0.8438 - loss: 0.6649
Epoch 14: val_accuracy did not improve from 0.94444
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 53ms/step - accuracy: 0.8438 - loss: 0.6649 - val_accuracy: 0.7778 - val_loss: 0.8357
Epoch 15/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.8129 - loss: 0.6823
Epoch 15: val_accuracy did not improve from 0.94444
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m494s[0m 7s/step - accuracy: 0.8131 - loss: 0.6817 - val_accuracy: 0.7896 - val_loss: 0.8197
Epoch 16/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0

In [8]:
'''for layer in base_model.layers[-4:]:
    layer.trainable = True

# Compile the model with a lower learning rate
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

# Continue training the model
hist = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // validation_generator.batch_size,
    epochs=20,
    callbacks=[checkpoint_callback, early_stopping_callback]
)

# Save the final model
model.save('/app/vg16.keras')'''

Epoch 1/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - accuracy: 0.7810 - loss: 0.8229
Epoch 1: val_accuracy did not improve from 0.83333
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m524s[0m 8s/step - accuracy: 0.7813 - loss: 0.8219 - val_accuracy: 0.7729 - val_loss: 0.7931
Epoch 2/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:54[0m 8s/step - accuracy: 0.6250 - loss: 0.9863

  self.gen.throw(value)



Epoch 2: val_accuracy did not improve from 0.83333
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 68ms/step - accuracy: 0.6250 - loss: 0.9863 - val_accuracy: 0.6667 - val_loss: 0.9851
Epoch 3/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.8334 - loss: 0.5975
Epoch 3: val_accuracy did not improve from 0.83333
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m562s[0m 8s/step - accuracy: 0.8336 - loss: 0.5969 - val_accuracy: 0.7958 - val_loss: 0.6876
Epoch 4/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m6:34[0m 6s/step - accuracy: 0.9062 - loss: 0.3933
Epoch 4: val_accuracy did not improve from 0.83333
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - accuracy: 0.9062 - loss: 0.3933 - val_accuracy: 0.8333 - val_loss: 0.5891
Epoch 5/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.8688 - loss: 0.4444
Epoch 5: val_accuracy did not improve from 0.83333


2024-08-04 19:50:33.760883: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - accuracy: 0.9052 - loss: 0.3097
Epoch 13: val_accuracy did not improve from 1.00000
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 8s/step - accuracy: 0.9052 - loss: 0.3098 - val_accuracy: 0.8521 - val_loss: 0.4597
Epoch 14/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:43[0m 7s/step - accuracy: 0.9375 - loss: 0.2837
Epoch 14: val_accuracy did not improve from 1.00000
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 52ms/step - accuracy: 0.9375 - loss: 0.2837 - val_accuracy: 1.0000 - val_loss: 0.3554
Epoch 15/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8s/step - accuracy: 0.9113 - loss: 0.2963
Epoch 15: val_accuracy did not improve from 1.00000
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m596s[0m 9s/step - accuracy: 0.9114 - loss: 0.2962 - val_accuracy: 0.8792 - val_loss: 0.3824
Epoch 16/20
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [

In [15]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

best_model = load_model('/app/vg16.keras')
test_datagen = ImageDataGenerator(rescale=1.0/255)
base_dir = '/app/GroceryStoreDataset/dataset'
data_dir = os.path.join(base_dir, 'test-1')

test_generator = test_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # Important to not shuffle the data for evaluation
)
# Get the ground truth labels
true_labels = test_generator.classes

# Get the class indices
class_indices = test_generator.class_indices

# Get the list of class names
class_names = list(class_indices.keys())

# Predict on the test data
predictions = model.predict(test_generator, steps=len(test_generator))
predicted_labels = predictions.argmax(axis=-1)



Found 2485 images belonging to 81 classes.


  self._warn_if_super_not_called()


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 6s/step


In [17]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Calculate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)

# Calculate F1 score (macro, micro, or weighted)
f1 = f1_score(true_labels, predicted_labels, average='weighted')

# Print classification report
report = classification_report(true_labels, predicted_labels, target_names=class_names)

print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1}')
print('Classification Report:')
print(report)


Accuracy: 0.5175050301810865
F1 Score: 0.5006692805579736
Classification Report:
                                        precision    recall  f1-score   support

              Alpro-Blueberry-Soyghurt       0.55      1.00      0.71        28
                  Alpro-Fresh-Soy-Milk       0.83      0.18      0.29        28
                  Alpro-Shelf-Soy-Milk       0.72      0.93      0.81        30
                Alpro-Vanilla-Soyghurt       0.00      0.00      0.00        19
                                 Anjou       0.27      0.20      0.23        35
       Arla-Ecological-Medium-Fat-Milk       0.73      0.66      0.69        29
            Arla-Ecological-Sour-Cream       0.71      0.52      0.60        23
          Arla-Lactose-Medium-Fat-Milk       0.36      0.88      0.51        25
                  Arla-Medium-Fat-Milk       0.71      0.15      0.24        34
             Arla-Mild-Vanilla-Yoghurt       0.52      0.59      0.55        27
     Arla-Natural-Mild-Low-Fat-Yoghurt