# **III. Convolutional neural networks**

In [6]:


import os
import numpy as np
import shutil
import tensorflow as tf
import pickle as pkl
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

In [13]:

data_dir = r"C:/Spring 2023-2024/CMPS 261/Food Item Identification/project datasets"



FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:/Spring 2023-2024/CMPS 261/Food Item Identification/project datasets\\1'

In [None]:

def verify_images(folder_path):
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        try:
            img = tf.io.read_file(fpath)
            img = tf.io.decode_image(img)
        except:
            print(f'Removing corrupted image: {fpath}')
            os.remove(fpath)

for i in range(1, 10):
    verify_images(os.path.join(data_dir, str(i)))


sets = ['train', 'val', 'test']
for s in sets:
    set_path = os.path.join(data_dir, s)
    if not os.path.exists(set_path):
        os.makedirs(set_path)
    for i in range(1, 10):
        class_path = os.path.join(set_path, str(i))
        if not os.path.exists(class_path):
            os.makedirs(class_path)


def split_data(source, dest_train, dest_val, dest_test, split_train=0.8, split_val=0.1):
    files = os.listdir(source)
    np.random.shuffle(files)
    train_idx = int(len(files) * split_train)
    val_idx = int(len(files) * (split_train + split_val))
    for file in files[:train_idx]:
        shutil.copy(os.path.join(source, file), os.path.join(dest_train, file))
    for file in files[train_idx:val_idx]:
        shutil.copy(os.path.join(source, file), os.path.join(dest_val, file))
    for file in files[val_idx:]:
        shutil.copy(os.path.join(source, file), os.path.join(dest_test, file))


for i in range(1, 10):
    src_folder = os.path.join(data_dir, str(i))
    train_folder = os.path.join(data_dir, 'train', str(i))
    val_folder = os.path.join(data_dir, 'val', str(i))
    test_folder = os.path.join(data_dir, 'test', str(i))
    split_data(src_folder, train_folder, val_folder, test_folder)


In [None]:

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),  
    MaxPooling2D(2, 2),  
    Conv2D(64, (3,3), activation='relu'),  
    MaxPooling2D(2, 2), 
    Conv2D(128, (3,3), activation='relu'), 
    MaxPooling2D(2, 2),  
    Flatten(),  
    Dense(512, activation='relu'),  
    Dropout(0.5), 
    Dense(9, activation='softmax')  
])


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255) 


train_generator = train_datagen.flow_from_directory(
    os.path.join(data_dir, 'train'), 
    target_size=(150, 150),  
    batch_size=32,  
    class_mode='categorical' 
)


val_generator = val_datagen.flow_from_directory(
    os.path.join(data_dir, 'val'),  
    target_size=(150, 150),  
    batch_size=32, 
    class_mode='categorical'  
)



early_stopping = EarlyStopping(monitor='val_loss', patience=10)  



steps_per_epoch = len(train_generator.filenames) // train_generator.batch_size  
validation_steps = len(val_generator.filenames) // val_generator.batch_size  


# Train the model
history = model.fit(
    train_generator,  
    steps_per_epoch=steps_per_epoch, 
    epochs=50,  
    validation_data=val_generator, 
    validation_steps=validation_steps,  
    callbacks=[early_stopping]  
)
model_path = os.path.join('/content/drive/MyDrive', 'best_model.pkl')
pickle.dump(best_model, open(model_path, 'wb'))


Found 4950 images belonging to 9 classes.
Found 984 images belonging to 9 classes.
Epoch 1/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m371s[0m 2s/step - accuracy: 0.3403 - loss: 1.8998 - val_accuracy: 0.5729 - val_loss: 1.2209
Epoch 2/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3750 - loss: 1.6413 - val_accuracy: 0.7083 - val_loss: 0.9884
Epoch 3/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 2s/step - accuracy: 0.5951 - loss: 1.1131 - val_accuracy: 0.7385 - val_loss: 0.7650
Epoch 4/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7188 - loss: 0.8720 - val_accuracy: 0.7500 - val_loss: 0.7374
Epoch 5/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 2s/step - accuracy: 0.7263 - loss: 0.7609 - val_accuracy: 0.8125 - val_loss: 0.5350
Epoch 6/50
[1m154/154[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7188

In [None]:

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    os.path.join(data_dir, 'test'),
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

test_loss, test_acc = model.evaluate(test_generator)
print(f'Test accuracy: {test_acc}')



Found 972 images belonging to 9 classes.
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 736ms/step - accuracy: 0.9529 - loss: 0.1787
Test accuracy: 0.9578189253807068
