In [1]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# import backend
import tensorflow as  tf

# Model architecture
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout, Flatten, Conv2D, BatchNormalization
from keras.layers import MaxPool2D, Activation, MaxPooling2D

# model optimisation and scores
from sklearn import metrics
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.optimizers import Adam, SGD
from keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score, auc, roc_curve

# Annealer
from keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Data processing
from keras.utils import to_categorical

#Custom data generator
from tensorflow.keras.utils import Sequence

#Save the model
from tensorflow.keras.models import load_model

# Visualization
import matplotlib.pyplot as plt
import h5py

policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

2024-09-01 12:58:20.313709: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-01 12:58:20.313876: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-01 12:58:20.443583: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
%cd /kaggle/input/food-101/
print("Files in the input directory")
print(os.listdir("food-101/food-101/meta"))

!head 'food-101/food-101/meta/classes.txt'
!head 'food-101/food-101/meta/train.txt'
!head 'food-101/food-101/meta/labels.txt'
!head 'food-101/food-101/meta/test.txt'

/kaggle/input/food-101
Files in the input directory
['test.txt', 'train.json', 'labels.txt', 'test.json', 'train.txt', 'classes.txt']
apple_pie
baby_back_ribs
baklava
beef_carpaccio
beef_tartare
beet_salad
beignets
bibimbap
bread_pudding
breakfast_burrito
apple_pie/1005649
apple_pie/1014775
apple_pie/1026328
apple_pie/1028787
apple_pie/1043283
apple_pie/1050519
apple_pie/1057749
apple_pie/1057810
apple_pie/1072416
apple_pie/1074856
Apple pie
Baby back ribs
Baklava
Beef carpaccio
Beef tartare
Beet salad
Beignets
Bibimbap
Bread pudding
Breakfast burrito
apple_pie/1011328
apple_pie/101251
apple_pie/1034399
apple_pie/103801
apple_pie/1038694
apple_pie/1047447
apple_pie/1068632
apple_pie/110043
apple_pie/1106961
apple_pie/1113017


In [3]:
# Read the content of the 'classes.txt' file
with open('food-101/food-101/meta/classes.txt', 'r') as input_file:
    lines = input_file.readlines()

# Take the first 20 lines from the 'classes.txt' file
first_20_lines = lines[:20]

# Add the word "other" as the 21st line
first_20_lines.append("other\n")

# Write the modified content to the 'classes_mod.txt' file
with open('/kaggle/working/classes_mod.txt', 'w') as output_file:
    output_file.writelines(first_20_lines)

print("Modified file 'classes_mod.txt' created successfully and here is the file:")
df = pd.read_csv('/kaggle/working/classes_mod.txt', sep='\t', header=None)
print(df)

Modified file 'classes_mod.txt' created successfully and here is the file:
                     0
0            apple_pie
1       baby_back_ribs
2              baklava
3       beef_carpaccio
4         beef_tartare
5           beet_salad
6             beignets
7             bibimbap
8        bread_pudding
9    breakfast_burrito
10          bruschetta
11        caesar_salad
12             cannoli
13       caprese_salad
14         carrot_cake
15             ceviche
16          cheesecake
17        cheese_plate
18       chicken_curry
19  chicken_quesadilla
20               other


In [5]:
############## First things first: Create a list of the image paths #############
def read_train_images_from_file(file_path, base_directory='food-101/food-101/images', image_extension='.jpg', num_lines=None):
    with open(file_path, 'r') as file, open('/kaggle/working/sorted_full_paths.txt', 'w') as output_file:
        lines = file.readlines()[:num_lines] if num_lines is not None else file.readlines()     
        for line in lines:
            image_path = line.strip()  # Remove leading/trailing whitespace
            full_image_path = os.path.join(base_directory, image_path + image_extension)
            #print('full_image_path',full_image_path)
            output_file.write(full_image_path)
            output_file.write("\n")
            
def read_test_images_from_file(file_path, base_directory='food-101/food-101/images', image_extension='.jpg', num_lines=None):
    with open(file_path, 'r') as file, open('/kaggle/working/test_full_paths.txt', 'w') as output_file:
        lines = file.readlines()[:num_lines] if num_lines is not None else file.readlines()     
        for line in lines:
            image_path = line.strip()  # Remove leading/trailing whitespace
            full_image_path = os.path.join(base_directory, image_path + image_extension)
            #print('full_image_path',full_image_path)
            output_file.write(full_image_path)
            output_file.write("\n")            

#read_images_from_file('food-101/food-101/meta/train.txt', num_lines=42)
read_train_images_from_file('food-101/food-101/meta/train.txt')
read_test_images_from_file('food-101/food-101/meta/test.txt')
print("sorted_full_paths.txt' created successfully and here is the file:")

sorted_full_paths.txt' created successfully and here is the file:


In [8]:
#from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale=1./255,
                                    rotation_range=10,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    shear_range=0.2,
                                    zoom_range=0.2,
                                    horizontal_flip=True,
                                    vertical_flip=True
                                  )
test_datagen = ImageDataGenerator(rescale=1./255)

# Read train image paths
with open('/kaggle/working/sorted_full_paths.txt', 'r') as file:
    image_paths = file.read().splitlines()

# Read train labels and process them
with open('/kaggle/working/train_mod.txt', 'r') as file:
    labels = [line.split('/')[0].replace('_', ' ') for line in file]  # Remove underscores from labels

# Create a train DataFrame
df_train = pd.DataFrame({'filename': image_paths, 'label': labels})
print(df_train)

train_generator = train_datagen.flow_from_dataframe(dataframe=df_train, 
                                                   x_col='filename',
                                                   y_col='label',  # If you have labels
                                                   batch_size=100,
                                                   class_mode='categorical')  # Change this according to your task

# Read test image paths
with open('/kaggle/working/test_full_paths.txt', 'r') as file:
    test_paths = file.read().splitlines()

# Read test labels and process them
with open('/kaggle/working/test_mod.txt', 'r') as file:
    test_labels = [line.split('/')[0].replace('_', ' ') for line in file]  # Remove underscores from labels

# Create a test DataFrame
df_test = pd.DataFrame({'filename': test_paths, 'label': test_labels})
print(df_test)

test_generator = test_datagen.flow_from_dataframe(dataframe=df_test,
                                                 x_col='filename',
                                                 y_col='label',  # If you have labels
                                                 batch_size=100,
                                                 class_mode='categorical')

                                             filename      label
0      food-101/food-101/images/apple_pie/1005649.jpg  apple pie
1      food-101/food-101/images/apple_pie/1014775.jpg  apple pie
2      food-101/food-101/images/apple_pie/1026328.jpg  apple pie
3      food-101/food-101/images/apple_pie/1028787.jpg  apple pie
4      food-101/food-101/images/apple_pie/1043283.jpg  apple pie
...                                               ...        ...
75745     food-101/food-101/images/waffles/981485.jpg      other
75746      food-101/food-101/images/waffles/98238.jpg      other
75747     food-101/food-101/images/waffles/982668.jpg      other
75748     food-101/food-101/images/waffles/995085.jpg      other
75749     food-101/food-101/images/waffles/999047.jpg      other

[75750 rows x 2 columns]
Found 75750 validated image filenames belonging to 21 classes.
                                             filename      label
0      food-101/food-101/images/apple_pie/1011328.jpg  apple pie
1

In [9]:
#Two GPU Function
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

from keras.applications import VGG16
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

# Build your model here
with strategy.scope():
    # Load the pre-trained model (excluding the top classification layers)
    #base_model = VGG16(weights='imagenet', include_top=False, input_shape=(240, 320, 3))
    base_model = VGG16(weights='imagenet', include_top=False)
    
    # Add custom classification layers
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(21, activation='softmax')(x)  # Assuming 21 classes

    # Create the final model
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the layers from the pre-trained model (optional)
    for layer in base_model.layers:
        layer.trainable = False
    
    # Compile the model
    #model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    
    # Add regularization (dropout)
    model.add(Dropout(0.3))
    
    #Defining learning schedule    
    from keras.callbacks import ReduceLROnPlateau
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)

    
    #Train the model using your custom data generator
    model.fit(train_generator, epochs=20, callbacks=[reduce_lr], validation_data=test_generator)

Number of devices: 2
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


ValueError: Argument(s) not recognized: {'lr': 0.001}

In [None]:
evaluation = model.evaluate(test_generator)

# Save the model (optional)
model.save("/kaggle/working/model.h5")