# Importing necessary libraries

In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline
import numpy as np
from collections import defaultdict
import collections
import os
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten,Conv2D
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras.regularizers import l2
from tensorflow import keras
import numpy as np
from sklearn.metrics import classification_report,confusion_matrix

# Definind different parameters and data augmentation . 

In [2]:
n_classes = 251
img_width, img_height = 224,224
train_data_dir = '/kaggle/input/251-sub-folder/subfolder/labeled_train_set'
validation_data_dir = '/kaggle/input/251-sub-folder/subfolder/labeled_val_set'
batch_size = 64
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
   validation_split=0.3
)

validation_datagen = ImageDataGenerator(rescale=1. / 255)




# Loading the datasets  using image-data generator .
**we used the default split method in image-data generator to split the train_set into training and testing dataset . validation dataset was provided from the creators **

In [3]:
train_generator = train_datagen.flow_from_directory(train_data_dir,
                                                    target_size=(img_width, img_width),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode='categorical',
                                                   subset='training')
test_generator= train_datagen.flow_from_directory(train_data_dir,
                                                 target_size=(img_width, img_width),
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 class_mode='categorical',
                                                subset='validation')
validation_generator = validation_datagen.flow_from_directory(validation_data_dir,
                                                              target_size=(img_width, img_width),
                                                              batch_size=batch_size,
                                                              shuffle=False,
                                                              class_mode='categorical'
                                                             )

Found 83051 images belonging to 251 classes.
Found 35424 images belonging to 251 classes.
Found 11994 images belonging to 251 classes.


# Importing the pre-trained res-net model from keras and freezing first 500 layers . We have decided to re-train the last few layers according to our dataset as the contain the most complex features .

In [4]:
resnet = tf.keras.applications.DenseNet201(weights='imagenet', include_top=False,input_shape=(img_width,img_width,3))
# for layer in resnet.layers[:400]:
#     layer.trainable=False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5


# Model Training 

In [5]:
from keras.callbacks import LearningRateScheduler
def learning_rate_scheduler(epoch, lr):
    initial_lr=0.00005
    warmup_epochs = 10 # Number of epochs for warm-up
    if epoch ==10:
        lr = initial_lr /10
  
        return lr
    else:
        return initial_lr


lr_scheduler = LearningRateScheduler(learning_rate_scheduler)

In [6]:
x = resnet.output
x = GlobalAveragePooling2D()(x)
x = Dense(300,activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(n_classes,kernel_regularizer=regularizers.l2(0.001), activation='softmax')(x)
model = Model(inputs=resnet.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=0.00005), loss='categorical_crossentropy', metrics=['accuracy'])
checkpointer = ModelCheckpoint(filepath='./resnet_best_model251.hdf5', verbose=1, save_best_only=True)

model.fit(train_generator,
        steps_per_epoch = 83051 // batch_size,
        validation_data = validation_generator,
        validation_steps = 11994 // batch_size,
        epochs=20,
        verbose=1,
      callbacks=[ lr_scheduler,checkpointer]
         )

Epoch 1/20
Epoch 1: val_loss improved from inf to 2.74316, saving model to ./resnet_best_model251.hdf5
Epoch 2/20
Epoch 2: val_loss improved from 2.74316 to 2.21847, saving model to ./resnet_best_model251.hdf5
Epoch 3/20
Epoch 3: val_loss improved from 2.21847 to 1.94374, saving model to ./resnet_best_model251.hdf5
Epoch 4/20
Epoch 4: val_loss improved from 1.94374 to 1.86641, saving model to ./resnet_best_model251.hdf5
Epoch 5/20
Epoch 5: val_loss did not improve from 1.86641
Epoch 6/20
Epoch 6: val_loss did not improve from 1.86641
Epoch 7/20
Epoch 7: val_loss improved from 1.86641 to 1.79893, saving model to ./resnet_best_model251.hdf5
Epoch 8/20
Epoch 8: val_loss did not improve from 1.79893
Epoch 9/20
Epoch 9: val_loss improved from 1.79893 to 1.79704, saving model to ./resnet_best_model251.hdf5
Epoch 10/20
Epoch 10: val_loss did not improve from 1.79704
Epoch 11/20
Epoch 11: val_loss improved from 1.79704 to 1.69813, saving model to ./resnet_best_model251.hdf5
Epoch 12/20
Epoch 1

<keras.callbacks.History at 0x7fa93f58e290>

# Test Result 

In [7]:
y_true = test_generator.classes

prediction = model.predict(test_generator)
y_pred = np.argmax(prediction, axis=1)



In [8]:

label_map = {}
with open('/kaggle/input/ifood-2019-fgvc6/class_list.txt', 'r') as f:
    for line in f:
        numerical_label, actual_label = line.strip().split()
        label_map[int(numerical_label)] = actual_label

# Convert numerical labels to actual labels
y_true = [label_map[label] for label in y_true]
y_pred = [label_map[label] for label in y_pred]

# Generate the classification report
report = classification_report(y_true, y_pred,zero_division=1)



print(report)

                               precision    recall  f1-score   support

                        adobo       0.35      0.48      0.41       181
                ambrosia_food       0.31      0.45      0.37       132
                    apple_pie       0.69      0.73      0.71       161
               apple_turnover       0.37      0.20      0.26       152
                   applesauce       0.57      0.35      0.44       147
              applesauce_cake       0.47      0.37      0.41       120
                baby_back_rib       0.40      0.59      0.48       175
               bacon_and_eggs       0.56      0.62      0.59       102
bacon_lettuce_tomato_sandwich       0.73      0.83      0.78       180
                 baked_alaska       0.42      0.54      0.47       164
                      baklava       0.51      0.45      0.48        86
          barbecued_spareribs       0.54      0.66      0.60       136
               barbecued_wing       0.67      0.50      0.57       152
     