# Training image classifier
## Normal classifier
First, we perform transfer learning on ResNet50v2 with Keras to classify images on the labels listed in `'cuisines_1'`.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.applications import ResNet50V2
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import Adam

# Load dataframe
df = pd.read_csv('labels.csv')

# Change 'image_id' to string and add '.jpg' to the end
df['image_id'] = df['image_id'].astype(str) + '.jpg'

# Split dataframe into train and test
train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)


### Preprocess image data
First we preprocess the image data. For the image augmentation, we rescale the images by 1/255, apply a shear range of 0.2, zoom range of 0.2, and horizontal flip. This is done for both train and test data.
Next, we create batches of augmented images of size 32. `class_mode` is set to `categorical` since we have multiple classes.

In [12]:
# Create data augmentation instance for train set with ImageDataGenerator
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Create batches of augmented images from trainset
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='./images',
    x_col='image_id',
    y_col='cuisines_1',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Create data augmentation instance for test set with ImageDataGenerator
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Create batches of augmented images from testset
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='./images',
    x_col='image_id',
    y_col='cuisines_1',
    target_size=(224, 224),
    batch_size=16, #32,
    class_mode='categorical'
)


Found 77074 validated image filenames belonging to 43 classes.
Found 19268 validated image filenames belonging to 43 classes.


### Create model
Load ResNet50v2 model with pretrained ImageNet weights and add a few new layers to finetune with our dataset. Reduce dimensions of the base model with a global average pooling layer and then add a fully connected layer with 1024 units and ReLU activation. Then create the final layer with the number of classes that is uses softmax activation to make classifications.

In [8]:
# Load ResNet50V2 model
base_model = ResNet50V2(include_top=False, weights='imagenet')

# Add new layers on top of ResNet50V2
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(len(train_generator.class_indices), activation='softmax')(x)

# Define new model
model = Model(inputs=base_model.input, outputs=predictions)


Freeze the layers in the base model so they don't get overwritten, and then compile the model with Adam optimizer and categorical crossentropy loss.

In [9]:
# Freeze each layer in the model
for layer in base_model.layers:
    layer.trainable = False

# Compile model
model.compile(optimizer=Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(name, **kwargs)


### Train model

In [10]:
# Train model
model.fit(
    train_generator,
    epochs=2,
    validation_data=test_generator
)

Epoch 1/2

UnknownError: Graph execution error:

2 root error(s) found.
  (0) UNKNOWN:  UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x00000161A10EAE30>
Traceback (most recent call last):

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\ops\script_ops.py", line 271, in __call__
    ret = func(*args)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 1035, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\engine\data_adapter.py", line 903, in wrapped_generator
    for data in generator_fn():

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\engine\data_adapter.py", line 1050, in generator_fn
    yield x[i]

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\preprocessing\image.py", line 116, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\preprocessing\image.py", line 370, in _get_batches_of_transformed_samples
    img = image_utils.load_img(

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\utils\image_utils.py", line 423, in load_img
    img = pil_image.open(io.BytesIO(f.read()))

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\PIL\Image.py", line 3298, in open
    raise UnidentifiedImageError(msg)

PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x00000161A10EAE30>


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
	 [[IteratorGetNext/_4]]
  (1) UNKNOWN:  UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x00000161A10EAE30>
Traceback (most recent call last):

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\ops\script_ops.py", line 271, in __call__
    ret = func(*args)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 642, in wrapper
    return func(*args, **kwargs)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 1035, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\engine\data_adapter.py", line 903, in wrapped_generator
    for data in generator_fn():

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\engine\data_adapter.py", line 1050, in generator_fn
    yield x[i]

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\preprocessing\image.py", line 116, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\preprocessing\image.py", line 370, in _get_batches_of_transformed_samples
    img = image_utils.load_img(

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\keras\utils\image_utils.py", line 423, in load_img
    img = pil_image.open(io.BytesIO(f.read()))

  File "c:\Users\amarm\Documents\GitHub\AdvancedAnalytics_projects\assignment-2\.virtenv2\lib\site-packages\PIL\Image.py", line 3298, in open
    raise UnidentifiedImageError(msg)

PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x00000161A10EAE30>


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_19145]

In [None]:
# Save model
model.save('resnet50v2_model.h5')


In [None]:
# Save weights
model.save_weights('resnet50v2_weights.h5')

## Multilabel Classifier
Next, we will attempt to train a multilabel classifier that includes all the labels in both `'cuisines_1'` and `'cuisines_2'`.