This task intends to measure your deep learning basic skills. We will provide you a dataset containing 131 different fruits. Download it from this [link](https:///www.kaggle.com/moltean/fruits), download it and upload it to your copy of this file

You have to implement a Convolutional Neural Network to classify the input fruit image. For this task, you must follow the following rules:

- Use a ResNet50 as your CNN backbone.
- The model output must be a probability score for all the classes.
- You can implement the model in any Deep Learning framework that you are used to use.
- Make sure you write your code following the best practices.
- Make sure you document all the steps you took to solve the problem.


In [1]:
# Import all needed libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
from PIL import Image

#Auxiliary libraries
from os import listdir, path
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
#Declaring and defining the constants
BASE_FOLDER = 'data'
TRAIN_FOLDER = 'data/Training'
TEST_FOLDER = 'data/Test'
TRAIN_DF = 'train_df.csv'
TEST_DF = 'test_df.csv'
COLUMNS = {
    'LABEL':'label',
    'FILE':'file'
}

INPUT_SHAPE = (100,100,3)
IMG_SIZE = (100,100)
VAL_SIZE = 0.2
BATCH_SIZE = 256
EPOCHS = 30
SEED = 42

In [3]:
def generate_dataframe(data_folder, columns = COLUMNS):
    labels = listdir(data_folder)
    
    dataframe = pd.DataFrame()
    
    for lbl in labels:
        data = {
            columns['FILE'] : [path.join(lbl,file) for file in listdir(path.join(data_folder,lbl))],
            columns['LABEL']: lbl
        }
        df = pd.DataFrame(data)
        
        dataframe = dataframe.append(df)
    
    return dataframe.sample(frac=1).reset_index(drop=True)

def load_dataframe(df_path):
    if path.isfile(df_path):
        return pd.read_csv(df_path)
    else:
        return None


In [4]:
#Load the dataframe

train_df = load_dataframe(path.join(BASE_FOLDER, TRAIN_DF))

if train_df is None:
    train_df = generate_dataframe(TRAIN_FOLDER)
    train_df.to_csv(path.join(BASE_FOLDER, TRAIN_DF),index=False)
    

test_df = load_dataframe(path.join(BASE_FOLDER, TEST_DF))

if test_df is None:
    test_df = generate_dataframe(TEST_FOLDER)
    test_df.to_csv(path.join(BASE_FOLDER, TEST_DF),index=False)

In [5]:
CLASSES = pd.unique(train_df['label'])
QTT_CLASSES = CLASSES.shape[0]

In [6]:
train_generator = ImageDataGenerator(
    preprocessing_function = preprocess_input,
    validation_split = VAL_SIZE)

test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input)

In [7]:
train_images = train_generator.flow_from_dataframe(
    dataframe = train_df,
    directory = TRAIN_FOLDER,
    x_col = COLUMNS['FILE'],
    y_col = COLUMNS['LABEL'],
    target_size = IMG_SIZE,
    color_mode = 'rgb',
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = SEED,
    subset = 'training',
    rotation_range = 30,
    zoom_range = 0.10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.15,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest")

val_images = train_generator.flow_from_dataframe(
    dataframe = train_df,
    directory = TRAIN_FOLDER,
    x_col = COLUMNS['FILE'],
    y_col = COLUMNS['LABEL'],
    target_size = IMG_SIZE,
    color_mode = 'rgb',
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
    shuffle = True,
    seed = SEED,
    subset = 'validation',
    rotation_range = 30,
    zoom_range = 0.10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.15,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest")

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    directory = TEST_FOLDER,
    x_col = COLUMNS['FILE'],
    y_col = COLUMNS['LABEL'],
    target_size = IMG_SIZE,
    color_mode = 'rgb',
    class_mode = 'categorical',
    batch_size = BATCH_SIZE,
    shuffle=False)

Found 54154 validated image filenames belonging to 131 classes.
Found 13538 validated image filenames belonging to 131 classes.
Found 22688 validated image filenames belonging to 131 classes.


In [8]:
resnet50 = ResNet50(
    input_shape = INPUT_SHAPE,
    include_top = False,
    weights = 'imagenet',
    pooling = 'avg'
)

In [9]:
inputs = resnet50.input

layer = Dense(128, activation='relu')(resnet50.output)
layer = Dense(128, activation='relu')(layer)

outputs = Dense(QTT_CLASSES, activation='softmax')(layer)

model = Model(inputs = inputs, outputs = outputs)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

# model.summary()

In [10]:
history = model.fit(
    train_images,
    validation_data = val_images,
    epochs = EPOCHS
)

Epoch 1/30


ResourceExhaustedError:  OOM when allocating tensor with shape[256,256,25,25] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node model/conv2_block2_3_bn/FusedBatchNormV3 (defined at <ipython-input-10-313df063b9fa>:1) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_17578]

Function call stack:
train_function
