In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Image classification with Transfer learning and Fine tuning

## Import Libraries

In [1]:
import tensorflow as tf

### Set path variables

In [3]:
# Absolute Kaggle paths of the training data (its entries are listed as class
# labels in the following cells) and of the test images to predict on.
train_dir = '/kaggle/input/nobahar/Food-Data/training/'
test_dir = '/kaggle/input/nobahar/Food-Data/test/'

### Let's see food labels

In [6]:
# Entries of the training directory; the next cell treats each one as a class label.
# NOTE(review): os.listdir gives no ordering guarantee, so this order need not
# match the class indices used by the model.
classes = os.listdir(train_dir)

### Count number of training samples in each group

In [7]:
# Report how many entries each class folder of the training set contains.
for class_name in classes:
    n_samples = len(os.listdir(os.path.join(train_dir, class_name)))
    print(f"{class_name} : {n_samples}")

## Load the data into train and validation sets

In [8]:
# Size every image is resized to when the datasets / test generator load it.
IMG_SIZE = (224,224)
# Fraction of the training directory held out as the validation subset.
VALID_SPLIT = 0.2

In [9]:
# Training subset: the part of the training directory NOT held out by VALID_SPLIT.
# Reads from train_dir (absolute) instead of a relative "../input/..." literal so
# the cell does not depend on the notebook's working directory and agrees with
# the directory-listing cells above.
# seed and validation_split must match the validation cell so the two subsets
# stay disjoint.
train_ds = tf.keras.utils.image_dataset_from_directory(
  train_dir,
  validation_split=VALID_SPLIT,
  subset="training",
  seed=123,
  image_size=IMG_SIZE,
  batch_size=64)

In [10]:
# Validation subset: the VALID_SPLIT fraction held out from the training directory.
# Reads from train_dir (absolute) instead of a relative "../input/..." literal so
# the cell does not depend on the notebook's working directory and agrees with
# the directory-listing cells above.
# seed and validation_split must match the training cell so the two subsets
# stay disjoint.
valid_ds = tf.keras.utils.image_dataset_from_directory(
  train_dir,
  validation_split=VALID_SPLIT,
  subset="validation",
  seed=123,
  image_size=IMG_SIZE,
  batch_size=64)

# Pre-Process

In [11]:
def convert_to_float(image, label):
    """Return ``(image, label)`` with the image converted to ``tf.float32``.

    The conversion is delegated to ``tf.image.convert_image_dtype``; the label
    is passed through untouched.

    NOTE(review): ``convert_image_dtype`` rescales only when converting from an
    integer dtype. If the dataset already yields float images this is
    presumably a pass-through that leaves the pixel range unchanged — confirm.
    """
    float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return float_image, label

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Identical input pipeline for both splits: convert images to float, cache the
# converted elements, then prefetch with an auto-tuned buffer size.
train_ds = train_ds.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)
valid_ds = valid_ds.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

# Load pre-trained model

In [12]:
# Xception convolutional base with ImageNet weights and no classification head.
# The spatial size comes from IMG_SIZE (plus 3 colour channels) so it cannot
# drift from the size the images are resized to when loaded.
base_model = tf.keras.applications.xception.Xception(
    input_shape=(*IMG_SIZE, 3),
    include_top=False,
    weights='imagenet',
)
# Fine-tuning: keep every layer of the pretrained base trainable.
base_model.trainable = True

# Define learning rate scheduler

In [13]:
import matplotlib.pyplot as plt 
# Number of epochs the schedule is previewed over.
EPOCHS = 13

# Learning-rate schedule parameters consumed by lrfn.
LR_START = 1e-5            # rate at epoch 0
LR_MAX = 0.0001 * 0.6      # plateau reached after the ramp-up
LR_MIN = 1e-5              # floor the decay converges towards
LR_RAMPUP_EPOCHS = 3       # epochs of linear ramp from LR_START to LR_MAX
LR_SUSTAIN_EPOCHS = 3      # epochs held at LR_MAX
LR_EXP_DECAY = .5          # per-epoch decay factor once the plateau ends


def lrfn(epoch):
    """Return the learning rate for the given (0-based) epoch.

    Three phases:
      * ``epoch < LR_RAMPUP_EPOCHS``: linear ramp from LR_START towards LR_MAX.
      * next LR_SUSTAIN_EPOCHS epochs: constant LR_MAX.
      * afterwards: exponential decay from LR_MAX towards LR_MIN with factor
        LR_EXP_DECAY per epoch.
    """
    plateau_end = LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS
    if epoch >= plateau_end:
        decay = LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)
        return (LR_MAX - LR_MIN) * decay + LR_MIN
    if epoch >= LR_RAMPUP_EPOCHS:
        return LR_MAX
    return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    
# Keras callback that takes each epoch's learning rate from lrfn (verbose logging on).
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

# Preview the schedule over the planned number of epochs.
rng = list(range(EPOCHS))
y = list(map(lrfn, rng))
plt.plot(rng, y)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

# Create model

In [15]:
from tensorflow.keras.layers.experimental import preprocessing
# Classifier: augmentation -> Xception input scaling -> pretrained base -> 5-way softmax.
input_shape=[IMG_SIZE[0],IMG_SIZE[1], 3]
model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        #data augmentation
        preprocessing.RandomFlip('horizontal'), # flip left-to-right
        preprocessing.RandomContrast(0.5),
        # Xception's ImageNet weights expect inputs scaled to [-1, 1] (what
        # xception.preprocess_input does). Doing the scaling inside the model
        # applies it identically at training and prediction time.
        # NOTE(review): assumes the model is fed raw 0-255 pixel values, which
        # is what the training pipeline and the test generator appear to
        # produce — confirm before changing either side.
        preprocessing.Rescaling(1. / 127.5, offset=-1),
        base_model,
        tf.keras.layers.Flatten(),
        # One output unit per food class (5 classes).
        tf.keras.layers.Dense(5, activation='softmax')
    ])

# Fit the model to the data

In [16]:
# Adam's learning rate is overridden every epoch by lr_callback.
# NOTE(review): sparse categorical cross-entropy presumes integer class labels
# from the datasets — confirm.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# train_ds / valid_ds are already batched when they are created, so no
# batch_size is passed here (Keras documents that batch_size must not be
# specified for dataset inputs). The epoch count reuses EPOCHS so training
# runs exactly as long as the previewed learning-rate schedule.
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    callbacks=[lr_callback],
    epochs=EPOCHS,
)

# Plot training and validation loss 

In [18]:
# Per-epoch metrics recorded by fit, as a DataFrame (one row per epoch).
history_frame = pd.DataFrame(history.history)
# Plot training loss against validation loss.
history_frame[['loss', 'val_loss']].plot()

# Get test image names 

In [20]:
# One row per entry of the test directory, stored in the 'file' column.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the row order of the predictions / output CSV reproducible across runs.
test_image_names = sorted(os.listdir(test_dir))
test_df = pd.DataFrame({'file': test_image_names})

# Load test data to predict

In [21]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [22]:
# No positional argument: ImageDataGenerator's first positional parameter is
# `featurewise_center`, not `rescale`, so passing 1./255 positionally never
# rescaled anything and only enabled an unfitted centering mode.
# NOTE(review): no `rescale=` is added on purpose — the training pipeline
# appears to feed unscaled pixel values, so test images must stay on the same
# scale. Confirm before introducing any rescaling here.
test_datagen = ImageDataGenerator()

In [23]:
# Unlabelled, unshuffled iterator over the test images, one image per batch.
# File names come from the 'file' column of test_df and are resolved against
# test_dir; shuffle=False keeps the iteration order equal to the row order.
test_flow_options = dict(
    dataframe=test_df,
    directory=test_dir,
    x_col='file',
    y_col=None,
    target_size=IMG_SIZE,
    batch_size=1,
    shuffle=False,
    class_mode=None,
)
test_ds = test_datagen.flow_from_dataframe(**test_flow_options)

# Predict test set labels

In [24]:
# Run the trained model over the test iterator. The iterator is created with
# shuffle=False, so predictions should line up with the rows of test_df.
preds = model.predict(test_ds)

In [25]:
# Sanity check: display the shape of the prediction array.
preds.shape

In [27]:
# Human-readable class names, indexed by the predicted class index.
# NOTE(review): this hard-coded order must match the class-index order the
# training dataset assigned to the folders of train_dir — confirm.
labels = ['Egg' , 'Fried food' , 'Meat' , 'Rice' , 'Seafood']

In [31]:
# Index of the highest-scoring class for every prediction row.
predicted_label_index = np.argmax(preds, axis=1)

In [32]:
# Display the predicted class indices for inspection.
predicted_label_index

# Get predicted label names

In [33]:
# Translate every predicted class index into its label name.
predicted_names = [labels[class_index] for class_index in predicted_label_index]

In [34]:
# Attach the predicted label names to the test file table as a new column.
test_df['predicted'] = predicted_names

In [35]:
# Preview the first rows of the file / prediction table.
test_df.head()

# Save results

In [36]:
# Write the file / prediction table to output.csv in the current directory,
# without the DataFrame index column.
test_df.to_csv('output.csv' , index=False)