In [None]:
import pandas as pd
import json
import os
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

import warnings
# Suppress every Python warning raised from this point on in the session.
# NOTE(review): this also hides deprecation notices; consider narrowing the
# filter to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
import os
import subprocess

def download_kaggle_dataset():
    """Download the Food Recognition 2022 archive from Kaggle using ``curl``.

    The archive is written to ``../artifacts/food-recognition-2022.zip``
    (relative to the current working directory). The ``../artifacts``
    directory is created first when it does not exist.

    Download problems are printed instead of raised so the notebook keeps
    running. Both a non-zero ``curl`` exit status and a missing ``curl``
    executable are reported this way.

    Returns:
        None
    """
    artifacts_dir = "../artifacts"

    kaggle_url = "https://www.kaggle.com/api/v1/datasets/download/sainikhileshreddy/food-recognition-2022"
    destination_path = os.path.join(artifacts_dir, "food-recognition-2022.zip")

    if not os.path.exists(artifacts_dir):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(artifacts_dir, exist_ok=True)
        print(f"Created directory: {artifacts_dir}")

    try:
        print(f"Downloading dataset to {destination_path}...")
        subprocess.run(
            # --fail makes curl exit non-zero on an HTTP error response, so a
            # server error page is not silently saved as the .zip file and
            # reported as a successful download.
            ["curl", "--fail", "-L", "-o", destination_path, kaggle_url],
            check=True
        )
        print("Download completed successfully!")
    except subprocess.CalledProcessError as e:
        print(f"Error occurred while downloading the dataset: {e}")
    except FileNotFoundError as e:
        # Raised by subprocess.run when the curl executable cannot be found.
        print(f"Error occurred while downloading the dataset: curl executable not found ({e})")

# In a notebook kernel __name__ is "__main__", so the download starts
# whenever this cell is executed.
if __name__ == "__main__":
    download_kaggle_dataset()


In [None]:
import os
import zipfile

def unzip_file(zip_file_path, extract_to_dir):
    """
    Extract every member of a zip archive into a target directory.

    The target directory is created (and the creation is reported) when it
    does not exist yet. Any problem while opening or extracting the archive
    is printed rather than raised.

    Parameters:
        zip_file_path (str): Location of the .zip archive to read.
        extract_to_dir (str): Directory that receives the extracted files.
    """
    target_missing = not os.path.exists(extract_to_dir)
    if target_missing:
        os.makedirs(extract_to_dir)
        print(f"Created directory: {extract_to_dir}")

    try:
        with zipfile.ZipFile(zip_file_path, mode='r') as archive:
            archive.extractall(path=extract_to_dir)
            print(f"Successfully extracted {zip_file_path} to {extract_to_dir}")
    except Exception as err:
        # A corrupt archive gets its own message; everything else is
        # reported generically.
        if isinstance(err, zipfile.BadZipFile):
            print(f"Error: {zip_file_path} is not a valid zip file.")
        else:
            print(f"An error occurred: {err}")

# In a notebook kernel __name__ is "__main__", so the extraction runs
# whenever this cell is executed.
if __name__ == "__main__":
    # Same path the download step writes the archive to.
    zip_file = "../artifacts/food-recognition-2022.zip"  
    # Directory that receives the extracted dataset.
    output_dir = "../artifacts/FoodRecognition"              
    
    unzip_file(zip_file, output_dir)


In [None]:
# Root of the extracted dataset, relative to the notebook's working directory.
# This is the same directory the archive is unzipped into; building the paths
# from it (instead of a machine-specific absolute path) keeps the notebook
# runnable on any checkout.
dataset_root = "../artifacts/FoodRecognition"
raw_data_dir = os.path.join(dataset_root, "raw_data")

# Training and validation splits as laid out inside the archive.
train_split_dir = os.path.join(raw_data_dir, "public_training_set_release_2.0")
val_split_dir = os.path.join(raw_data_dir, "public_validation_set_2.0")

annotations_file_train = os.path.join(train_split_dir, "annotations.json")
annotations_file_val = os.path.join(val_split_dir, "annotations.json")
image_file_train = os.path.join(train_split_dir, "images")
image_file_val = os.path.join(val_split_dir, "images")

In [None]:
# Parse the training annotation file into a dictionary.
with open(annotations_file_train, 'r') as fh:
    train_data = json.load(fh)

# Image table: image id and the file it is stored in.
train_images = (
    pd.DataFrame(train_data['images'])
    .rename(columns={'id': 'image_id'})
    [['image_id', 'file_name']]
)

# Category table: category id and its name.
train_categories = (
    pd.DataFrame(train_data['categories'])
    [['id', 'name']]
    .rename(columns={'id': 'category_id'})
)

# Annotation table: which category was annotated on which image.
train_annotations = pd.DataFrame(train_data['annotations'])[['image_id', 'category_id']]

# Resolve both ids so each annotation row carries a file name and a
# category name.
annotations_with_names = train_annotations.merge(train_categories, on='category_id')
annotations_with_files = annotations_with_names.merge(train_images, on='image_id')
train_df = annotations_with_files[['file_name', 'name']]

In [None]:
# Display the merged training table: image file name and category name for
# each annotation that matched both a category and an image.
train_df

In [None]:
# Target layout: one sub-directory per category name, each holding the images
# annotated with that category.
organized_train_dir = "../artifacts/FoodRecognition/organized_training_data"

os.makedirs(organized_train_dir, exist_ok=True)

for category in train_df['name'].unique():
    os.makedirs(os.path.join(organized_train_dir, category), exist_ok=True)

# train_df has one row per annotation, so an image annotated several times
# with the same category shows up on several rows. Copy each
# (file, category) pair only once.
train_copy_pairs = train_df[['file_name', 'name']].drop_duplicates()

missing_train_files = 0
for file_name, category in train_copy_pairs.itertuples(index=False, name=None):
    src = os.path.join(image_file_train, file_name)
    dst = os.path.join(organized_train_dir, category, file_name)
    if os.path.exists(src):
        shutil.copy(src, dst)
    else:
        missing_train_files += 1

# Report skipped files so a wrong image directory does not go unnoticed and
# silently produce an empty dataset.
if missing_train_files:
    print(
        f"Warning: {missing_train_files} annotated training image(s) "
        f"were not found in {image_file_train}"
    )

In [None]:
# Parse the validation annotation file.
with open(annotations_file_val, 'r') as file:
    val_data = json.load(file)

val_images = pd.DataFrame(val_data['images']).rename(columns={'id': 'image_id'})[['image_id', 'file_name']]
val_categories = pd.DataFrame(val_data['categories'])[['id', 'name']].rename(columns={'id': 'category_id'})
val_annotations = pd.DataFrame(val_data['annotations'])[['image_id', 'category_id']]

# One row per validation annotation: image file name and category name.
val_df = val_annotations.merge(val_categories, on='category_id').merge(val_images, on='image_id')[['file_name', 'name']]

organized_val_dir = "../artifacts/FoodRecognition/organized_validation_data"

os.makedirs(organized_val_dir, exist_ok=True)

# The class folders are created from the TRAINING categories, so the
# validation tree has a folder for every training category even when the
# validation split contains no image of it.
with open(annotations_file_train, 'r') as file:
    train_data = json.load(file)

train_categories = pd.DataFrame(train_data['categories'])[['id', 'name']].rename(columns={'id': 'category_id'})

val_class_folder_names = set(train_categories['name'].unique())
for category in val_class_folder_names:
    os.makedirs(os.path.join(organized_val_dir, category), exist_ok=True)

# Copy each (file, category) pair once; duplicate rows come from images that
# carry several annotations of the same category.
val_copy_pairs = val_df[['file_name', 'name']].drop_duplicates()

missing_val_files = 0
unknown_class_rows = 0
for file_name, category in val_copy_pairs.itertuples(index=False, name=None):
    if category not in val_class_folder_names:
        # No folder exists for this category, so shutil.copy would raise.
        # Skip the row and report it below.
        unknown_class_rows += 1
        continue
    src = os.path.join(image_file_val, file_name)
    dst = os.path.join(organized_val_dir, category, file_name)
    if os.path.exists(src):
        shutil.copy(src, dst)
    else:
        missing_val_files += 1

if unknown_class_rows:
    print(
        f"Warning: skipped {unknown_class_rows} validation image/category pair(s) "
        f"whose category is not a training category"
    )
if missing_val_files:
    print(
        f"Warning: {missing_val_files} annotated validation image(s) "
        f"were not found in {image_file_val}"
    )


In [None]:
# Count only sub-directories: each one is a class folder, whereas stray files
# in the directory (for example .DS_Store) are not classes.
val_class_dirs = [
    entry for entry in os.listdir(organized_val_dir)
    if os.path.isdir(os.path.join(organized_val_dir, entry))
]
print("Classes in validation set:", len(val_class_dirs))

In [None]:
# Walk both raw image directories and report how many files each visited
# folder holds.
for heading, image_dir in (
    ("Training Directory Structure:", image_file_train),
    ("Validation Directory Structure:", image_file_val),
):
    print(heading)
    for root, dirs, files in os.walk(image_dir):
        print(root, "contains", len(files), "files")

In [None]:
# Input settings shared by both generators.
IMAGE_SIZE = (224, 224)     # GOOGLENET InputSize
BATCH_SIZE = 32

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_generator = train_datagen.flow_from_directory(
    organized_train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True,
    seed=42,
    color_mode='rgb',
    interpolation='nearest'
)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

# Pass the training class list (in training index order) explicitly. Without
# it the validation generator infers its own class list from the validation
# folder names, and any difference between the two folder sets would shift
# the label indices or change the number of classes.
class_names_in_index_order = sorted(
    train_generator.class_indices, key=train_generator.class_indices.get
)

val_generator = val_datagen.flow_from_directory(
    organized_val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    classes=class_names_in_index_order
)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout, Input, concatenate
)
from tensorflow.keras.models import Model

def inception_module(x, f1x1, f3x3_reduce, f3x3, f5x5_reduce, f5x5, pool_proj):
    """Apply one inception module to ``x`` and return the concatenated result.

    Four parallel branches are built on the same input and joined along the
    channel axis:

    1. a 1x1 convolution,
    2. a 1x1 convolution followed by a 3x3 convolution,
    3. a 1x1 convolution followed by a 5x5 convolution,
    4. a 3x3 max-pool (stride 1) followed by a 1x1 convolution.

    All convolutions use ``padding='same'`` and ReLU activation.

    Parameters:
        x: Input tensor of the module.
        f1x1 (int): Filters of the branch-1 1x1 convolution.
        f3x3_reduce (int): Filters of the 1x1 convolution in front of the 3x3.
        f3x3 (int): Filters of the 3x3 convolution.
        f5x5_reduce (int): Filters of the 1x1 convolution in front of the 5x5.
        f5x5 (int): Filters of the 5x5 convolution.
        pool_proj (int): Filters of the 1x1 convolution after the max-pool.

    Returns:
        Tensor with the four branch outputs concatenated on the last axis.
    """
    branch1 = Conv2D(f1x1, (1, 1), padding='same', activation='relu')(x)

    branch2 = Conv2D(f3x3_reduce, (1, 1), padding='same', activation='relu')(x)
    branch2 = Conv2D(f3x3, (3, 3), padding='same', activation='relu')(branch2)

    branch3 = Conv2D(f5x5_reduce, (1, 1), padding='same', activation='relu')(x)
    branch3 = Conv2D(f5x5, (5, 5), padding='same', activation='relu')(branch3)

    branch4 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch4 = Conv2D(pool_proj, (1, 1), padding='same', activation='relu')(branch4)

    return concatenate([branch1, branch2, branch3, branch4], axis=-1)


# Width of both softmax layers. Taken from the training generator so the
# model always matches the number of class folders actually found, instead
# of relying on a hard-coded count.
NUM_CLASSES = train_generator.num_classes

# Input layer
input_layer = Input(shape=(224, 224, 3))

# Initial convolution and pooling layers
x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu')(input_layer)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

x = Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(192, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

# Inception module 3a
x = inception_module(x, 64, 96, 128, 16, 32, 32)

# Inception module 3b
x = inception_module(x, 128, 128, 192, 32, 96, 64)

x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

# Inception module 4a
x = inception_module(x, 192, 96, 208, 16, 48, 64)

# Auxiliary output: a second classifier branching off after module 4a
aux1 = AveragePooling2D((5, 5), strides=(3, 3))(x)
aux1 = Conv2D(128, (1, 1), padding='same', activation='relu')(aux1)
aux1 = Flatten()(aux1)
aux1 = Dense(1024, activation='relu')(aux1)
aux1 = Dropout(0.7)(aux1)
aux1 = Dense(NUM_CLASSES, activation='softmax')(aux1)  # Same width as the main output

# Inception module 4b
x = inception_module(x, 160, 112, 224, 24, 64, 64)

# Inception module 5a
# NOTE(review): there is no pooling layer between module 4b and 5a, so the
# 7x7 average pool below presumably does not reduce the map to 1x1 - confirm
# this is intended.
x = inception_module(x, 256, 160, 320, 32, 128, 128)

# Final layers
x = AveragePooling2D((7, 7))(x)
x = Dropout(0.4)(x)
x = Flatten()(x)
output = Dense(NUM_CLASSES, activation='softmax')(x)  # Main output

# Assemble the two-headed model: main classifier first, auxiliary second.
model_outputs = [output, aux1]
model = Model(inputs=input_layer, outputs=model_outputs)

# Every head gets the same loss and the same metric.
per_output_loss = ['categorical_crossentropy' for _ in model_outputs]
per_output_metrics = [['accuracy'] for _ in model_outputs]

model.compile(
    optimizer='adam',
    loss=per_output_loss,
    metrics=per_output_metrics,
)

# Number of full batches available per epoch in each split.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = val_generator.samples // val_generator.batch_size

# Fit for ten epochs, validating after each one.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    epochs=10,
)
