In [4]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import numpy as np
import pandas as pd
from pathlib import Path
from nltk.tokenize import word_tokenize, sent_tokenize
import plotly.express as px
from plotly.offline import iplot

import os
import cv2
import argparse
import numpy as np
import pandas as pd
import glob
from PIL import Image
import datetime
import matplotlib.pyplot as plt

from collections import Counter
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from keras.callbacks import Callback
from keras.backend import clear_session
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Input, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.applications import resnet50, mobilenet, xception
from tensorflow.keras.optimizers import SGD

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# data_path = "/content/drive/MyDrive/gproject/Food Images/Food Images"
images_fl = '/content/drive/MyDrive/gproject/Food Images/Food Images'

In [7]:
image_files = os.listdir(images_fl)
image_file_set= set(image_files)

In [8]:
df = pd.read_csv('/content/drive/MyDrive/gproject/Food Ingredients and Recipe Dataset with Image Name Mapping.csv')

In [9]:
print(df['Image_Name'].dtype)
print(df['Image_Name'].isnull().sum())
# print(df['Image_Name'].isnull().sum())
missing_extensions = df[~df['Image_Name'].str.endswith(('.jpg'))]
print(missing_extensions)
df['Image_Name'] = df['Image_Name'].str.strip().str.lower()

object
0
       Unnamed: 0                                              Title  \
0               0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1               1                    Crispy Salt and Pepper Potatoes   
2               2                        Thanksgiving Mac and Cheese   
3               3                 Italian Sausage and Bread Stuffing   
4               4                                       Newton's Law   
...           ...                                                ...   
13496       13496                               Brownie Pudding Cake   
13497       13497  Israeli Couscous with Roasted Butternut Squash...   
13498       13498  Rice with Soy-Glazed Bonito Flakes and Sesame ...   
13499       13499                                        Spanakopita   
13500       13500  Mexican Poblano, Spinach, and Black Bean "Lasa...   

                                             Ingredients  \
0      ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1     

In [10]:
# fixing mising extensio
df['Image_Name'] = df['Image_Name'].apply(lambda x: x + '.jpg' if not x.endswith(('.jpg', '.png')) else x)

In [11]:
dataset_images = set(df['Image_Name'])  # Extract unique image names from the DataFrame
missing_images = dataset_images - image_file_set  # Find images in the DataFrame not in the directory
if missing_images:
    print(f"Missing images: {missing_images}")
else:
    print("No missing images.")
valid_df = df[df['Image_Name'].isin(image_file_set)]
print(f"Number of valid rows: {len(valid_df)}")

# # Use `valid_df` for dataset creation
# dataset = create_dataset(valid_df)

Missing images: {'#name?.jpg'}
Number of valid rows: 13471


In [12]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: x.split(','))
valid_ingredients_list = sorted(list({Ingredient.strip() for row in df['Ingredients'] for Ingredient in row}))
NUM_INGREDIENTS = len(valid_ingredients_list)

In [13]:
NUM_INGREDIENTS

83374

In [14]:
# Ingredient to index mapping
ingredient_to_index ={Ingredient: idx for idx, Ingredient in enumerate(valid_ingredients_list)}
# ingredient_to_index

In [15]:
#  ingredients into a multi-hot vector
def encode_ingredients(ingredients):

    indices = [ingredient_to_index[ingredient.strip()] for ingredient in ingredients if ingredient.strip() in ingredient_to_index]
    return tf.keras.utils.to_categorical(indices, num_classes=NUM_INGREDIENTS).sum(axis=0)

valid_df['encoded_ingredients'] = valid_df['Ingredients'].apply(encode_ingredients)


In [16]:
# df['encoded_ingredients']

In [17]:
# Preprocessing
IMG_SIZE = 224
NUM_CHANNELS = 3

def preprocess_image(image_path):
    try:  # Wrap the image loading and decoding in a try-except block
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)  # Attempt to decode the image
    except tf.errors.InvalidArgumentError:
        # Handle invalid image files
        print(f"Warning: Unable to decode image at {image_path}. Skipping...")
        return tf.zeros((IMG_SIZE, IMG_SIZE, 3))  # Return a placeholder image

    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return image


In [18]:
def preprocess_data(row, index=None):  # Add a second parameter
    image_name = row['Image_Name']  # Extract the image name from the row
    if image_name in image_file_set:
        # Only process the image if it exists in the directory
        image_path = os.path.join(images_fl, image_name)
        image = preprocess_image(image_path)
    else:
        # Handle missing images placeholder
        print(f"Warning: {image_name} not found in directory.")
        image = tf.zeros((IMG_SIZE, IMG_SIZE, 3))

    # Convert ingredients to a tensor
    ingredients = tf.convert_to_tensor(row['encoded_ingredients'], dtype=tf.float32)
    return image, ingredients

In [19]:
base_model = tf.keras.applications.MobileNetV2(input_shape=(IMG_SIZE, IMG_SIZE, 3),
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False  # Freeze the base model

input_layer = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model(input_layer, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
output_layer = layers.Dense(NUM_INGREDIENTS, activation='sigmoid')(x)

model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [20]:
def data_generator(df):
    """Yields data one row at a time."""
    for _, row in df.iterrows():
        yield preprocess_data(row)

In [21]:
IMG_SIZE = 224
NUM_INGREDIENTS = 83374
BATCH_SIZE = 32
EPOCHS = 10
ACTI = "relu"

In [22]:
def create_dataset(df):
    """Creates a dataset using a generator."""
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(df),
        output_signature=(
            tf.TensorSpec(shape=(IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(NUM_INGREDIENTS,), dtype=tf.float32)
        )
    )
    return dataset.shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Load and preprocess data
dataset = create_dataset(valid_df)

# Split dataset into train and test
test_dataset = dataset.take(1000)  # Take first 1000 samples for testing
train_dataset = dataset.skip(1000)  # Remaining samples for training

# Build the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Rescaling(1.0 / 255, input_shape=(IMG_SIZE, IMG_SIZE, NUM_CHANNELS)))


# Resize and rescale layer (example)
resize_and_rescale = tf.keras.layers.Rescaling(1.0 / 255)

# Data augmentation (example)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
])

# model.add(resize_and_rescale)
model.add(data_augmentation)


In [23]:
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 83374), dtype=tf.float32, name=None))>

In [24]:
# Add convolutional layers
CONV_DEPTHS = [32, 64, 128]
CONV_KERNEL_SIZE = (3, 3)
POOL_SIZE = (2, 2)
ACTI = "relu"

for size in CONV_DEPTHS:
    model.add(tf.keras.layers.Conv2D(size, CONV_KERNEL_SIZE, activation=ACTI, padding="same"))
    model.add(tf.keras.layers.MaxPooling2D(POOL_SIZE))

In [None]:

model.add(tf.keras.layers.Flatten())

# Add dense layers
DENSE_SIZES = [256, 128]
for size in DENSE_SIZES:
    model.add(tf.keras.layers.Dense(size, activation=ACTI))

# Final classification layer
model.add(tf.keras.layers.Dense(NUM_INGREDIENTS, activation="sigmoid"))

# Compile the model
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()]
)

# Print model summary
model.summary()

# Train the model
# callbacks = [
#     tf.keras.callbacks.ModelCheckpoint("model_checkpoint.keras", save_best_only=True),
#     tf.keras.callbacks.EarlyStopping(monitor="loss", patience=3),
#     tf.keras.callbacks.TensorBoard(log_dir="logs", histogram_freq=1),
# ]

history = model.fit(train_dataset,epochs=EPOCHS,validation_data=test_dataset)



Epoch 1/10


In [None]:
# Analyze training history
import matplotlib.pyplot as plt

# Plot training accuracy and loss
plt.figure(figsize=(10, 5))

# Training accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['binary_accuracy'], label='Training Accuracy')
plt.plot(history.history['val_binary_accuracy'], label='Validation Accuracy')
plt.title('Accuracy')
plt.legend()

# Training loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss')
plt.legend()

plt.show()


In [None]:
dataset