In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences, to_categorical  # Add to_categorical here
from sklearn.model_selection import train_test_split

# Load the merged CSV file
data = pd.read_csv('merged_recipes_with_images.csv')

# Prepare a dictionary to hold images and recipes
recipe_images = {}
recipe_texts = {}

# Iterate through the data to group images by recipes and collect recipe texts
for index, row in data.iterrows():
    recipe = row['Recipe']
    images = row['Image Paths'].split(', ')
    recipe_text = row['Recipe']

    # Store images
    if recipe not in recipe_images:
        recipe_images[recipe] = []
    recipe_images[recipe].extend(images)

    # Store recipe text
    recipe_texts[recipe] = recipe_text

# Prepare image data for CNN
IMG_SIZE = 224  # Adjusting to 224x224
X_images = []
y_recipes = []

for recipe, images in recipe_images.items():
    for image_path in images:
        img = load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))
        img_array = img_to_array(img) / 255.0  # Normalize the image
        X_images.append(img_array)
        y_recipes.append(recipe)

# Convert to numpy arrays
X_images = np.array(X_images)
y_recipes = np.array(y_recipes)

# Convert y_recipes to categorical labels
y_recipes_unique, y_recipes_encoded = np.unique(y_recipes, return_inverse=True)
num_classes = len(y_recipes_unique)

# Prepare recipe data for LSTM
MAX_SEQUENCE_LENGTH = 50  # Maximum number of words in recipe text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(recipe_texts.values())
sequences = tokenizer.texts_to_sequences(recipe_texts.values())
sequences = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

X_recipes = []
y_recipes_lstm = []

for seq in sequences:
    for i in range(1, len(seq)):
        X_recipes.append(seq[:i])  # Previous words
        y_recipes_lstm.append(seq[i])  # Next word

# Pad the sequences for LSTM input
X_recipes = pad_sequences(X_recipes, maxlen=MAX_SEQUENCE_LENGTH)

# Convert to numpy arrays
X_recipes = np.array(X_recipes)
y_recipes_lstm = np.array(y_recipes_lstm)

# Convert y_recipes_lstm to categorical labels
y_recipes_lstm = to_categorical(y_recipes_lstm, num_classes=len(tokenizer.word_index) + 1)

# Split dataset into training and testing sets
X_train_images, X_test_images, y_train_recipes, y_test_recipes = train_test_split(X_images, y_recipes_encoded, test_size=0.2, random_state=42)
X_train_recipes, X_test_recipes, y_train_lstm, y_test_lstm = train_test_split(X_recipes, y_recipes_lstm, test_size=0.2, random_state=42)


In [7]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed

# Define Input Shape
image_shape = (224, 224, 3)  # Example image size
sequence_length = 10  # Number of time steps for LSTM
num_classes = 9  # Example: 9 food categories

# CNN Feature Extractor (TimeDistributed layer)
cnn_input = Input(shape=(sequence_length, *image_shape), name="image_input")  # Sequence of images
x = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'))(cnn_input)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
x = TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same'))(x)
x = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(x)
x = TimeDistributed(Flatten())(x)
x = TimeDistributed(Dense(128, activation='relu'))(x)
cnn_output = Dropout(0.5)(x)

# LSTM Model
lstm_input = Input(shape=(sequence_length, 128), name="lstm_input")  # 128 is the CNN output feature size
y = LSTM(64, return_sequences=False)(lstm_input)
y = Dense(64, activation='relu')(y)
y = Dropout(0.5)(y)
final_output = Dense(num_classes, activation='softmax')(y)

# Create the Model
model = Model(inputs=[cnn_input, lstm_input], outputs=final_output)

# Compile the Model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Summary of the Model
model.summary()


In [10]:
print("X_train_images shape:", X_train_images.shape)  # Image input shape
print("X_train_recipes shape:", X_train_recipes.shape)  # Recipe input shape
print("y_train_recipes shape:", y_train_recipes.shape)  # Labels (one-hot encoded)


X_train_images shape: (1238, 224, 224, 3)
X_train_recipes shape: (352, 50)
y_train_recipes shape: (1238,)


In [13]:
# Duplicate recipe data to match the number of images
X_train_recipes = np.tile(X_train_recipes, (X_train_images.shape[0] // X_train_recipes.shape[0], 1))

# Now proceed with training
history = model.fit(
    [X_train_images, X_train_recipes], y_train_recipes,  # Inputs: image data and recipe data
    epochs=50,
    batch_size=32,
    validation_data=([X_test_images, X_test_recipes], y_test_recipes)
)


Epoch 1/50


ValueError: Input 0 of layer "functional" is incompatible with the layer: expected shape=(None, 10, 224, 224, 3), found shape=(32, 224, 224, 3)

In [None]:
import matplotlib.pyplot as plt

# Extract data from history
history_dict = history.history
loss = history_dict['loss']
val_loss = history_dict['val_loss']
accuracy = history_dict['accuracy']
val_accuracy = history_dict['val_accuracy']

# Create epochs range
epochs = range(1, len(loss) + 1)

# Plot training and validation loss
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs, loss, 'b', label='Training Loss')
plt.plot(epochs, val_loss, 'r', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot training and validation accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs, accuracy, 'b', label='Training Accuracy')
plt.plot(epochs, val_accuracy, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Show the plots
plt.tight_layout()
plt.show()
