# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

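# Assumptions (everything listed here must be defined before this code runs):
# - `df_images` is a DataFrame with image paths and IDs
# - `df_ingredients` is a DataFrame with ingredient information and IDs
# - both DataFrames share an `image_id` column, and the merged result contains an `image_path` column
# - `your_label_columns` are the columns representing the ingredient information in the DataFrame
# - `image_height`, `image_width`, `num_channels`, `batch_size` and `num_classes` are already set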

# Join the image table with the ingredient table on their shared `image_id`
# column (pandas' default join type is used, as no `how` is given).
merged_df = df_images.merge(df_ingredients, on='image_id')

# Hold out 20% of the merged rows as the test set; the fixed random_state
# makes the split reproducible between runs.
train_df, test_df = train_test_split(
    merged_df,
    test_size=0.2,
    random_state=42,
)

# Create data generators for images.
#
# ImageDataGenerator is a TensorFlow/Keras class that produces batches of image
# data on-the-fly, so the images do not all have to be pre-processed and held
# in memory at once. Only normalisation is configured here: `rescale=1./255`
# multiplies every pixel value by 1/255. No augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by the training and test generators:
# - x_col="image_path": the DataFrame column that holds the path of each image;
#   the generator loads the images from the paths found in that column.
# - y_col=your_label_columns: the column(s) holding the labels / target values
#   (the ingredient information associated with each image).
# - target_size: every loaded image is resized to (image_height, image_width).
# - batch_size: number of samples delivered per batch, i.e. per training step.
# - class_mode='raw': the values of the y_col column(s) are used as targets
#   as-is, without any label encoding.
_flow_kwargs = dict(
    x_col="image_path",
    y_col=your_label_columns,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='raw',
)

# Batches of training data drawn from train_df (shuffled, the Keras default).
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    **_flow_kwargs,
)

# Batches of held-out data drawn from test_df. Shuffling is disabled so the
# batch order follows the row order of test_df; otherwise the outputs of
# model.predict(test_generator) could not be matched back to their rows.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **_flow_kwargs,
)



# num_channels is the number of colour channels of the input images:
# - grayscale images: num_channels = 1
# - RGB (Red, Green, Blue) images: num_channels = 3, one channel per colour.
# NOTE: flow_from_dataframe loads images as RGB unless `color_mode` is passed,
# so num_channels must match whatever colour mode the generators use.

# Define the model architecture: three convolution stages followed by a small
# dense head with one output unit per ingredient.
model = models.Sequential([
    layers.Conv2D(
        32, (3, 3), activation='relu',
        input_shape=(image_height, image_width, num_channels),
    ),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # num_classes = number of ingredients. The targets come from several
    # ingredient columns (class_mode='raw'), which is presumably a multi-label
    # setup where one image can contain many ingredients -- confirm against the
    # data. Sigmoid scores each ingredient independently; softmax would force
    # the outputs to compete and sum to 1, which only suits one-label-per-image.
    layers.Dense(num_classes, activation='sigmoid'),
])

# Compile the model.
# binary_crossentropy is the loss that pairs with independent sigmoid outputs.
# 'binary_accuracy' is requested explicitly because the generic 'accuracy'
# string resolves to categorical (argmax) accuracy for a multi-unit output,
# which is not meaningful for multi-label targets.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

# Train the model for 10 epochs, validating on the held-out generator after each epoch.
model.fit(train_generator, epochs=10, validation_data=test_generator)