In [1]:
#run only for the collab not in local
from google.colab import files
files.upload()  # Upload your kaggle.json here


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"whitewolf18","key":"5791652d7a3c416e1d99a4ebba0fe6f4"}'}

In [2]:
#run only for the collab not in local
import os
import zipfile

os.makedirs('/root/.kaggle', exist_ok=True)

!mv kaggle.json /root/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json


In [3]:
!kaggle datasets download iamsouravbanerjee/indian-food-images-dataset --unzip

Dataset URL: https://www.kaggle.com/datasets/iamsouravbanerjee/indian-food-images-dataset
License(s): other
Downloading indian-food-images-dataset.zip to /content
 99% 351M/355M [00:00<00:00, 1.22GB/s]
100% 355M/355M [00:00<00:00, 1.22GB/s]


In [1]:
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.applications.efficientnet import preprocess_input
import numpy as np
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
import os
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [None]:
os.remove("List of Indian Foods.txt")

In [None]:
os.rename("Indian Food Images", "dataset")

In [None]:
DATA_DIR = "dataset/Indian Food Images"

In [None]:
food_list = os.listdir(DATA_DIR)

In [None]:
food_list

In [None]:
len(food_list)

In [None]:
def load_and_preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(300, 300))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    return img_array

In [None]:
def create_file_list():
    """Create a list of (file_path, label) tuples instead of loading all images"""
    file_list = []
    for food in food_list:
        food_dir = os.path.join(DATA_DIR, food)
        if os.path.isdir(food_dir):
            for img_file in os.listdir(food_dir):
                img_path = os.path.join(food_dir, img_file)
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    file_list.append((img_path, food))
    return file_list

In [None]:
file_list = create_file_list()
print(f"Total files found: {len(file_list)}")

In [None]:
def get_train_test_split(file_list, test_size=0.2):
    """Split file paths into train and test sets"""
    np.random.shuffle(file_list)
    split_index = int(len(file_list) * (1 - test_size))
    train_files = file_list[:split_index]
    test_files = file_list[split_index:]
    return train_files, test_files

In [None]:
train_files, test_files = get_train_test_split(file_list)
print(f"Training files: {len(train_files)}")
print(f"Test files: {len(test_files)}")

In [None]:
import tensorflow as tf
from tensorflow.keras.utils import Sequence, to_categorical
import numpy as np

class ImageDataGenerator(Sequence):
    def __init__(self, file_list, label_encoder, num_classes, batch_size=32, shuffle=True):
        self.file_list = file_list
        self.label_encoder = label_encoder
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.file_list))
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __len__(self):
        return int(np.ceil(len(self.file_list) / self.batch_size))
    
    def __getitem__(self, index):
        # Get batch indexes
        start_idx = index * self.batch_size
        end_idx = min((index + 1) * self.batch_size, len(self.file_list))
        batch_indexes = self.indexes[start_idx:end_idx]
        
        # Generate batch data
        batch_x = []
        batch_y = []
        
        for idx in batch_indexes:
            img_path, label = self.file_list[idx]
            try:
                img_array = load_and_preprocess_image(img_path)
                batch_x.append(img_array[0])  # Remove the extra dimension
                batch_y.append(label)
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")
                continue
        
        if len(batch_x) == 0:
            # Return empty batch if no images could be loaded
            return np.array([]), np.array([])
        
        # Encode labels and convert to one-hot
        batch_y_encoded = self.label_encoder.transform(batch_y)
        batch_y_one_hot = to_categorical(batch_y_encoded, num_classes=self.num_classes)
        
        return np.array(batch_x), batch_y_one_hot
    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [None]:
# Create data generators for training and testing
batch_size = 16  # Reduced batch size for better memory management

# We need to create label encoder first, so let's do it here
all_labels = [label for _, label in file_list]
unique_labels = list(set(all_labels))
label_encoder = LabelEncoder()
label_encoder.fit(unique_labels)
num_classes = len(unique_labels)

train_generator = ImageDataGenerator(train_files, label_encoder, num_classes, batch_size=batch_size, shuffle=True)
test_generator = ImageDataGenerator(test_files, label_encoder, num_classes, batch_size=batch_size, shuffle=False)

print(f"Training batches: {len(train_generator)}")
print(f"Test batches: {len(test_generator)}")
print(f"Batch size: {batch_size}")
print(f"Number of classes: {num_classes}")


In [None]:
# Test the generator by loading one batch
sample_batch_x, sample_batch_y = train_generator[0]
print(f"Sample batch shape: {sample_batch_x.shape}")
print(f"Sample labels: {sample_batch_y[:5]}")  # Show first 5 labels

In [None]:
# Check generator information
print(f"Total training files: {len(train_files)}")
print(f"Total test files: {len(test_files)}")
print(f"Training batches per epoch: {len(train_generator)}")
print(f"Test batches: {len(test_generator)}")

# Test loading one batch to verify everything works
sample_x, sample_y = train_generator[0]
print(f"Sample batch X shape: {sample_x.shape}")
print(f"Sample batch Y shape: {sample_y.shape}")

In [None]:
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.2)(x)
predictions = Dense(num_classes, activation='softmax')(x)  # Use dynamic num_classes

model = Model(inputs=base_model.input, outputs=predictions)

print(f"Model created with {num_classes} output classes")

In [None]:
model.summary()

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model using the generator for memory-efficient batch processing
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=test_generator,
    verbose=1
)

print("Training completed successfully!")

In [None]:
# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

# Show memory usage benefits
import psutil
import os
process = psutil.Process(os.getpid())
memory_usage = process.memory_info().rss / 1024 / 1024  # Convert to MB
print(f"Current memory usage: {memory_usage:.2f} MB")
print("Batch processing allows for training on large datasets without loading all images into memory at once!")

In [None]:
# Evaluate the model on test data using batch processing
test_loss, test_accuracy = model.evaluate(test_generator, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Example of making predictions on a single batch
sample_batch_x, sample_batch_y = test_generator[0]
predictions = model.predict(sample_batch_x)

# Show predictions for first few samples
for i in range(min(3, len(predictions))):
    predicted_class_idx = np.argmax(predictions[i])
    true_class_idx = np.argmax(sample_batch_y[i])
    
    predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]
    true_class = label_encoder.inverse_transform([true_class_idx])[0]
    
    confidence = predictions[i][predicted_class_idx] * 100
    
    print(f"Sample {i+1}:")
    print(f"  Predicted: {predicted_class} (Confidence: {confidence:.2f}%)")
    print(f"  True: {true_class}")
    print(f"  Correct: {'✓' if predicted_class == true_class else '✗'}")
    print()