# Food101 Multiclass Image Classification (PyTorch)

This notebook walks through multiclass image classification on the Food101 dataset with PyTorch: building a balanced subset, augmenting the images, training a small convolutional neural network (CNN), and evaluating the best checkpoint.

## Install Required Libraries

Install the `datasets` library for loading Food101 and `torchvision` for image processing.

In [None]:
!pip install datasets torchvision

## Import Libraries

In [None]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from datasets import load_dataset
from PIL import Image
import numpy as np
from collections import Counter

## Download and Prepare a Balanced Subset of Food101

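For faster training and demonstration, use a balanced subset of the dataset: the same number of images per class (50 for training, 25 for validation), taken from the middle of each class's samples.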

In [None]:
# Download the Food101 dataset via the Hugging Face `datasets` library.
# The result is indexed by split name below ('train' and 'validation').
dataset = load_dataset('food101')

def balanced_middle_cut(ds, num_classes=101, samples_per_class=50):
    """Select a class-balanced subset taken from the middle of each class.

    Row indices are grouped by label in their original order. For every class
    a contiguous window of ``samples_per_class`` indices, centred in that
    class's index list, is kept. Classes with fewer rows than requested are
    kept whole.

    Args:
        ds: Dataset-like object that exposes its labels through
            ``ds['label']`` and supports ``ds.select(indices)``.
        num_classes: Only labels in ``range(num_classes)`` are considered;
            rows carrying any other label are skipped, so a smaller value
            restricts the subset to the first ``num_classes`` classes.
        samples_per_class: Maximum number of rows kept per class.

    Returns:
        The result of ``ds.select(indices)``, with indices ordered by class
        and, within a class, by original position.

    Raises:
        ValueError: If ``samples_per_class`` is negative.
    """
    if samples_per_class < 0:
        raise ValueError(
            f'samples_per_class must be non-negative, got {samples_per_class}'
        )

    # Pre-create the buckets so the output is ordered by class id.
    class_indices = {label: [] for label in range(num_classes)}
    for idx, label in enumerate(ds['label']):
        bucket = class_indices.get(label)
        # Labels outside range(num_classes) have no bucket and are ignored
        # instead of raising KeyError.
        if bucket is not None:
            bucket.append(idx)

    indices = []
    for idxs in class_indices.values():
        # Clamping the surplus at 0 makes the window start at the beginning
        # (and thus cover everything) when the class is too small.
        start = max(len(idxs) - samples_per_class, 0) // 2
        indices.extend(idxs[start:start + samples_per_class])
    return ds.select(indices)

# Build the balanced subsets: 50 images per class for training and
# 25 images per class for validation.
middle_train = balanced_middle_cut(ds=dataset['train'], samples_per_class=50)
middle_val = balanced_middle_cut(ds=dataset['validation'], samples_per_class=25)

## Data Augmentation and Preprocessing

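Apply random data augmentation to the training images only. Both the training and validation pipelines resize/crop the images to 128×128 and normalize each channel with the same mean and standard deviation.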

In [None]:
# Final tensor conversion + per-channel normalization, identical for both
# pipelines.
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Training: resize, then random crop/flip/rotation/colour jitter.
_train_steps = [
    transforms.Resize(144),
    transforms.RandomResizedCrop(128),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
train_transform = transforms.Compose(_train_steps + _to_normalized_tensor)

# Validation: deterministic resize and centre crop, no augmentation.
_val_steps = [
    transforms.Resize(144),
    transforms.CenterCrop(128),
]
val_transform = transforms.Compose(_val_steps + _to_normalized_tensor)

## PyTorch Dataset Wrapper

Wrap the Hugging Face dataset for use with PyTorch DataLoader. Ensure all images are RGB.

In [None]:
class Food101TorchDataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset over a Hugging Face image dataset.

    Each item of the wrapped dataset is expected to be a mapping with an
    ``'image'`` entry (an image object exposing ``mode`` and ``convert``) and
    a ``'label'`` entry.

    Args:
        hf_dataset: The wrapped dataset; must support ``len()`` and integer
            indexing.
        transform: Optional callable applied to the RGB image before it is
            returned. ``None`` returns the image unchanged.
    """

    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        # Fetch the row once: indexing the wrapped dataset twice would
        # retrieve the whole sample a second time just to read the label.
        sample = self.dataset[idx]
        img = sample['image']
        label = sample['label']
        # Force three channels so every image matches the model input.
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # Explicit None check: a valid transform object may still be falsy
        # (e.g. an empty container-like module).
        if self.transform is not None:
            img = self.transform(img)
        return img, label

# Pair each balanced subset with its matching preprocessing pipeline.
train_dataset = Food101TorchDataset(hf_dataset=middle_train, transform=train_transform)
val_dataset = Food101TorchDataset(hf_dataset=middle_val, transform=val_transform)

## Create DataLoaders

In [None]:
batch_size = 32
# Settings common to both loaders.
_loader_kwargs = dict(batch_size=batch_size, num_workers=2)
# Only the training data is reshuffled every epoch.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

## Define a Simple CNN Model

In [None]:
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages and a two-layer classifier.

    Args:
        num_classes: Size of the output layer (number of logits).
        input_size: Height and width of the square input images. The default
            of 128 reproduces the original fixed ``128 * 16 * 16`` classifier
            input.

    Raises:
        ValueError: If ``input_size`` is smaller than 8, which would leave no
            spatial positions after the three pooling stages.
    """

    def __init__(self, num_classes=101, input_size=128):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # The 3x3 convolutions with padding=1 keep the spatial size; each of
        # the three MaxPool2d(2) layers halves it (rounding down), so the
        # final feature map is input_size // 8 on each side.
        feature_size = input_size // 8
        if feature_size < 1:
            raise ValueError(f'input_size must be at least 8, got {input_size}')
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * feature_size * feature_size, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return raw class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Instantiate the network with its defaults and move it to the chosen device.
model = SimpleCNN()
model = model.to(device)
print(model)

## Set Up Training Components

In [None]:
# Cross-entropy on the raw logits produced by the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate when the monitored value stops decreasing
# (patience of 2 epochs). The `verbose` argument is intentionally omitted:
# it is deprecated in recent PyTorch releases. Read the current rate from
# optimizer.param_groups[0]['lr'] if it needs to be logged.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

## Training Loop with Validation

In [None]:
num_epochs = 10
# Start below any reachable accuracy so the first epoch always writes a
# checkpoint; with an initial value of 0.0 a run that never exceeds 0%
# accuracy would leave no file for the later load step.
best_val_acc = float('-inf')
for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The loss is a per-batch mean; weight it by the batch size so the
        # epoch average stays exact when the last batch is smaller.
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            # Index of the largest logit is the predicted class. The legacy
            # `.data` access is unnecessary inside torch.no_grad().
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_loss = val_loss / len(val_loader.dataset)
    val_acc = 100 * correct / total

    # The scheduler monitors the validation loss.
    scheduler.step(val_loss)
    print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {epoch_loss:.4f} | Val Loss: {val_loss:.4f} | Val Accuracy: {val_acc:.2f}%')

    # Keep only the weights of the best-performing epoch so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_food101_cnn.pth')

## Evaluate the Best Model

In [None]:
# Load best model weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint saved on a GPU still loads on a CPU-only
# machine.
model.load_state_dict(torch.load('best_food101_cnn.pth', map_location=device))
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit is the predicted class. The legacy
        # `.data` access is unnecessary inside torch.no_grad().
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Best Validation Accuracy: {100 * correct / total:.2f}%')

## Predict on a New Image

In [None]:
def preprocess_image(image_path):
    """Load an image file and turn it into a single-image batch.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The image converted to RGB, passed through ``val_transform`` and
        given a leading batch dimension via ``unsqueeze(0)``.
    """
    # Open inside a context manager so the file handle is always released;
    # convert() returns a separate image that stays usable after the close.
    with Image.open(image_path) as img:
        image = img.convert('RGB')
    image = val_transform(image)
    return image.unsqueeze(0)

image_path = 'image_prime.jpg'  # Change to your image path

# Preprocess the image and place it on the same device as the model.
image_tensor = preprocess_image(image_path)
image_tensor = image_tensor.to(device)

# Inference only: evaluation mode, no gradient tracking.
model.eval()
with torch.no_grad():
    output = model(image_tensor)
pred_class = output.argmax(dim=1).item()

# Translate the predicted index into its human-readable label name.
class_names = dataset['train'].features['label'].names
print(f'Predicted Class: {class_names[pred_class]}')