In [5]:
!pip install pytorch-lightning torch torchvision torchmetrics pillow numpy


Collecting pytorch-lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics
  Downloading torchmetrics-1.6.0-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading torchmetrics-1.6.0-py3-none-any.whl (926 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m926.4/926.4 kB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.11.9 pytorch-lightning-2.4.0 torchmetrics-1.6.0


In [6]:
# Colab-only: mount Google Drive at /content/drive. The dataset directory used
# for training (data_dir) lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import pytorch_lightning as L
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from PIL import Image

In [12]:
# Define the transfer learning model with added accuracy metrics
class ImagenetTransferLearning(L.LightningModule):
    """Image classifier built on a frozen, ImageNet-pretrained ResNet-50.

    The ResNet-50 layers up to (and including) global average pooling are kept
    as a fixed feature extractor; only the final linear layer is trained.

    Args:
        num_target_classes: Number of output classes of the linear head.
            Defaults to 25, the value this notebook was trained with.
        lr: Learning rate for the Adam optimizer. Defaults to 5e-3.
    """

    def __init__(self, num_target_classes=25, lr=5e-3):
        super().__init__()
        self.lr = lr

        # Initialize a pre-trained ResNet-50 model. `pretrained=True` is
        # deprecated in torchvision; IMAGENET1K_V1 is the weight set it selected.
        backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

        # Extract number of features feeding the original classification layer
        num_filters = backbone.fc.in_features
        layers = list(backbone.children())[:-1]  # Remove the last (fc) layer
        self.feature_extractor = nn.Sequential(*layers)

        # Freeze feature extractor layers
        for param in self.feature_extractor.parameters():
            param.requires_grad = False
        self.feature_extractor.eval()

        # Trainable classification head
        self.classifier = nn.Linear(num_filters, num_target_classes)

        # Accuracy metrics for multiclass classification over the target classes
        self.train_accuracy = Accuracy(task='multiclass', num_classes=num_target_classes)
        self.val_accuracy = Accuracy(task='multiclass', num_classes=num_target_classes)

    def forward(self, x):
        """Return class logits for a batch of images `x`."""
        # Keep the frozen backbone in eval mode (fixed BatchNorm statistics) and
        # skip autograd bookkeeping for it; only the head needs gradients.
        self.feature_extractor.eval()
        with torch.no_grad():
            representations = self.feature_extractor(x).flatten(1)
        return self.classifier(representations)

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one training batch and log metrics."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.train_accuracy(logits, y)

        # Log loss and accuracy; passing the metric object lets Lightning
        # manage its compute/reset cycle.
        self.log('train_loss', loss, prog_bar=True, on_step=True)
        self.log('train_acc', self.train_accuracy, prog_bar=True, on_step=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one validation batch and log metrics."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.val_accuracy(logits, y)

        # Log validation loss and accuracy, aggregated over the epoch
        self.log('val_loss', loss, prog_bar=True, on_epoch=True)
        self.log('val_acc', self.val_accuracy, prog_bar=True, on_epoch=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over the trainable (unfrozen) parameters only."""
        trainable_params = [p for p in self.parameters() if p.requires_grad]
        return optim.Adam(trainable_params, lr=self.lr)

# Seed every RNG involved (dataset split, batch shuffling, head initialisation)
# so a rerun of this cell reproduces the same experiment.
SEED = 42
L.seed_everything(SEED, workers=True)

# Data transformations
# NOTE(review): no ImageNet mean/std normalization is applied although the
# backbone is ImageNet-pretrained. If it is added here, the cell that rebuilds
# `transform` after loading the checkpoint must add the same step.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load custom dataset using ImageFolder (one sub-folder per class)
data_dir = '/content/drive/MyDrive/Colab Notebooks/Final_Project/pic_final_2'  # Update this path to the root directory of your class folders
dataset = ImageFolder(root=data_dir, transform=transform)

# Split the dataset: 80% for training, 20% for validation
total_samples = len(dataset)
train_samples = int(total_samples * 0.8)  # 80% for training
val_samples = total_samples - train_samples  # 20% for validation

train_subset, val_subset = random_split(
    dataset,
    [train_samples, val_samples],
    generator=torch.Generator().manual_seed(SEED),  # fixed split across reruns
)

# Create data loaders
train_loader = DataLoader(train_subset, batch_size=5, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=5)

# Initialize model and trainer
model = ImagenetTransferLearning()

# Fail early with a readable message: labels beyond the size of the classifier
# head would otherwise only surface as an error inside the loss computation.
if len(dataset.classes) > model.classifier.out_features:
    raise ValueError(
        f"Found {len(dataset.classes)} class folders in {data_dir!r}, but the model "
        f"head only has {model.classifier.out_features} outputs."
    )

# Same settings on either device; only the accelerator differs.
trainer = L.Trainer(
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
    limit_train_batches=1000,  # Upper bound on training batches used each epoch
    max_epochs=15,
)

# Train the model
trainer.fit(model=model, train_dataloaders=train_loader, val_dataloaders=val_loader)




INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name              | Type               | Params | Mode 
-----------------------------------------------------------------
0 | feature_extractor | Sequential         | 23.5 M | eval 
1 | classifier        | Linear             | 51.2 K | train
2 | train_accuracy    | MulticlassAccuracy | 0      | train
3 | val_accuracy      | MulticlassAccuracy | 0      | train
-----------------------------------------------------------------
51.2 K    Trainable params
23.5 M    Non-trainable params
23.6 M    Total params
94.237    Total estimated model params size (MB)
3         Modules in train mode
150       Modules in eval mode

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.


In [13]:
import matplotlib.pyplot as plt
from PIL import Image
import torch

def classify_image_in_parts_with_visualization(model, image_path, transform,
                                               num_shelves=None, class_names=None, top_k=10):
    """Split an image into horizontal bands, classify each band and plot the results.

    The image is cut into `num_shelves` bands of equal height (the last band
    absorbs any remainder rows). Each band is passed through `transform` and
    `model`, and the `top_k` most probable classes are written as the title of
    that band's subplot.

    Args:
        model: Classifier returning logits of shape (batch, num_classes). It is
            switched to evaluation mode by this function.
        image_path: Path of the image file to open.
        transform: Callable turning a PIL image into the tensor the model expects.
        num_shelves: Number of bands. When None, the user is asked interactively.
        class_names: Sequence mapping class index to label. When None, the
            notebook-level `dataset.classes` is used.
        top_k: Maximum number of predictions listed per band.

    Raises:
        ValueError: If `num_shelves` or `top_k` is smaller than 1, or if
            `num_shelves` exceeds the image height in pixels.
    """
    if num_shelves is None:
        # Ask the user for the number of shelves (parts) in the fridge
        num_shelves = int(input("How many shelves are there in the fridge? Please enter a number: "))
    if num_shelves < 1:
        raise ValueError(f"num_shelves must be at least 1, got {num_shelves}")
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    if class_names is None:
        class_names = dataset.classes  # fall back to the dataset defined in the training cell

    model.eval()  # Set the model to evaluation mode
    # Run inference on whatever device the model currently lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    image = Image.open(image_path).convert('RGB')  # Load image and convert to RGB
    width, height = image.size
    if num_shelves > height:
        raise ValueError(
            f"num_shelves ({num_shelves}) exceeds the image height ({height} px)"
        )
    section_height = height // num_shelves  # Height of every band except possibly the last

    # squeeze=False keeps `axes` two-dimensional even for a single shelf.
    fig, axes = plt.subplots(num_shelves, 1, figsize=(5, num_shelves * 3), squeeze=False)
    try:
        for i in range(num_shelves):
            # Crop band i; the last band extends to the bottom of the image
            top = i * section_height
            bottom = top + section_height if i < num_shelves - 1 else height
            section = image.crop((0, top, width, bottom))

            # Apply transforms and add batch dimension
            transformed_section = transform(section).unsqueeze(0).to(device)

            # Run model inference
            with torch.no_grad():
                logits = model(transformed_section)
                probabilities = torch.softmax(logits, dim=1)

                # Never request more predictions than the model has classes
                k = min(top_k, probabilities.shape[1])
                top_probs, top_indices = torch.topk(probabilities, k=k, dim=1)

            # Index the batch dimension explicitly so k == 1 still yields lists
            top_probs = top_probs[0].tolist()
            top_indices = top_indices[0].tolist()

            # Map class indices to class names
            class_probabilities = [(class_names[idx], prob) for idx, prob in zip(top_indices, top_probs)]

            # Display a quarter-size thumbnail of the band (at least 1x1 pixel),
            # sized from the band itself so the last band keeps its aspect ratio
            ax = axes[i, 0]
            thumb_size = (max(1, section.width // 4), max(1, section.height // 4))
            ax.imshow(section.resize(thumb_size))
            ax.axis('off')

            # Annotate the top-k predictions for this section
            label_text = "\n".join([f"{class_name}: {prob:.4f}" for class_name, prob in class_probabilities])
            ax.set_title(f"Section {i + 1} predictions:\n{label_text}", fontsize=8, loc='left')

        plt.tight_layout()
        plt.show()
    finally:
        plt.close(fig)  # Close the figure to release memory, even on failure


In [25]:
# NOTE(review): "PP.jpeg" is a relative path, resolved against the kernel's
# working directory; the recorded run failed with FileNotFoundError for
# '/content/PP.jpeg'. Upload the image there or pass its full path first.
classify_image_in_parts_with_visualization(model, "PP.jpeg", transform)


How many shelves are there in the fridge? Please enter a number: 2


FileNotFoundError: [Errno 2] No such file or directory: '/content/PP.jpeg'

In [15]:
# Get transform parameters
# Verify that the first transform really is the Resize step instead of
# silently reading `.size` from whatever happens to be first.
resize_step = transform.transforms[0]
if not isinstance(resize_step, transforms.Resize):
    raise TypeError(
        f"Expected transforms.Resize as the first transform, got {type(resize_step).__name__}"
    )

transform_params = {
    'resize': resize_step.size,
     # Add other parameters from your transform if needed
}

# Save model weights, transform parameters and the index -> label mapping.
# Without the class names a reloaded model can only report class indices.
torch.save({
    'model_state_dict': model.state_dict(),
    'transform_params': transform_params,
    'class_names': list(dataset.classes),
}, "model_and_transform.pth")

In [None]:
# Load the checkpoint.
# map_location="cpu" lets the file load on machines without a GPU;
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all this checkpoint holds.
checkpoint = torch.load("model_and_transform.pth", map_location="cpu", weights_only=True)

# Load the model state dictionary
model = ImagenetTransferLearning()  # Initialize the model architecture
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # Set the model to evaluation mode for inference

# Recreate the transform using the loaded parameters
transform = transforms.Compose([
    transforms.Resize(checkpoint['transform_params']["resize"]),
    transforms.ToTensor(),
])


  model.load_state_dict(torch.load("model.pth"))


ImagenetTransferLearning(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(

In [None]:
import os

# Root directory whose immediate sub-folders are to be listed
data_dir = '/content/drive/MyDrive/Colab Notebooks/Final_Project/pic_final'

# Keep only the entries that are directories, then order them alphabetically
subfolder_names = []
for entry_name in os.listdir(data_dir):
    entry_path = os.path.join(data_dir, entry_name)
    if os.path.isdir(entry_path):
        subfolder_names.append(entry_name)
subfolder_names.sort()

print(subfolder_names)




['Blueberries_pack', 'Broccoli', 'Dry_Penne_Pasta', 'Fresh_Chicken_Breast', 'Fresh_Fillet_Salmon', 'Fresh_Ground_Beef', 'Group_of_Avocados', 'Group_of_Bananas', 'Group_of_Carrots', 'Group_of_Champignon_Mushrooms', 'Group_of_Cucumbers', 'Group_of_Garlic_Bulbs', 'Group_of_Lemons', 'Group_of_Oranges', 'Group_of_Pineapples', 'Group_of_Red_Apples', 'Group_of_Strawberries', 'Group_of_Sweet_Potatoes', 'Group_of_Tomatoes', 'Group_of_White_Onions', 'Group_of_Yellow_Bell_Peppers', 'Group_of_Yellow_Potatoes', 'Lettuce', 'Sliced_Yellow_Cheese', 'White_Eggs_Tray']
