# Food Detection Model - Faster RCNN

In [None]:
import torch
import torchvision
import cv2
from torchvision.ops import box_iou
import matplotlib.pyplot as plt
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms.functional as F
from IPython.display import display, HTML
import json
import os
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pandas as pd

## Helper Function

In [None]:
def load_model(checkpoint_path, model, optimizer, device):
    """
    Restore model and optimizer state from a checkpoint file.

    The checkpoint must be a mapping with the keys 'model_state_dict',
    'optimizer_state_dict' and 'epoch'.

    Args:
        checkpoint_path: Path of the checkpoint file to read.
        model: Module whose ``load_state_dict`` receives the saved weights.
            The module itself is not moved; only the loaded tensors are
            mapped to ``device`` while reading the file.
        optimizer: Optimizer whose state is restored; every tensor in its
            state is then moved to ``device``.
        device: Target device used for ``map_location`` and for the
            optimizer state tensors.

    Returns:
        The tuple ``(model, optimizer, epoch)`` where ``epoch`` is the value
        stored under 'epoch' in the checkpoint.

    Raises:
        FileNotFoundError: If ``checkpoint_path`` is not an existing file.
    """
    # Fail early with a clear error: continuing without a checkpoint used to
    # crash later with an UnboundLocalError on ``epoch``.
    if not os.path.isfile(checkpoint_path):
        raise FileNotFoundError(
            f"No checkpoint found at specified path: {checkpoint_path}"
        )

    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # that come from a trusted source.
    # map_location ensures the stored tensors are materialised on ``device``.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # Ensure the optimizer's stored tensors (e.g. momentum buffers) live on
    # the requested device.
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)

    epoch = checkpoint['epoch']
    print(f"Checkpoint loaded from {checkpoint_path} at epoch {epoch + 1}")

    return model, optimizer, epoch

In [None]:
# Load the label map from its JSON file.
# Presumably it maps class name -> integer id (ImageObjectDetector inverts it
# for id -> name lookup) — verify against the file.
label_map_file_path = '/content/label_map_fridgeapp.json'
# JSON text is UTF-8; pin the encoding instead of relying on the platform
# default so non-ASCII label names decode the same everywhere.
with open(label_map_file_path, 'r', encoding='utf-8') as f:
    loaded_label_map = json.load(f)

## Faster-RCNN Model Structure

In [None]:
class CustomTwoMLPHead(nn.Module):
    """
    Three fully connected layers, each followed by a ReLU.

    Layer widths are ``in_channels -> 4096 -> 2048 -> representation_size``.
    """

    def __init__(self, in_channels, representation_size):
        """
        Args:
            in_channels: Number of features per sample after flattening.
            representation_size: Width of the final layer's output.
        """
        super().__init__()
        self.fc6 = nn.Linear(in_channels, 4096)
        self.fc7 = nn.Linear(4096, 2048)
        self.fc8 = nn.Linear(2048, representation_size)

    def forward(self, x):
        """Flatten every dimension after the first and apply the three layers."""
        hidden = x.flatten(start_dim=1)
        for layer in (self.fc6, self.fc7, self.fc8):
            hidden = nn.functional.relu(layer(hidden))
        return hidden

class CustomFasterRCNN(torch.nn.Module):
    """
    torchvision Faster R-CNN (ResNet-50 FPN, default weights) with a custom
    box head and a classifier sized for ``num_classes``.
    """

    def __init__(self, num_classes=73):
        """
        Args:
            num_classes: Number of output classes passed to FastRCNNPredictor.
        """
        super().__init__()
        detection = torchvision.models.detection
        pretrained = detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
        self.model = detection.fasterrcnn_resnet50_fpn(weights=pretrained)

        roi_heads = self.model.roi_heads

        # Swap the stock box head for the deeper custom MLP head.
        # NOTE(review): 12544 is presumably 256 channels x 7 x 7 pooled
        # features — confirm against the RoI pooler configuration.
        roi_heads.box_head = CustomTwoMLPHead(
            in_channels=12544,
            representation_size=1024,
        )

        # Size the new predictor from the input width of the existing one.
        predictor_inputs = roi_heads.box_predictor.cls_score.in_features
        roi_heads.box_predictor = FastRCNNPredictor(predictor_inputs, num_classes)

    def forward(self, images, targets=None):
        """Delegate to the wrapped torchvision detection model."""
        return self.model(images, targets)

## List Model Structure

In [None]:
from torchvision.ops import box_iou
from IPython.display import display, HTML

class ImageObjectDetector:
    """
    Run a detection model on an image file and visualise its detections.

    Images are resized to a square of ``MODEL_INPUT_SIZE`` pixels before
    inference, so predicted boxes are expressed in that coordinate frame.
    Drawing rescales them onto a square canvas of ``DISPLAY_SIZE`` pixels.
    """

    # Side length (pixels) of the square image fed to the model.
    MODEL_INPUT_SIZE = 256
    # Side length (pixels) of the square canvas used for visualisation.
    DISPLAY_SIZE = 400

    def __init__(self, model, label_map, device):
        """
        Args:
            model: Detection model; it is moved to ``device``.
            label_map: Mapping whose values are the label ids predicted by
                the model and whose keys are the names to display.
            device: Device on which inference runs.
        """
        self.model = model.to(device)
        self.device = device
        self.label_map = label_map
        # Inverted label_map for id -> name lookup.
        self.reverse_label_map = {v: k for k, v in self.label_map.items()}

    @staticmethod
    def _read_image(image_path):
        """Read an image with OpenCV, raising instead of returning None."""
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; surface it here
            # rather than as an obscure error in a later OpenCV call.
            raise OSError(
                f"Could not read image (missing file or unsupported format): {image_path}"
            )
        return image

    def predict_image(self, image_path):
        """
        Run the model on one image file.

        Args:
            image_path: Path of the image to read.

        Returns:
            The model's prediction dict for the image, with every value
            moved to the CPU.

        Raises:
            OSError: If the image cannot be read.
        """
        img = self._read_image(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.MODEL_INPUT_SIZE, self.MODEL_INPUT_SIZE))

        # Use the fully qualified torchvision function: the file-level alias
        # ``F`` is rebound to torch.nn.functional, which has no ``to_tensor``.
        img_tensor = torchvision.transforms.functional.to_tensor(img).unsqueeze(0)
        img_tensor = img_tensor.to(self.device)

        self.model.eval()
        with torch.no_grad():
            predictions = self.model(img_tensor)

        # A single image was submitted, so only the first result is needed.
        return {k: v.to('cpu') for k, v in predictions[0].items()}

    def draw_boxes_on_image(self, image_path, boxes, labels, scores, threshold=0.4):
        """
        Draw detections on the image, show it, and display a results table.

        Args:
            image_path: Path of the image to draw on.
            boxes: Boxes as (x1, y1, x2, y2) in the model-input coordinate
                frame.
            labels: Label id per box.
            scores: Confidence per box.
            threshold: Detections scoring below this value are skipped.

        Returns:
            A tuple ``(high_confidence_labels, image)`` holding the unique
            label names in first-seen order and the annotated image in RGB.

        Raises:
            OSError: If the image cannot be read.
            KeyError: If a label id is missing from the label map.
        """
        image_with_boxes = cv2.resize(
            self._read_image(image_path), (self.DISPLAY_SIZE, self.DISPLAY_SIZE)
        )
        canvas_h, canvas_w, _ = image_with_boxes.shape
        scale_x = canvas_w / self.MODEL_INPUT_SIZE
        scale_y = canvas_h / self.MODEL_INPUT_SIZE
        font = cv2.FONT_HERSHEY_SIMPLEX

        # Unique label names found in this photo, in first-seen order.
        high_confidence_labels = []
        # Parallel per-detection columns for the results table.
        items = []
        probabilities = []
        ids = []

        for box, label, score in zip(boxes, labels, scores):
            if score < threshold:
                continue

            # Detections are numbered from 1 in drawing order.
            item_id = len(ids) + 1

            # Rescale in floating point and truncate once, so coordinates
            # are not rounded before the scale factor is applied.
            x1, y1, x2, y2 = box.tolist()
            start_point = (int(x1 * scale_x), int(y1 * scale_y))
            end_point = (int(x2 * scale_x), int(y2 * scale_y))

            cv2.rectangle(image_with_boxes, start_point, end_point, (0, 255, 0), 2)

            label_name = self.reverse_label_map[label.item()]
            if label_name not in high_confidence_labels:
                high_confidence_labels.append(label_name)

            label_score = f'{item_id}. {label_name}: {score.item():.2f}'

            items.append(f"{label_name}")
            probabilities.append(score.item())
            ids.append(item_id)

            # White background sized to the caption, placed at the
            # bottom-left corner of the box.
            text_size = cv2.getTextSize(label_score, font, 0.4, 1)[0]
            text_bg_rect_start = (start_point[0], end_point[1] - text_size[1] - 5)
            text_bg_rect_end = (start_point[0] + text_size[0], end_point[1])
            cv2.rectangle(image_with_boxes, text_bg_rect_start, text_bg_rect_end,
                          (255, 255, 255), -1)

            cv2.putText(image_with_boxes, label_score,
                        (start_point[0], end_point[1] - 5),
                        font, 0.4, (0, 255, 0), 1)

        plt.figure(figsize=(6, 6))
        plt.imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.show()

        print("-----------------------------------------")

        # Tabulate the drawn detections.
        results_df = pd.DataFrame({
            'ID': ids,
            'Item and Probability': items,
            'Probability': probabilities
        })
        display(HTML(results_df.to_html(index=False)))

        return high_confidence_labels, cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB)

    def detect_and_draw_boxes(self, image_path, threshold=0.5, iou_threshold=0.75):
        """
        Predict, suppress overlapping boxes, and draw the remaining ones.

        Args:
            image_path: Path of the image to process.
            threshold: Minimum score for a detection to be drawn.
            iou_threshold: A box is dropped when its IoU with a kept,
                higher-scoring box exceeds this value, regardless of class.

        Returns:
            Whatever ``draw_boxes_on_image`` returns for the kept boxes.
        """
        prediction = self.predict_image(image_path)
        boxes = prediction['boxes']
        scores = prediction['scores']
        labels = prediction['labels']

        # Greedy class-agnostic NMS; the returned indices are ordered by
        # decreasing score, so the outputs stay sorted by confidence.
        keep = torchvision.ops.nms(boxes, scores, iou_threshold)

        return self.draw_boxes_on_image(
            image_path, boxes[keep], labels[keep], scores[keep], threshold
        )

## Apply Model

In [None]:
# Restore the best checkpoint
best_checkpoint_path = '/content/checkpoint_29.pth'

# Prefer the GPU when one is available, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CustomFasterRCNN()

# load_model restores optimizer state as well, so an optimizer has to exist
optimizer = optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

best_model, now_optimizer, best_epoch = load_model(
    best_checkpoint_path, model, optimizer, device
)

In [None]:
# Wrap the restored model and its label map in a detector
detector = ImageObjectDetector(
    model=best_model,
    label_map=loaded_label_map,
    device=device,
)

In [None]:
# Path of the image to run detection on (import any image you want)
test_image = '/content/test_image.png'

In [None]:
# Run detection on the test image and display the annotated result
high_confidence_labels, image_with_boxes = detector.detect_and_draw_boxes(
    image_path=test_image,
)

In [None]:
# List the unique labels that were detected, numbered from 1
if not high_confidence_labels:
    print("No high confidence labels detected.")
else:
    print("High confidence labels detected in the image:")
    for i, label in enumerate(high_confidence_labels, start=1):
        print(f'{i}. {label}')

In [None]:
# Show Image with Boxes
# Echoes the second value returned by detect_and_draw_boxes (the annotated
# image converted to RGB) as the result of this cell.
# NOTE(review): a bare array expression presumably renders as a numeric repr
# rather than a picture — confirm, and use plt.imshow if a picture is wanted.
image_with_boxes