In [None]:
!pip install roboflow

In [None]:
!pip install ultralytics==8.0.196

In [None]:
%matplotlib inline
import os
import torch
import torchvision
from torch import nn

In [None]:
# getting our annotated data from roboflow
from roboflow import Roboflow
rf = Roboflow(api_key="FzschU3rcFSGL0KcALsV")
project = rf.workspace("caloriescan").project("caloriescan-buf3z")
dataset = project.version(1).download("yolov8")

In [None]:
# Training custom YOLOv8 model
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model.train(data='CalorieScan-1/data.yaml', epochs=10)

In [None]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# !rm -rf runs/detect/train

In [None]:
# !ls

In [None]:
# !yolo task=detect mode=predict model='/content/runs/detect/train/weights/best.pt' source='/content/drive/MyDrive/MLPR Project/Data/raw_data/1.jpg' save_txt=True

In [None]:
# Loading the custom trained model's parameters
from PIL import Image
model = YOLO('/content/runs/detect/train/weights/best.pt')
im = Image.open('/content/drive/MyDrive/MLPR Project/Data/raw_data/1.jpg')
results = model.predict(source=im, save=True, save_txt=True)


0: 480x640 6 bowls, 398.4ms
Speed: 6.2ms preprocess, 398.4ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)
Results saved to [1mruns/detect/predict[0m
1 label saved to runs/detect/predict/labels


In [None]:
# !zip -r '/content/runs2.zip' '/content/runs'

In [None]:
# Load the trained model
loaded_model = torchvision.models.resnet18(pretrained=False)
loaded_model.fc = nn.Linear(loaded_model.fc.in_features, 45)
loaded_model.load_state_dict(torch.load('/content/drive/MyDrive/MLPR Project/models/finetune_model_new.pth'))
loaded_model.eval()

In [None]:
dataset = torchvision.datasets.ImageFolder("/content/drive/MyDrive/MLPR Project/Data/cropped")

In [None]:
print(dataset.classes)

['Aloo_Nutri', 'Bhatura', 'Bhindi', 'Biryani', 'Black_Chana', 'Boondi_Curd', 'Chicken', 'Chocos', 'Chole_Rice', 'Curd', 'Dal', 'Dal_Rice', 'Fruit_Custard', 'Ghiya', 'Gobhi', 'Gulab_Jamun', 'Halwa', 'Honey_Chili_Potato', 'Kadhi', 'Kadhi_Rice', 'Kheer', 'Mix_Veg', 'Mushroom_Matar', 'Noodles', 'Nutri_Masala', 'Paneer', 'Papad', 'Pasta', 'Poori', 'Pumpkin_Sabzi', 'Rajma', 'Rice', 'Roti', 'Salad', 'Semiya_Kheer', 'Shahi_Tukda', 'Tori', 'White_Chana']


In [None]:
from PIL import Image
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize([256, 256]),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    normalize])

In [None]:
import os
import cv2
from torchvision import transforms

def convert_yolo_to_normal(image_width, image_height, bbox):
    """
    Convert YOLO label format to normal bounding box format.
    YOLO format: [class_id, x_center, y_center, width, height]
    Normal format: [x_min, y_min, x_max, y_max]
    """
    x_center, y_center, width, height = bbox[1:]
    x_min = int((x_center - width / 2) * image_width)
    y_min = int((y_center - height / 2) * image_height)
    x_max = int((x_center + width / 2) * image_width)
    y_max = int((y_center + height / 2) * image_height)

    return [x_min, y_min, x_max, y_max]

def crop_image(image_path, label_path, output_folder, pt=False):
    """
    Crop the part of the image specified by the YOLO-format label.
    """
    # Read the image
    image = cv2.imread(image_path)
    h, w, _ = image.shape
    pred_labels = []

    # Read the label file
    with open(label_path, 'r') as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        bbox = list(map(float, line.strip().split()))
        bbox_normal = convert_yolo_to_normal(w, h, bbox)

        # Crop the image
        cropped_image = image[bbox_normal[1]:bbox_normal[3], bbox_normal[0]:bbox_normal[2]]

        # Save the cropped image
        output_path = os.path.join(output_folder, f"{os.path.basename(image_path).split('.')[0]}_crop_{i}.jpg")
        cv2.imwrite(output_path, cropped_image)

        im = torchvision.io.read_image(output_path)
        im = im.float() / 255.0  # Convert to float and normalize to [0, 1]
        im = normalize(im)  # Apply normalization
        im = im.unsqueeze(0)  # Add batch dimension

        # Make prediction
        with torch.no_grad():
            output = loaded_model(im)

        print(torch.argmax(output))

        # Get predicted class
        predicted_class = torch.argmax(output).item()

        # Get label from dataset
        label = dataset.classes[predicted_class]
        if pt == True:
          print(label)
        pred_labels.append(label)
        os.remove(output_path)

        return pred_labels

In [1]:
crop_image('/content/drive/MyDrive/MLPR Project/Data/raw_data/1.jpg', '/content/runs/detect/predict/labels/1.txt', '/content/', True)

Blank
Blank
Curd
Semiya_Kheer
Salad
Black_Chana


In [None]:
cal_dict = {
    "Halwa": 285,
    "Dal": 230,
    "Roti": 297,
    "Rice": 242,
    "Papad": 371,
    "Mushroom_Matar": 173,
    "Chana_Salad": 195,
    "Dal_Rice": 293,
    "Mix_Veg": 162,
    "Kheer": 235,
    "Thepla": 120,
    "Green_Chutney": "40",
    "Chocos": 111,
    "Curd": 59,
    "Paneer": 265,
    "Semiya_Kheer": 249,
    "Black_Chana": 378,
    "Bhindi": 33,
    "Salad": 30,
    "Chole Rice": 300,
    "Chicken": 200,
    "Nutri_Masala": 150,
    "White_Chana": 180,
    "Boondi_Curd": 250,
     "Noodles": 300,
    "Fruit_Custard": 200,
    "Honey_Chili_Potato": 350,
    "Ghiya": 100,
    "Rajma": 200,
    "Gulab_Jamun": 250,
    "Shahi_Tukda": 400,
    "Aloo_Nutri": 250,
    "Bhatura": 300,
    "Biryani": 500,
    "Gobhi": 150,
    "Pasta": 350,
    "Pumpkin_Sabzi": 150,
    "Poori": 250,
    "Tori": 180,
    "Kadhi": 250,
    "Kadhi_Rice": 400,
    "Aloo_Gobhi": 200,
    "Fried_Rice": 350,
    "Aloo_Methi": 200,
    "Rajma_Rice": 450
}

In [4]:
import xml.etree.ElementTree as ET
import random

correct = 0
all = 0

def get_labels(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    labels = []
    for obj in root.iter('object'):
        label = obj.find('name').text
        labels.append(label)
    return labels

for i in range(30):
    cal = 0
    j = random.randint(1, 270)
    # model = YOLO('/content/runs/detect/train/weights/best.pt')
    im = Image.open('/content/drive/MyDrive/MLPR Project/Data/raw_data/1.jpg')
    results = model.predict(source=im, save=True, save_txt=True)
    pred_labels = crop_image(f'/content/drive/MyDrive/MLPR Project/Data/raw_data/{j}.jpg', f'/content/runs/detect/predict/labels/{j}.txt', '/content/')
    actual_labels = get_labels(f'/content/drive/MyDrive/MLPR Project/Data/labels_mlpr/{j}.jpg')
    for k in pred_labels:
      cal += cal_dict[k]
    correct += len(set(pred_labels).intersection(actual_labels))
    all += len(set(pred_labels))

print("Accuracy is", correct/all)

Accuracy is 78.68
