In [None]:
! pip install ultralytics



# Check if the GPU is available

In [None]:
import torch

# Default to the CPU and switch to the GPU only when PyTorch reports one.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(f'Using device: {device}')

Using device: cpu


# Mounting the drive

In [None]:
# Mount Google Drive at /content/drive so that files under
# /content/drive/MyDrive can be read and written from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Unzipping the Dataset

In [None]:
!unzip '/content/drive/MyDrive/Bekar Dataset YOLOv8.zip' -d '/content/Latest_dataset_YOLOv8'

# Training the model

In [None]:
from ultralytics import YOLO

# Build the model from the 'yolov8s.pt' checkpoint.
model = YOLO('yolov8s.pt')

# Train on the unzipped dataset, as described by its data.yaml.
model.train(
    data='/content/Latest_dataset_YOLOv8/data.yaml',
    epochs=120,
    imgsz=640,
    batch=16,
)

KeyboardInterrupt: 

# Saving the weights to drive

In [None]:
import shutil

# Where the best checkpoint is expected on the Colab disk.
# NOTE(review): this is hard-coded to the 'train' run directory; confirm it is
# the run you mean if training has been started more than once.
source = '/content/runs/detect/train/weights/best.pt'

# Where the copy lands in Google Drive.
destination = '/content/drive/MyDrive/kitkat_s.pt'

# shutil.copy raises if the copy fails, so the message below is only printed
# after a successful copy.
shutil.copy(source, destination)
print("Model saved successfully to Google Drive!")

Model saved successfully to Google Drive!


# Loading the model from drive for test

In [None]:
from ultralytics import YOLO

# Rebind `model` to the weights file stored in Google Drive; Drive must be
# mounted at /content/drive for this path to exist.
model = YOLO('/content/drive/MyDrive/kitkat_s.pt')

# Evaluating the trained weights on the test split


In [None]:
# Run validation with split='test' on the dataset described by data.yaml.
# NOTE(review): assumes data.yaml defines a test split — confirm.
results = model.val(data='/content/Latest_dataset_YOLOv8/data.yaml', split='test')

# Testing the model on custom data

In [None]:
from ultralytics import YOLO
from PIL import Image
import numpy as np
from IPython.display import display


# Run inference on a single image with the current `model`.
results = model('/content/WhatsApp Oct 17 Image.jpeg')

# The call returns a list of results; take the first entry for this image.
result = results[0]

# result.plot() draws the detections and returns a NumPy array in OpenCV's
# BGR channel order.
annotated_image = result.plot()

# Reverse the channel axis (BGR -> RGB) before handing the array to PIL;
# without this, red and blue are swapped in the displayed and saved picture.
image = Image.fromarray(annotated_image[..., ::-1].astype('uint8'))

# Render inline: image.show() opens an external viewer, which a hosted
# notebook kernel does not have.
display(image)
image.save('annotated_output.jpeg')  # Save the annotated image




image 1/1 /content/WhatsApp Oct 17 Image.jpeg: 640x480 1 Tide, 421.8ms
Speed: 3.4ms preprocess, 421.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)


# Downloading the training runs (including the weights) to my local system

In [None]:
import zipfile
import os

# Step 1: pack every file under /content/runs into a single archive.
zip_file_path = '/content/runs.zip'
runs_dir = '/content/runs'
# Archive names are taken relative to the parent of the runs folder, so every
# entry inside the zip starts with "runs/".
archive_base = os.path.join(runs_dir, '..')

with zipfile.ZipFile(zip_file_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(runs_dir):
        for filename in files:
            file_path = os.path.join(root, filename)
            zipf.write(file_path, os.path.relpath(file_path, archive_base))

# Step 2: hand the archive to the browser for download.
# This import rebinds the name `files` used by the loop above.
from google.colab import files
files.download(zip_file_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install -q \
ultralytics \
supervision \
roboflow

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
def detect_grocery_items(image_path, output_path):
    """Detect items in one image, save an annotated copy, and count detections per class.

    Inference uses the module-level ``model``; class names come from
    ``model.names``.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.
        output_path: Path the annotated image is written to with ``cv2.imwrite``.

    Returns:
        dict mapping each detected class name to the number of boxes of that
        class found in the image.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot read ``image_path``.
        IOError: if ``cv2.imwrite`` reports that the annotated image was not written.
    """
    # Load the image. cv2.imread returns None on failure instead of raising,
    # so check explicitly rather than handing None to the model.
    # NOTE(review): Ultralytics appears to fall back to a default sample source
    # when given None, which would produce unrelated detections — confirm for
    # the installed version.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Perform detection; only the first result is used.
    result = model(image)[0]

    # Save the annotated image. cv2.imwrite returns False when nothing was
    # written, so only report success when it returns True.
    annotated_image = result.plot()
    if not cv2.imwrite(output_path, annotated_image):
        raise IOError(f"Could not write annotated image to: {output_path}")
    print(f"Annotated image saved at: {output_path}")

    # One class id per detected box.
    class_ids = result.boxes.cls.cpu().numpy()

    # Count the occurrences of each class name.
    class_counts = {}
    for class_id in class_ids:
        class_name = model.names[int(class_id)]
        class_counts[class_name] = class_counts.get(class_name, 0) + 1

    # Print a summary
    print("Detection Summary:")
    for class_name, count in class_counts.items():
        print(f"{class_name}: {count}")

    return class_counts


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from google.colab.patches import cv2_imshow
from IPython.display import clear_output
from collections import defaultdict, deque
import time


def iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence ``(x1, y1, x2, y2)``; indices 0-3 are read.
        box2: Sequence ``(x1, y1, x2, y2)``; indices 0-3 are read.

    Returns:
        Intersection area divided by union area. Returns ``0.0`` when the
        union area is not positive (e.g. two zero-area boxes), where the
        ratio is undefined.
    """
    # Corners of the overlap rectangle.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # Clamp each side at 0 so disjoint boxes give an empty intersection.
    intersection = max(0, right - left) * max(0, bottom - top)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = area1 + area2 - intersection
    if union <= 0:
        # Avoid dividing by zero for degenerate boxes.
        return 0.0
    return intersection / float(union)

def smooth_box(box_history):
    """Average the boxes in ``box_history`` coordinate by coordinate.

    Returns ``None`` when ``box_history`` is falsy (e.g. empty); otherwise
    the result of ``np.mean`` taken along axis 0.
    """
    return np.mean(box_history, axis=0) if box_history else None

def process_video(input_path, output_path):
    """Annotate a video with tracked detections and return per-brand quantities.

    Every 5th frame is passed to the module-level ``model``. Each detection is
    matched to an already tracked item by IoU (> 0.5) against that item's
    smoothed box, or starts a new tracked item. Every frame is annotated with
    the tracked items and written to ``output_path``.

    Args:
        input_path: Path of the video opened with ``cv2.VideoCapture``.
        output_path: Path of the annotated video written with the 'mp4v' codec.

    Returns:
        dict mapping brand name to the rounded average number of detections
        per frame in which that brand was detected. A brand is only included
        when it was detected on more than 10% of all frames read; since
        detection runs on every 5th frame, that is more than half of the
        frames actually analysed.

    Raises:
        IOError: if the input video cannot be opened.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        # Fail loudly instead of writing an empty video and returning {}.
        cap.release()
        raise IOError(f"Could not open video: {input_path}")

    out = None
    try:
        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps <= 0:
            # The reported frame rate can be 0. That would give the writer an
            # unusable rate and make the "2 seconds" staleness window below
            # zero frames long, so fall back to a conventional value.
            fps = 30

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Tracked items keyed by id. Ids come from a counter that only grows:
        # deriving them from len(detected_items) would reuse the id of a live
        # item once an older item has been deleted, overwriting that item.
        detected_items = {}
        next_item_id = 0
        frame_count = 0

        # brand -> {frame number -> number of detections in that frame}
        detections_history = defaultdict(lambda: defaultdict(int))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Run YOLO detection every 5th frame
            if frame_count % 5 == 0:
                results = model(frame)

                current_frame_detections = []
                for r in results:
                    for box in r.boxes:
                        x1, y1, x2, y2 = box.xyxy[0].tolist()
                        conf = box.conf.item()
                        cls = int(box.cls.item())
                        brand = model.names[cls]
                        current_frame_detections.append((brand, [x1, y1, x2, y2], conf))

                # Match current detections with existing items
                for brand, box, conf in current_frame_detections:
                    matched = False
                    for item_info in detected_items.values():
                        if iou(box, item_info['smoothed_box']) > 0.5:  # IOU threshold
                            item_info['frames_detected'] += 1
                            item_info['total_conf'] += conf
                            # The deque has maxlen=10, so appending already
                            # discards the oldest position when it is full.
                            item_info['box_history'].append(box)
                            item_info['smoothed_box'] = smooth_box(item_info['box_history'])
                            item_info['last_seen'] = frame_count
                            matched = True
                            break

                    if not matched:
                        detected_items[next_item_id] = {
                            'brand': brand,
                            'box_history': deque([box], maxlen=10),
                            'smoothed_box': box,
                            'frames_detected': 1,
                            'total_conf': conf,
                            'last_seen': frame_count
                        }
                        next_item_id += 1

                    # Update detections history
                    detections_history[brand][frame_count] += 1

            # Annotate the frame with all tracked items. Iterate over a copy
            # because stale items are deleted inside the loop.
            for item_id, item_info in list(detected_items.items()):
                # Remove items not seen for a while
                if frame_count - item_info['last_seen'] > fps * 2:  # 2 seconds
                    del detected_items[item_id]
                    continue

                # Move the drawn box a fraction of the way toward the most
                # recent detected position
                if item_info['smoothed_box'] is not None:
                    alpha = 0.3  # Interpolation factor
                    current_box = item_info['smoothed_box']
                    target_box = item_info['box_history'][-1] if item_info['box_history'] else current_box
                    interpolated_box = [
                        current_box[i] * (1 - alpha) + target_box[i] * alpha
                        for i in range(4)
                    ]
                    item_info['smoothed_box'] = interpolated_box

                    x1, y1, x2, y2 = map(int, interpolated_box)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{item_info['brand']}",
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            # Write the frame to output video
            out.write(frame)
    finally:
        # Release the capture and writer even if inference or drawing raised.
        cap.release()
        if out is not None:
            out.release()

    # Calculate final counts and confirm results
    total_frames = frame_count
    confirmed_items = {}
    for brand, frame_counts in detections_history.items():
        detection_frames = len(frame_counts)
        if detection_frames > total_frames * 0.1:  # Detected in more than 10% of frames
            avg_count = sum(frame_counts.values()) / detection_frames
            confirmed_items[brand] = round(avg_count)

    return confirmed_items

# Annotate the input video and collect the confirmed quantity per brand.
input_path = '/content/ajitesh.mp4'
output_path = 'annotated.mp4'
results = process_video(input_path, output_path)

# Report one line per confirmed brand, then where the video was written.
print("Detected Items:")
for brand, count in results.items():
    print(f"{brand}: Quantity: {count}")

print(f"\nAnnotated video saved to: {output_path}")


0: 640x384 1 Head-Shoulders, 1 Tide, 563.8ms
Speed: 8.5ms preprocess, 563.8ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Head-Shoulders, 1 Tide, 585.7ms
Speed: 3.5ms preprocess, 585.7ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Tide, 565.2ms
Speed: 5.4ms preprocess, 565.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Tide, 595.7ms
Speed: 3.5ms preprocess, 595.7ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Head-Shoulders, 1 Tide, 608.3ms
Speed: 3.4ms preprocess, 608.3ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Head-Shoulders, 1 Tide, 443.2ms
Speed: 3.9ms preprocess, 443.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 Head-Shoulders, 1 Tide, 358.8ms
Speed: 3.3ms preprocess, 358.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections),