# **"License Plate Detection and Recognition Using YOLOv8"**

### The purpose of this notebook is to test a custom-trained YOLOv8 model for license plate detection and to read the detected plates with OCR (EasyOCR and PaddleOCR).

#### **Libraries Used:** 
>ultralytics

>matplotlib.pyplot

>cv2

#### **What is ultralytics library?**
>Ultralytics is a library that provides the YOLO family of models for object detection, image classification, and instance segmentation. It is used for image and video analysis.

#### **How to use?**

>1. Load the best weights of the YOLOv8 model trained on the custom license plate data

>2. Store the predictions of model

>3. Using different scripts, test it on images, videos and live feed


# +_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+

## **Install ultralytics**

In [1]:
!pip install ultralytics

^C
Collecting ultralytics
  Using cached ultralytics-8.2.71-py3-none-any.whl.metadata (41 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.4.0-cp312-cp312-win_amd64.whl.metadata (27 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.19.0-1-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Using cached ultralytics_thop-2.0.0-py3-none-any.whl.metadata (8.5 kB)
Using cached ultralytics-8.2.71-py3-none-any.whl (863 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Downloading torch-2.4.0-cp312-cp312-win_amd64.whl (197.8 MB)
   ---------------------------------------- 0.0/197.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/197.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/197.8 MB ? eta -:--:--
   

## **Load the best weights of trained YOLO**

In [1]:
from ultralytics import YOLO

# Best weights of the trained YOLO model; the path is relative to the notebook.
WEIGHTS_PATH = "best.pt"

# `model` is reused by every prediction cell below.
model = YOLO(WEIGHTS_PATH)

OSError: [WinError 126] The specified module could not be found. Error loading "C:\Users\mshah\anaconda3\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.

## **Test model on image files**

In [None]:
import matplotlib.pyplot as plt
import cv2
from ultralytics import YOLO

# Run the detector on one image and overlay the predicted boxes with matplotlib.

# Load the image
image_path = 'car_139.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None (instead of raising) when the file is missing or unreadable.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for matplotlib

# Make predictions (`model` is created in the weight-loading cell)
results = model.predict(source=image_path)

# Plot the image with predictions
fig, ax = plt.subplots(figsize=(4, 4))
ax.imshow(image)
ax.axis('off')  # Hide axes
ax.set_title('Predictions on Image')

# Plot the bounding boxes and labels
for result in results:
    for box in result.boxes:
        # Convert the coordinate tensor to plain Python floats so matplotlib never
        # receives a torch tensor (conversion fails for tensors that live on a GPU).
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='green', linewidth=2))
        label = f"{result.names[int(box.cls[0])]}: {float(box.conf[0]):.2f}"
        ax.text(x1, y1, label, fontsize=6, color='white', bbox=dict(facecolor='green', edgecolor='green', alpha=0.6))

plt.show()

In [None]:
pip install easyocr

## **Apply EasyOCR**

In [None]:
pip install opencv

In [None]:
import cv2
import easyocr
import matplotlib.pyplot as plt

# Detect plates on one image with YOLO, read each crop with EasyOCR, and show
# the annotated result. Requires `model` from the weight-loading cell.

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'])  # Specify the languages you want to use

# Load the image
image_path = 'car_139.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread returns None (instead of raising) when the file is missing or unreadable.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for matplotlib
img_h, img_w = image_rgb.shape[:2]

# Draw on a copy so that boxes/labels painted for one detection can never end
# up inside the crop that is sent to OCR for a later detection.
annotated = image_rgb.copy()

# Make predictions using the YOLOv8 model
results = model.predict(source=image_path)

# Store extracted license plate texts as (text, confidence) pairs
extracted_texts = []

# Process each detection
for result in results:
    for box in result.boxes:
        # Plain ints, clamped to the image so slicing never wraps around or goes empty.
        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, img_w), min(y2, img_h)
        if x2 <= x1 or y2 <= y1:
            continue  # degenerate box: nothing to read

        # Crop the license plate from the untouched image and run OCR on it
        license_plate = image_rgb[y1:y2, x1:x2]
        ocr_detections = reader.readtext(license_plate)
        if not ocr_detections:
            continue

        # Each OCR detection is (bounding box, text, confidence)
        text_lines = []
        confidences = []
        for _bbox, text, confidence in ocr_detections:
            extracted_texts.append((text, confidence))
            text_lines.append(text)
            confidences.append(confidence)

        # Join all detected lines into a single string; report the mean
        # confidence so the number shown describes the whole joined text
        # rather than only the last OCR fragment.
        full_text = ' '.join(text_lines)
        mean_confidence = sum(confidences) / len(confidences)
        label = f'License Plate: {full_text} (Conf: {mean_confidence:.2f})'

        # Draw the bounding box
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 0, 0), 2)
        # Put the label above the box unless that would leave the image
        y = y1 - 10 if y1 - 10 > 10 else y1 + 10

        # White background behind the text, then the text itself
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        cv2.rectangle(annotated, (x1, y - text_height - 5), (x1 + text_width, y),
                      (255, 255, 255), -1)
        cv2.putText(annotated, label, (x1, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (0, 0, 0), 1)

# Display the image with OCR results using Matplotlib
plt.figure(figsize=(5, 5))
plt.imshow(annotated)
plt.axis('off')  # Hide axes
plt.title('License Plate Detection with EasyOCR')
plt.show()

# Print the extracted license plate texts in a single line
if extracted_texts:
    final_text = ' '.join(text for text, conf in extracted_texts)
    print(f'Extracted License Plate: {final_text}')


## **SubPlot with EasyOCR**

In [None]:
import os
import cv2
import easyocr
import pandas as pd
import matplotlib.pyplot as plt

# Run YOLO + EasyOCR over every image in a folder, show the annotated images in
# a two-column grid, save the grid, and collect the recognised text per file in
# `results_df`. Requires `model` from the weight-loading cell.

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'])  # Specify the languages you want to use

# Set the folder path containing the images
folder_path = r'C:\Users\majid\vechile\vechileclass\images\test'

# (filename, recognised text) per image
extracted_texts = []

# Get the list of image files in the folder (extension match is case-insensitive)
image_files = [filename for filename in os.listdir(folder_path)
               if filename.lower().endswith(('.jpg', '.png', '.jpeg', '.webp'))]
num_images = len(image_files)

# Two images per row. Keep at least one row so an empty folder does not make
# plt.subplots fail, and use squeeze=False so `axes` is always 2-D (with a
# single row matplotlib would otherwise return a 1-D array and axes[row, col]
# would raise IndexError).
num_rows = max(1, (num_images + 1) // 2)
fig, axes = plt.subplots(nrows=num_rows, ncols=2, figsize=(12, 12), squeeze=False)

# Iterate over the images in the folder
for i, filename in enumerate(image_files):
    ax = axes[i // 2, i % 2]

    # Load the image
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable file: keep a row in the table, leave the panel blank.
        extracted_texts.append((filename, ''))
        ax.axis('off')
        ax.set_title(filename)
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for matplotlib
    img_h, img_w = image_rgb.shape[:2]

    # Make predictions using the YOLOv8 model
    results = model.predict(source=image_path)

    # First pass: OCR every detected box on the untouched image.
    plates = []  # (x1, y1, x2, y2, text read inside that box)
    for result in results:
        for box in result.boxes:
            # Plain ints, clamped to the image so slicing never wraps or goes empty.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, img_w), min(y2, img_h)
            if x2 <= x1 or y2 <= y1:
                continue

            license_plate = image_rgb[y1:y2, x1:x2]
            # Each OCR detection is (bounding box, text, confidence)
            box_text = ' '.join(text for _bbox, text, _conf in reader.readtext(license_plate))
            plates.append((x1, y1, x2, y2, box_text))

    # All text found in this image, joined into a single string
    full_text = ' '.join(box_text for *_coords, box_text in plates if box_text)
    extracted_texts.append((filename, full_text))

    # Second pass: draw each box with the text read inside that box (not the
    # text of every box in the image).
    for x1, y1, x2, y2, box_text in plates:
        label = f'License Plate: {box_text}'
        cv2.rectangle(image_rgb, (x1, y1), (x2, y2), (0, 0, 0), 2)
        # Put the label above the box unless that would leave the image
        y = y1 - 10 if y1 - 10 > 10 else y1 + 10

        # White background behind the text, then the text itself
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        cv2.rectangle(image_rgb, (x1, y - text_height - 5), (x1 + text_width, y),
                      (255, 255, 255), -1)
        cv2.putText(image_rgb, label, (x1, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (0, 0, 0), 1)

    # Display the image with OCR results in the subplot
    ax.imshow(image_rgb)
    ax.axis('off')
    ax.set_title(filename)

# Hide any unused subplots (counted from num_images so this also works when
# the loop above never ran)
for j in range(num_images, num_rows * 2):
    axes[j // 2, j % 2].axis('off')

# Save the figure
plt.tight_layout()
plt.savefig('license_plate_detection.png')

# Create a DataFrame to store the extracted license plate texts
results_df = pd.DataFrame(extracted_texts, columns=['Filename', 'License Plate'])


In [None]:
# Display the Filename / License Plate table built in the previous cell.
results_df

## **Using Paddle OCR**

In [None]:
!pip install paddlepaddle

In [None]:
!pip install paddleocr

In [None]:
import os
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from paddleocr import PaddleOCR

# Run YOLO + PaddleOCR over every image in a folder, show the annotated images
# in a two-column grid, save the grid, and collect the recognised text per file
# in `resultspaddle_df`. Requires `model` from the weight-loading cell.

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Specify the language you want to use

# Set the folder path containing the images
folder_path = 'X:/ML1/WEEK5/day2/'

# (filename, recognised text) per image
extracted_texts = []

# Get the list of image files in the folder (extension match is case-insensitive)
image_files = [filename for filename in os.listdir(folder_path)
               if filename.lower().endswith(('.jpg', '.png', '.jpeg', '.webp'))]
num_images = len(image_files)

# Two images per row. Keep at least one row so an empty folder does not make
# plt.subplots fail, and use squeeze=False so `axes` is always 2-D (with a
# single row matplotlib would otherwise return a 1-D array and axes[row, col]
# would raise IndexError).
num_rows = max(1, (num_images + 1) // 2)
fig, axes = plt.subplots(nrows=num_rows, ncols=2, figsize=(12, 12), squeeze=False)

# Iterate over the images in the folder
for i, filename in enumerate(image_files):
    ax = axes[i // 2, i % 2]

    # Load the image
    image_path = os.path.join(folder_path, filename)
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable file: keep a row in the table, leave the panel blank.
        extracted_texts.append((filename, ''))
        ax.axis('off')
        ax.set_title(filename)
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for matplotlib
    img_h, img_w = image_rgb.shape[:2]

    # Make predictions using the YOLOv8 model
    results = model.predict(source=image_path)

    # First pass: OCR every detected box on the untouched image.
    plates = []  # (x1, y1, x2, y2, text read inside that box)
    for result in results:
        for box in result.boxes:
            # Plain ints, clamped to the image so slicing never wraps or goes empty.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, img_w), min(y2, img_h)
            if x2 <= x1 or y2 <= y1:
                continue

            license_plate = image_rgb[y1:y2, x1:x2]
            ocr_results = ocr.ocr(license_plate, cls=True)

            # The per-image entry is None when nothing was recognised (the
            # video cell further down guards against this as well); iterating
            # it unguarded raises TypeError.
            lines = ocr_results[0] if ocr_results and ocr_results[0] is not None else []
            # Each line contains (box, (text, confidence))
            box_text = ' '.join(text for _quad, (text, _conf) in lines)
            plates.append((x1, y1, x2, y2, box_text))

    # All text found in this image, joined into a single string
    full_text = ' '.join(box_text for *_coords, box_text in plates if box_text)
    extracted_texts.append((filename, full_text))

    # Second pass: draw each box with the text read inside that box (not the
    # text of every box in the image).
    for x1, y1, x2, y2, box_text in plates:
        label = f'License Plate: {box_text}'
        cv2.rectangle(image_rgb, (x1, y1), (x2, y2), (0, 0, 0), 2)
        # Put the label above the box unless that would leave the image
        y = y1 - 10 if y1 - 10 > 10 else y1 + 10

        # White background behind the text, then the text itself
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        cv2.rectangle(image_rgb, (x1, y - text_height - 5), (x1 + text_width, y),
                      (255, 255, 255), -1)
        cv2.putText(image_rgb, label, (x1, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.4, (0, 0, 0), 1)

    # Display the image with OCR results in the subplot
    ax.imshow(image_rgb)
    ax.axis('off')
    ax.set_title(filename)

# Hide any unused subplots (counted from num_images so this also works when
# the loop above never ran)
for j in range(num_images, num_rows * 2):
    axes[j // 2, j % 2].axis('off')

# Save the figure
plt.tight_layout()
plt.savefig('license_plate_detection_paddle.png')

# Create a DataFrame to store the extracted license plate texts
resultspaddle_df = pd.DataFrame(extracted_texts, columns=['Filename', 'License Plate'])

In [None]:
# Display the Filename / License Plate table built by the PaddleOCR cell above.
resultspaddle_df

## **Test model on video files**

In [None]:
import cv2

# Play a video file with the model's boxes and class labels drawn on every
# frame. Press 'q' in the preview window to stop. Requires `model` from the
# weight-loading cell.

# Load the video
video_path = 'traffic.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a wrong path makes the cell finish silently.
    raise IOError(f"Could not open video: {video_path}")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read error

        # Make predictions
        results = model.predict(source=frame)

        # Draw bounding boxes and labels on the frame
        for result in results:
            for box in result.boxes:
                # OpenCV drawing calls need plain Python ints
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                conf = float(box.conf[0])
                class_name = result.names[int(box.cls[0])]

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'Class: {class_name}, Conf: {conf:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame with predictions
        cv2.imshow('Video Predictions', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the window, even if prediction or
    # drawing raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
!pip install sort-track

In [None]:
import cv2
import pandas as pd
import numpy as np
from paddleocr import PaddleOCR
from sort import Sort

# Detect plates per frame, track them with SORT, read each new track once with
# PaddleOCR, write the annotated video to disk and collect the recognised
# plate texts in `results_df`. Press 'q' in the preview window to stop.
# Requires `model` from the weight-loading cell.

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Set the path to the video file
video_path = 'X:/ML1/WEEK5/day2/traffic.mp4'  # Change this to your video file path

# Open the video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a wrong path silently produces an empty output video.
    raise IOError(f"Could not open video: {video_path}")

# Get the video's width, height, and frames per second (fps)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the fractional frame rate (truncating e.g. 29.97 to 29 changes playback
# speed); fall back to 30 if the container reports no rate at all.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create a VideoWriter object to save the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_with_ocr.mp4', fourcc, fps, (width, height))

# One entry per tracked plate, in the order the plates were first read
extracted_texts = []

# Initialize SORT tracker
tracker = Sort()

# Track id -> license plate text read for that track
tracked_plates = {}

try:
    # Process the video frame by frame
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read error

        frame_h, frame_w = frame.shape[:2]
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # RGB copy used for the OCR crops

        # Predict on the original BGR frame, as the other video cells do;
        # Ultralytics documents numpy sources as BGR.
        results = model.predict(source=frame)

        # Prepare detections for the SORT tracker: rows of [x1, y1, x2, y2, score]
        detections = []
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                score = float(box.conf[0])  # the Boxes attribute is `conf`, as used in the other cells
                detections.append([x1, y1, x2, y2, score])

        # SORT must be updated on every frame and expects an (N, 5) array,
        # so frames without detections pass an empty (0, 5) array.
        dets = np.array(detections, dtype=float) if detections else np.empty((0, 5))
        tracked_objects = tracker.update(dets)

        # Process each tracked object: rows of [x1, y1, x2, y2, track id]
        for obj in tracked_objects:
            x1, y1, x2, y2 = (int(v) for v in obj[:4])
            obj_id = int(obj[4])

            # Tracker output can extend past the frame; clamp so slicing
            # never wraps around via negative indices.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            if x2 <= x1 or y2 <= y1:
                continue

            # Only the first successful read of a track is kept, so skip OCR
            # entirely once a track already has text.
            if obj_id not in tracked_plates:
                license_plate = image_rgb[y1:y2, x1:x2]
                ocr_results = ocr.ocr(license_plate, cls=True)

                # The per-image entry is None/empty when nothing was recognised.
                if ocr_results and ocr_results[0]:
                    # Each line contains (box, (text, confidence)); keep the first line.
                    _, (text, confidence) = ocr_results[0][0]
                    tracked_plates[obj_id] = text
                    extracted_texts.append(text)

            # Get the license plate text for the current tracked object
            license_plate_text = tracked_plates.get(obj_id, "")
            label = f'License Plate: {license_plate_text}'

            # Draw the bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Put the label above the box unless that would leave the frame
            y = y1 - 10 if y1 - 10 > 10 else y1 + 10

            # White background behind the text, then the text itself
            (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
            cv2.rectangle(frame, (x1, y - text_height - 5), (x1 + text_width, y), (255, 255, 255), -1)
            cv2.putText(frame, label, (x1, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

        # Write the frame into the file
        out.write(frame)

        # Display the processed frame
        cv2.imshow('Processed Video', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture/writer (so the output file is finalised) and
    # close the window, even if a frame raised or the kernel was interrupted.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Create a DataFrame to store the extracted license plate texts
results_df = pd.DataFrame(extracted_texts, columns=['License Plate'])

#### Video Results:


## **Test model on live camera**

In [None]:
import cv2

# Show the default camera (device 0) with the model's boxes and class labels
# drawn on every frame. Press 'q' in the preview window to stop. Requires
# `model` from the weight-loading cell.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this check a missing/busy camera makes the cell finish silently.
    raise IOError("Could not open camera 0")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # camera stopped delivering frames

        # Make predictions
        results = model.predict(source=frame)

        # Draw bounding boxes and labels on the frame
        for result in results:
            for box in result.boxes:
                # OpenCV drawing calls need plain Python ints
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                conf = float(box.conf[0])
                class_name = result.names[int(box.cls[0])]

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'Class: {class_name}, Conf: {conf:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame with predictions
        cv2.imshow('Video Predictions', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if prediction or
    # drawing raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()

## **Test model on multiple images**

In [None]:
import os
import glob
import matplotlib.pyplot as plt
import cv2
from ultralytics import YOLO

# Run the detector on every image in a folder and show the predictions in a
# three-column grid. Requires `model` from the weight-loading cell.

# Folder containing the images
folder_path = 'X:/ML1/WEEK4/day4/'

# Supported image file extensions
image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.webp']

# Get all image files in the folder
image_files = []
for ext in image_extensions:
    image_files.extend(glob.glob(os.path.join(folder_path, ext)))

# Determine the number of rows and columns for the subplot grid
num_images = len(image_files)
cols = 3  # Number of columns in the subplot grid
# Ceil-divide, but keep at least one row so an empty folder does not make
# plt.subplots fail on a zero-row grid.
rows = max(1, (num_images + cols - 1) // cols)

# Create a figure for subplots; squeeze=False guarantees an ndarray to flatten
fig, axes = plt.subplots(rows, cols, figsize=(15, rows * 5), squeeze=False)
axes = axes.flatten()

# Process and plot each image
for idx, image_path in enumerate(image_files):
    ax = axes[idx]

    # Load and convert the image
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; leave the panel blank.
        ax.axis('off')
        ax.set_title(os.path.basename(image_path))
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Make predictions
    results = model.predict(source=image_path)

    # Plot the image with predictions
    ax.imshow(image)
    ax.axis('off')  # Hide axes
    ax.set_title(os.path.basename(image_path))

    # Plot the bounding boxes and labels
    for result in results:
        for box in result.boxes:
            # Convert the coordinate tensor to plain Python floats so matplotlib never
            # receives a torch tensor (conversion fails for tensors that live on a GPU).
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='blue', linewidth=2))
            label = f"{result.names[int(box.cls[0])]}: {float(box.conf[0]):.2f}"
            ax.text(x1, y1, label, fontsize=12, color='white', bbox=dict(facecolor='blue', edgecolor='blue', alpha=0.8))

# Hide any unused subplots
for i in range(num_images, len(axes)):
    axes[i].axis('off')

plt.tight_layout()
plt.show()


In [None]:
!python predict_modified.py model= 'X:/ML1/WEEK5/day2/runs_backup/detect/train2/weights/best.pt' source='traffic.mp4' 