In [9]:
import cv2
import torch
import numpy as np
from keras.models import load_model # type: ignore
from keras.applications.imagenet_utils import preprocess_input # type: ignore
import os

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In [4]:
# Load your trained CNN model
cnn_model = load_model('cnn_model_v01.h5')

# Load YOLOv5s model (CPU-only)
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s', device='cpu')



Using cache found in C:\Users\Anurag Katkar/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2025-7-13 Python-3.10.18 torch-2.5.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [5]:
# Define your traffic sign class names (adjust according to your dataset)
class_names = [
    "Speed limit 20", "Speed limit 30", "Speed limit 50", "Speed limit 60",
    "Speed limit 70", "Speed limit 80", "End of speed limit 80", "Speed limit 100",
    "Speed limit 120", "No passing", "No passing for trucks", "Right-of-way at intersection",
    "Priority road", "Yield", "Stop", "No vehicles", "No trucks",
    "No entry", "General caution", "Dangerous curve left", "Dangerous curve right",
    "Double curve", "Bumpy road", "Slippery road", "Road narrows on the right",
    "Road work", "Traffic signals", "Pedestrians", "Children crossing",
    "Bicycles crossing", "Beware of ice/snow", "Wild animals crossing",
    "End of all restrictions", "Turn right ahead", "Turn left ahead",
    "Ahead only", "Go straight or right", "Go straight or left", "Keep right",
    "Keep left", "Roundabout", "End of no passing", "End of no passing for trucks"
]

# Preprocess cropped sign for CNN
def preprocess_sign(image):
    image = cv2.resize(image, (32, 32))            # Resize to model input size
    image = image.astype("float32") / 255.0         # Normalize
    image = np.expand_dims(image, axis=0)           # Add batch dimension
    return image


In [None]:

# Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")
    exit()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv5 on the current frame
    results = yolo_model(frame)
    # Get detection results
    detections = results.xyxy[0]  # xyxy format: (x1, y1, x2, y2, conf, cls)
    print("YOLOv5 Results:", results)
    print("Detections:", detections)

    for *box, conf, cls in detections:
        x1, y1, x2, y2 = map(int, box)
        cropped_sign = frame[y1:y2, x1:x2]

        if cropped_sign.size == 0:
            continue

        try:
            # Preprocess and classify using CNN
            processed = preprocess_sign(cropped_sign)
            prediction = cnn_model.predict(processed)
            label = class_names[np.argmax(prediction)]

            # Draw bounding box and label
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        except Exception as e:
            print("Error classifying:", e)
            continue

    # Show the frame
    cv2.imshow("Traffic Sign Recognition", frame)

    # Press 'ESC' to exit
    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()

YOLOv5 Results: image 1/1: 480x640 1 bottle, 1 tv
Speed: 7.0ms pre-process, 125.5ms inference, 14.5ms NMS per image at shape (1, 3, 480, 640)
Detections: tensor([[2.11804e+02, 1.30358e+02, 5.35367e+02, 3.42207e+02, 5.40250e-01, 6.20000e+01],
        [1.98318e+02, 3.62983e+02, 2.10422e+02, 3.91719e+02, 3.42317e-01, 3.90000e+01]])
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 290ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
YOLOv5 Results: image 1/1: 480x640 1 person, 1 bottle, 1 tv
Speed: 8.0ms pre-process, 202.1ms inference, 2.0ms NMS per image at shape (1, 3, 480, 640)
Detections: tensor([[2.21952e+02, 1.31939e+02, 5.28606e+02, 3.42757e+02, 4.59097e-01, 6.20000e+01],
        [2.05696e+02, 1.49115e+02, 6.39811e+02, 4.74656e+02, 4.29901e-01, 0.00000e+00],
        [1.98044e+02, 3.62818e+02, 2.10472e+02, 3.91811e+02, 3.46726e-01, 3.90000e+01]])
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━

In [8]:
from ultralytics import YOLO

model = YOLO("yolov5su.pt")

In [11]:
yolo_model = YOLO("yolov5su.pt")         # or "best.pt" if it's your fine-tuned YOLOv5 model
cnn_model = load_model('cnn_model_v01.h5')

# Input/output
image_folder = "GTSDB/images/train"
output_file = "yolo_cnn_predictions.txt"

# Image size expected by CNN model
cnn_input_size = (64, 64)  # adjust as per your model

# Open results file
with open(output_file, "w") as f:
    for img_name in os.listdir(image_folder):
        if img_name.lower().endswith((".jpg", ".png", ".jpeg")):
            img_path = os.path.join(image_folder, img_name)
            image = cv2.imread(img_path)

            # YOLO detection
            results = yolo_model.predict(img_path, save=False, conf=0.25, verbose=False)
            boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)  # (x1, y1, x2, y2)

            # Start writing image name
            f.write(f"{img_name} ")

            for box in boxes:
                x1, y1, x2, y2 = box
                crop = image[y1:y2, x1:x2]

                # Resize crop for CNN
                # crop_resized = cv2.resize(crop, cnn_input_size)
                # crop_input = crop_resized.astype("float32") / 255.0
                # crop_input = np.expand_dims(crop_input, axis=0)

                # CNN Prediction
                pred = cnn_model.predict(preprocess_sign(crop), verbose=0)
                class_id = np.argmax(pred)

                # Write class to file
                f.write(f"{class_id} ")

            f.write("\n")



In [13]:
with open("yolo_cnn_predictions.txt", "r") as f:
    lines = f.readlines()

In [14]:
pred = []
for line in lines:
    parts = line.strip().split(' ')
    img_name = parts[0]
    if len(parts) > 1:
        class_ids = [int(cls_id) for cls_id in parts[1:]]
    else:
        class_ids = [99]
    if class_ids:
        pred.append((img_name, class_ids))

In [15]:
actual = []
for file in os.listdir("GTSDB/labels/train"):
    with open(os.path.join("GTSDB/labels/train", file), "r") as f:
        content = f.readlines()
        content = [line.strip() for line in content if line.strip()]
        if content:
            class_id = []
            for line in content:
                class_id.append(int(line.split()[0]))
            actual.append((file, class_id))   

In [16]:
sum = 0
count = 0

for i in range(len(actual)):
    count += (max(len(actual[i][1]), len(pred[i][1])))

for i in range(len(actual)):
    for j in pred[i][1]:
        if j in actual[i][1]:
            sum += 1
accuracy = sum / count
accuracy

0.05016181229773463