**Ultralytics Installation**

In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.186-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.16-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.186-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.16-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.186 ultralytics-thop-2.0.16


**Dataset Preparation and YOLO Format Conversion**

In [None]:
import os
import cv2
import numpy as np
import random
import shutil

# Class folder names; a name's position in this list becomes its numeric class id
classes = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
class_to_id = dict(zip(classes, range(len(classes))))

# Source directory (one sub-folder per class) and destination YOLO dataset directory
root_dir = 'data'
output_dir = 'Dataset'

# Create the <split>/<images|labels> layout under the output directory
for _split in ('train', 'val'):
    for _kind in ('images', 'labels'):
        os.makedirs(os.path.join(output_dir, _split, _kind), exist_ok=True)

# Optional folder that receives copies of the images with the bbox drawn on them
preview_dir = os.path.join(output_dir, 'previews')
os.makedirs(preview_dir, exist_ok=True)

#Function to auto-generate bbox using thresholding + contours (assumes uniform background)
def generate_bbox(image_path, threshold=240):
    """Estimate a single bounding box for the main object in an image.

    The image is converted to grayscale and inverse-thresholded, external
    contours are extracted from the resulting mask, and the bounding rectangle
    of the contour with the largest area is used as the object box. This
    relies on the object standing out against a bright, fairly uniform
    background.

    Args:
        image_path: Path of the image file to analyse.
        threshold: Grayscale cut-off passed to ``cv2.threshold``.

    Returns:
        ``None`` when the image cannot be read or no contour is found.
        Otherwise a 5-tuple ``(x_center, y_center, width, height, (x, y, w, h))``
        where the first four values are divided by the image width/height and
        the trailing tuple is the same box in pixels (top-left corner + size).
    """
    image = cv2.imread(image_path)
    if image is None:
        return None

    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # The contour enclosing the largest area is taken to be the object
    left, top, box_w, box_h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    frame_h, frame_w = image.shape[:2]

    # Normalised centre/size first, then the raw pixel box for drawing
    return (
        (left + box_w / 2) / frame_w,
        (top + box_h / 2) / frame_h,
        box_w / frame_w,
        box_h / frame_h,
        (left, top, box_w, box_h),
    )

#Optional: Visualize bbox on image and save preview
def visualize_bbox(image_path, bbox_denorm, output_path, label):
    """Draw a labelled bounding box on an image and save the annotated copy.

    Args:
        image_path: Path of the image to read.
        bbox_denorm: Pixel-space box as ``(x, y, w, h)``; a falsy value makes
            the call a no-op.
        output_path: Destination path of the annotated image.
        label: Text drawn next to the box.

    Nothing is written when the box is missing or the image cannot be read.
    """
    if not bbox_denorm:
        return

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; drawing on it would raise.
        print(f"Preview skipped, could not read image: {image_path}")
        return

    x, y, w, h = bbox_denorm
    color = (0, 255, 0)
    font, font_scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.9, 2
    cv2.rectangle(img, (x, y), (x + w, y + h), color, thickness)

    # putText anchors the text at its bottom-left corner. Keep the label just
    # above the box when it fits; otherwise (box touches the top edge) move it
    # inside the box so it is not drawn off-canvas.
    (_, text_h), _ = cv2.getTextSize(label, font, font_scale, thickness)
    text_y = y - 10
    if text_y - text_h < 0:
        text_y = y + text_h + 10
    cv2.putText(img, label, (x, text_y), font, font_scale, color, thickness)
    cv2.imwrite(output_path, img)

#Set to True to generate preview images for verification
visualize = True

#Fixed seed so the train/val split is identical on every run
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

#Fraction of each class that goes to the training split
TRAIN_FRACTION = 0.8

#Fail early, with a hint, when the source folders are not where the script expects them
missing_dirs = [
    os.path.join(root_dir, cls)
    for cls in classes
    if not os.path.isdir(os.path.join(root_dir, cls))
]
if missing_dirs:
    raise FileNotFoundError(
        f"Missing class folder(s): {missing_dirs}. "
        f"Upload or extract the dataset so that '{root_dir}' contains one sub-folder per class."
    )


def _process_split(img_names, class_dir, cls, split):
    """Copy one class's images into a split and write their YOLO label files.

    Args:
        img_names: File names (inside ``class_dir``) belonging to this split.
        class_dir: Source folder of the class.
        cls: Class name; its id is looked up in ``class_to_id``.
        split: Either ``'train'`` or ``'val'``.

    Returns:
        Number of images that were copied without a label because
        ``generate_bbox`` returned nothing for them.
    """
    unlabeled = 0
    for img_name in img_names:
        src_path = os.path.join(class_dir, img_name)
        # NOTE(review): images from all classes share one folder per split, so
        # identical file names in two class folders would overwrite each other.
        shutil.copy(src_path, os.path.join(output_dir, split, 'images', img_name))

        result = generate_bbox(src_path)
        if not result:
            unlabeled += 1
            continue

        x_center, y_center, width, height, bbox_denorm = result
        label_path = os.path.join(output_dir, split, 'labels', img_name.rsplit('.', 1)[0] + '.txt')
        with open(label_path, 'w') as f:
            f.write(f"{class_to_id[cls]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

        if visualize:
            preview_path = os.path.join(preview_dir, f"{split}_{img_name}")
            visualize_bbox(src_path, bbox_denorm, preview_path, cls)
    return unlabeled


#Counters for the final summary
total_images = 0
train_count = 0
val_count = 0
unlabeled_count = 0

#Process each class
for cls in classes:
    class_dir = os.path.join(root_dir, cls)
    # Sorted first: os.listdir order is arbitrary, which would defeat the seed
    images = sorted(
        f for f in os.listdir(class_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    total_images += len(images)

    #Shuffle for random split
    random.shuffle(images)

    #Per-class split keeps the class balance the same in train and val
    split_idx = int(TRAIN_FRACTION * len(images))
    train_images = images[:split_idx]
    val_images = images[split_idx:]
    train_count += len(train_images)
    val_count += len(val_images)

    unlabeled_count += _process_split(train_images, class_dir, cls, 'train')
    unlabeled_count += _process_split(val_images, class_dir, cls, 'val')

print(f"Dataset prepared with {total_images} total images. Train: {train_count}, Val: {val_count}.")
if unlabeled_count:
    print(f"{unlabeled_count} image(s) were copied without a label file because no bounding box was found.")
if visualize:
    print(f"Preview images with drawn bboxes saved in: {preview_dir}. Check them to verify accuracy.")
print("Update data.yaml path to:", output_dir)

FileNotFoundError: [Errno 2] No such file or directory: 'data/cardboard'

**YAML Configuration File Content**

In [None]:
%%writefile /content/data.yaml
# YOLO dataset configuration. The %%writefile magic on the first line saves
# this cell to /content/data.yaml, the file the training cell passes as `data=`.
path: '/content/Dataset'  # Update to your output_dir
train: 'train/images'  # relative to `path`
val: 'val/images'  # relative to `path`
nc: 6
# Order must match the class-id order used when the label files were written
names: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']

**GPU Detection and System Diagnostics**

In [None]:
# Check GPU availability as seen by PyTorch, then as seen by the NVIDIA driver tool
import torch
import subprocess

print("=== GPU Status ===")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"CUDA device name: {torch.cuda.get_device_name()}")
    print(f"CUDA version: {torch.version.cuda}")
else:
    print("⚠️ GPU not detected!")

# Check nvidia-smi
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except OSError:
    # subprocess.run raises FileNotFoundError (an OSError) when the executable
    # is missing; anything else (e.g. KeyboardInterrupt) is left to propagate.
    print("nvidia-smi not available")
else:
    print("\n=== nvidia-smi output ===")
    print(result.stdout)
    if result.returncode != 0:
        # The tool ran but reported a failure; show its diagnostics as well.
        print(result.stderr)

=== GPU Status ===
CUDA available: False
CUDA device count: 0
⚠️ GPU not detected!

=== nvidia-smi output ===
Tue Aug 26 13:17:17 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   34C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------

**PyTorch CUDA Reinstallation**

In [None]:

!pip uninstall torch torchvision torchaudio -y

# Install PyTorch with CUDA 12.4 support
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting torch
  Using cached https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-linux_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp312-cp312-linux_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cu124/torchaudio-2.6.0%2Bcu124-cp312-cp312-linux_x86_64.whl.metadata (6.6 kB)
Using cached https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-linux_x86_64.

**Final GPU Verification and Model Training**

In [None]:
import torch
from ultralytics import YOLO

# Choose the training device and a batch size to go with it
has_gpu = torch.cuda.is_available()
if not has_gpu:
    print("Still no GPU - will use CPU")
    device, batch_size = 'cpu', 2
else:
    print(f"SUCCESS! GPU detected: {torch.cuda.get_device_name(0)}")
    device, batch_size = 0, 16

    # Smoke test: allocate a small tensor on the GPU and report where it lives
    probe = torch.rand(5, 3).cuda()
    print(f"GPU tensor test successful: {probe.device}")

# Training settings gathered in one place
train_args = dict(
    data='/content/data.yaml',
    epochs=50,
    imgsz=640,
    batch=batch_size,
    device=device,
    name='scrap_detector',
)

# Start from the yolov8n.pt checkpoint and train on the dataset described by data.yaml
model = YOLO('yolov8n.pt')
results = model.train(**train_args)

✅ SUCCESS! GPU detected: Tesla T4
✅ GPU tensor test successful: cuda:0
Ultralytics 8.3.186 🚀 Python-3.12.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=scrap_detector, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, ove

**Training Results Directory Exploration**

In [None]:
# Recursively list the contents of the detection runs directory
!ls -R /content/runs/detect

/content/runs/detect:
scrap_detector

/content/runs/detect/scrap_detector:
args.yaml			 labels.jpg	      train_batch2.jpg
BoxF1_curve.png			 results.csv	      val_batch0_labels.jpg
BoxP_curve.png			 results.png	      val_batch0_pred.jpg
BoxPR_curve.png			 train_batch0.jpg     val_batch1_labels.jpg
BoxR_curve.png			 train_batch1.jpg     val_batch1_pred.jpg
confusion_matrix_normalized.png  train_batch2000.jpg  val_batch2_labels.jpg
confusion_matrix.png		 train_batch2001.jpg  val_batch2_pred.jpg
labels_correlogram.jpg		 train_batch2002.jpg  weights

/content/runs/detect/scrap_detector/weights:
best.pt  last.pt


**Video Processing and Model Inference**

In [None]:
import os

import torch
from google.colab import files
from ultralytics import YOLO

#Upload video file
print("Please upload your video file:")
uploaded = files.upload()
if not uploaded:
    raise RuntimeError("No video was uploaded; run this cell again and choose a file.")
video_path = next(iter(uploaded))

#Load your trained model: use a best.pt placed next to the notebook if there is
#one, otherwise fall back to the weights written by the 'scrap_detector' training run
weights_path = 'best.pt'
if not os.path.exists(weights_path):
    weights_path = '/content/runs/detect/scrap_detector/weights/best.pt'
model = YOLO(weights_path)

#Process video
results = model.predict(
    source=video_path,
    device=0 if torch.cuda.is_available() else 'cpu',
    save=True,
    conf=0.5,
    show=False
)

#Download the result
#Ask the predictor where it saved its output instead of assuming 'runs/detect/predict':
#the folder name gets a numeric suffix (predict2, predict3, ...) on repeated runs
predict_dir = str(model.predictor.save_dir)
video_files = sorted(
    name for name in os.listdir(predict_dir)
    if name.endswith(('.mp4', '.avi'))
)
if video_files:
    files.download(os.path.join(predict_dir, video_files[0]))
else:
    print(f"No .mp4/.avi output found in {predict_dir}")
