In [None]:
import os
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split
import yaml
import glob
from tqdm import tqdm
import cv2

In [None]:
# Create a folder to extract into (-p: no error if it already exists)
!mkdir -p /content/trash_dataset
# Unzip the uploaded file. -o overwrites existing files without prompting, so
# re-running this cell does not hang on unzip's interactive "replace?" question.
!unzip -o -q /content/trash.zip -d /content/trash_dataset


replace /content/trash_dataset/README.dataset.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
import os
import pandas as pd

# Paths to the extracted training split
images_dir = '/content/trash_dataset/train/images'
labels_dir = '/content/trash_dataset/train/labels'

# Get all image filenames. os.listdir() returns entries in arbitrary order, so
# sort them: the seeded train/val split that consumes this CSV is only
# reproducible when the rows always arrive in the same order.
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png')))

# Pair every image with its label file (same stem, .txt extension)
data = []
for img_file in image_files:
    label_file = os.path.splitext(img_file)[0] + '.txt'

    img_path = os.path.join(images_dir, img_file)
    label_path = os.path.join(labels_dir, label_file)

    # Only include if label file exists
    if os.path.exists(label_path):
        data.append({'image_path': img_path, 'label_path': label_path})

# An empty list would produce a CSV without a header row, which pandas cannot
# read back; stop here with a message that points at the real problem.
if not data:
    raise FileNotFoundError(
        f"No image/label pairs found in {images_dir} and {labels_dir}"
    )

# Save to CSV (explicit columns keep the schema stable)
df = pd.DataFrame(data, columns=['image_path', 'label_path'])
df.to_csv('annotations.csv', index=False)

print("annotations.csv created successfully!")

In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

# --- Config ---
csv_file = 'annotations.csv'   # image/label pairs to split
dataset_root = 'dataset'       # root of the YOLO folder layout
train_ratio = 0.8              # 80% training, 20% validation

# --- Make folders ---
# Layout: <dataset_root>/{images,labels}/{train,val}
for kind in ('images', 'labels'):
    for subset in ('train', 'val'):
        os.makedirs(f'{dataset_root}/{kind}/{subset}', exist_ok=True)

# --- Load CSV ---
df = pd.read_csv(csv_file)

# --- Split CSV ---
# A fixed random_state keeps the split identical between runs.
val_fraction = 1 - train_ratio
train_df, val_df = train_test_split(df, test_size=val_fraction, random_state=42)

def copy_files(split_df, split, root=None):
    """Copy every image/label pair listed in ``split_df`` into one split folder.

    Files land in ``<root>/images/<split>`` and ``<root>/labels/<split>`` and
    keep their original base names.

    Args:
        split_df: DataFrame with ``image_path`` and ``label_path`` columns.
        split: Name of the split sub-folder, e.g. ``'train'`` or ``'val'``.
        root: Destination dataset root. When omitted, the notebook-level
            ``dataset_root`` is used.
    """
    if root is None:
        root = dataset_root

    img_dir = os.path.join(root, 'images', split)
    lbl_dir = os.path.join(root, 'labels', split)
    # Create the targets here so the function also works for a fresh root.
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)

    # Iterating the two columns directly avoids building a Series per row.
    for img_src, lbl_src in zip(split_df['image_path'], split_df['label_path']):
        shutil.copy(img_src, os.path.join(img_dir, os.path.basename(img_src)))
        shutil.copy(lbl_src, os.path.join(lbl_dir, os.path.basename(lbl_src)))


# Populate the train folders first, then the validation folders.
for subset_df, subset_name in ((train_df, 'train'), (val_df, 'val')):
    copy_files(subset_df, subset_name)

print(" Images and labels copied to YOLO folder structure.")


In [None]:
import os
import shutil
import yaml
from tqdm import tqdm

# --- CONFIG ---
SRC_ROOT = "dataset"           # source YOLO dataset (images/, labels/)
DST_ROOT = "dataset_5class"    # destination for the merged-class dataset

# Create the images/labels x train/val folders under the destination root.
for subdir in ("images/train", "images/val", "labels/train", "labels/val"):
    os.makedirs(f"{DST_ROOT}/{subdir}", exist_ok=True)

# --- CLASS MERGE MAP ---
# Coarse material -> fine-grained class names folded into it.
# The key order defines the new class ids (first key = 0, second = 1, ...).
merge_map = {
    # Plastic
    "plastic": [
        "plastic",
        "plastic_bag",
        "plastic_container",
        "plastic_film",
        "plastic_lid",
        "plastic_pack",
        "plastic_straw",
        "plastic_utensils",
        "clear_plastic_bottle",
        "other_plastic",
        "other_plastic_bottle",
        "other_plastic_wrapper",
        "single-use_carrier_bag",
        "styrofoam_piece",
        "cups",
        "disposable_food_container",
        "disposable_plastic_cup",
    ],
    # Metal
    "metal": [
        "can",
        "drink_can",
        "metal",
        "foil",
        "aluminum_foil",
        "pop_tab",
        "metal_bottle_cap",
    ],
    # Paper / Cardboard
    "paper": [
        "paper",
        "normal_paper",
        "carton",
        "tetra_pack",
        "paper_cup",
        "drink_carton",
        "other_carton",
    ],
    # Glass
    "glass": [
        "glass_bottle",
        "other_glass",
        "jar",
    ],
    # Other
    "other": [
        "battery",
        "biological",
        "clothes",
        "clothing",
        "compostable",
        "electronic",
        "rope&strings",
        "trash",
        "unknown",
        "wood",
        "tissues",
        "other",
    ],
}

# Flat list of every fine-grained class name, in merge_map order.
all_classes = [name for members in merge_map.values() for name in members]

# Build lookup: fine class name -> coarse class ID
label_to_material = {
    name: material_id
    for material_id, members in enumerate(merge_map.values())
    for name in members
}

# --- Original class names (same as in your data.yaml) ---
# The position of each name is used as its class id when decoding labels.
# NOTE(review): "drink_carton"/"drink_can" and "tissues"/"tetra_pack" break the
# otherwise alphabetical order of this list -- confirm the order against the
# source dataset's data.yaml, otherwise those ids are remapped to the wrong class.
orig_class_names = [
    "aluminum_foil", "battery", "biological", "bottle", "can", "carton",
    "cigarette", "clear_plastic_bottle", "clothes", "clothing",
    "compostable", "cups", "disposable_food_container",
    "disposable_plastic_cup", "drink_carton", "drink_can",
    "electronic", "foil", "glass_bottle", "jar", "metal",
    "metal_bottle_cap", "normal_paper", "other", "other_carton",
    "other_glass", "other_plastic", "other_plastic_bottle",
    "other_plastic_wrapper", "paper", "paper_cup", "plastic",
    "plastic_bag", "plastic_container", "plastic_film", "plastic_lid",
    "plastic_pack", "plastic_straw", "plastic_utensils", "pop_tab",
    "rope&strings", "single-use_carrier_bag", "styrofoam_piece",
    "tissues", "tetra_pack", "trash", "unknown", "wood"
]


def remap_label_lines(lines, class_names, lookup):
    """Translate YOLO label lines from fine-grained ids to merged ids.

    Args:
        lines: Iterable of label lines, each ``"<class_id> <xc> <yc> <w> <h>"``.
        class_names: Names indexed by the original class id.
        lookup: Mapping from original class name to the new class id.

    Returns:
        List of rewritten lines (newline-terminated). Lines that do not have
        exactly five fields, or whose class has no entry in ``lookup``, are
        dropped.

    Raises:
        ValueError: If a class id is not an integer or lies outside
            ``class_names`` (a negative id would otherwise wrap around and
            silently select the wrong class).
    """
    remapped = []
    for line in lines:
        parts = line.split()
        if len(parts) != 5:
            continue
        cls_id, xc, yc, w, h = parts
        cls_idx = int(cls_id)
        if not 0 <= cls_idx < len(class_names):
            raise ValueError(
                f"class id {cls_idx} is outside 0..{len(class_names) - 1}"
            )
        new_cls = lookup.get(class_names[cls_idx])
        if new_cls is None:
            # Class is not part of the merge map (e.g. "bottle", "cigarette").
            continue
        remapped.append(f"{new_cls} {xc} {yc} {w} {h}\n")
    return remapped


# --- Rewrite labels ---
for split in ["train", "val"]:
    src_img_dir = os.path.join(SRC_ROOT, "images", split)
    src_lbl_dir = os.path.join(SRC_ROOT, "labels", split)
    dst_img_dir = os.path.join(DST_ROOT, "images", split)
    dst_lbl_dir = os.path.join(DST_ROOT, "labels", split)

    # Index images by file stem so a label finds its image whatever the
    # extension is (the dataset may contain .png as well as .jpg files).
    images_by_stem = {
        os.path.splitext(name)[0]: name for name in os.listdir(src_img_dir)
    }

    # Only real label files; ignores stray entries such as checkpoint folders.
    label_files = sorted(f for f in os.listdir(src_lbl_dir) if f.endswith(".txt"))

    for lbl_file in tqdm(label_files, desc=f"Processing {split}"):
        src_lbl_path = os.path.join(src_lbl_dir, lbl_file)
        dst_lbl_path = os.path.join(dst_lbl_dir, lbl_file)

        # Resolve the image first so a label is never written without one.
        img_name = images_by_stem.get(os.path.splitext(lbl_file)[0])
        if img_name is None:
            tqdm.write(f"Skipping {lbl_file}: no matching image in {src_img_dir}")
            continue

        with open(src_lbl_path, "r") as f:
            lines = f.readlines()

        try:
            new_lines = remap_label_lines(lines, orig_class_names, label_to_material)
        except ValueError as err:
            # Re-raise with the offending file so the bad label is easy to find.
            raise ValueError(f"{src_lbl_path}: {err}") from err

        if new_lines:
            with open(dst_lbl_path, "w") as f:
                f.writelines(new_lines)
            # copy image only if it has valid labels
            shutil.copy(os.path.join(src_img_dir, img_name), os.path.join(dst_img_dir, img_name))

# --- Write new data.yaml ---
# Class ids come from the key order of merge_map, the same order that was used
# to build the label lookup, so the names can never drift from the labels.
data_yaml = {
    'path': os.path.abspath(DST_ROOT),
    'train': 'images/train',
    'val': 'images/val',
    'names': dict(enumerate(merge_map))
}
yaml_path = os.path.join(DST_ROOT, 'data.yaml')
with open(yaml_path, 'w') as f:
    yaml.dump(data_yaml, f)

print("\n✅ New 5-class dataset created at:", DST_ROOT)
print("✅ You can now train YOLO using this file:", yaml_path)

In [None]:
pip install ultralytics

In [None]:
from ultralytics import YOLO
import os

# Path to your dataset config file (data.yaml)
data_yaml = r"/content/dataset_5class/data.yaml"

# --- Model setup ---
model_version = 'yolov8'   # checkpoint family; the variant letter is appended below
variants = ['s']           # or ['m'] if GPU allows

for variant in variants:
    model_name = f'{model_version}{variant}.pt'
    model = YOLO(model_name)  # Load pre-trained YOLO model

    # --- Training ---
    model.train(
        data=data_yaml,        # path to data.yaml
        epochs=50,             # train longer for better accuracy
        imgsz=640,             # image size
        batch=16,              # adjust if memory issue
        name=f'{model_version}{variant}_trained',
        # Reuse the same run folder on re-runs. Without this a second run is
        # saved as "<name>2", while the inference cell keeps loading the stale
        # weights from "<name>".
        exist_ok=True,
        verbose=True
    )

    print(f"✅ Training complete for {model_name}")

print("🎯 All models trained successfully.")


In [None]:
from ultralytics import YOLO

# Reload the best checkpoint written by the training run.
best_weights = "/content/runs/detect/yolov8s_trained/weights/best.pt"
model = YOLO(best_weights)

# Run inference on the held-out test images and save the annotated copies.
test_images = "/content/trash_dataset/test/images"
results = model.predict(source=test_images, imgsz=640, save=True)


In [None]:
from ultralytics import YOLO
from IPython.display import Video
from IPython.display import display, Image, clear_output
import cv2
import glob
import os
import shutil
import subprocess
import time

# Load your trained model: the fine-tuned weights from the training run, not
# the stock "yolov8s.pt" checkpoint, which knows nothing about the trash classes.
model = YOLO("/content/runs/detect/yolov8s_trained/weights/best.pt")

# Path to uploaded video
video_path = "/content/video.mp4"

# Run prediction and save the annotated video. stream=True yields one result
# per frame instead of keeping every frame's result in memory.
results = model.predict(source=video_path, save=True, stream=True)
save_dir = None
for frame_result in results:
    save_dir = frame_result.save_dir   # folder this prediction run writes to
if save_dir is None:
    raise RuntimeError(f"No frames were processed from {video_path}")

# results[...].path is the *input* file; the annotated video is written to
# save_dir under the same stem (the container/extension depends on the platform).
video_stem = os.path.splitext(os.path.basename(video_path))[0]
candidates = sorted(
    glob.glob(os.path.join(glob.escape(str(save_dir)), glob.escape(video_stem) + ".*"))
)
if not candidates:
    raise FileNotFoundError(f"No annotated video for {video_path} found in {save_dir}")
output_video = candidates[0]

# Browsers cannot play AVI inline, so convert to MP4 when ffmpeg is available.
if not output_video.lower().endswith(".mp4") and shutil.which("ffmpeg"):
    mp4_path = os.path.splitext(output_video)[0] + ".mp4"
    subprocess.run(
        ["ffmpeg", "-y", "-loglevel", "error", "-i", output_video, mp4_path],
        check=True,
    )
    output_video = mp4_path

# Display the video in Colab
Video(output_video, embed=True)
