# create environment and check  library
python -m venv yolov8-mnc
yolov8-mnc\Scripts\activate
pip install --upgrade pip
pip install ultralytics opencv-python matplotlib scikit-learn ipykernel
python -m ipykernel install --user --name yolov8-mnc --display-name "YOLOv8-MNC (py3.12)"

# test environment 


In [None]:
import sys
import torch
import ultralytics
print("Python version:", sys.version)
print("Ultralytics version:", ultralytics.__version__)
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


# Annotation Check

In [1]:
import os

images_dir = 'remove_duplicate_image/Cigarette-Detection/images'
labels_dir = 'remove_duplicate_image/Cigarette-Detection/labels'

image_files = sorted([f for f in os.listdir(images_dir) if f.endswith('.jpg')])
label_files = sorted([f for f in os.listdir(labels_dir) if f.endswith('.txt')])

print(f"Images: {len(image_files)}, Labels: {len(label_files)}")

images_without_labels = []
labels_without_images = []

for img in image_files:
    label = os.path.splitext(img)[0] + '.txt'
    if not os.path.exists(os.path.join(labels_dir, label)):
        images_without_labels.append(img)

for lbl in label_files:
    image = os.path.splitext(lbl)[0] + '.jpg'
    if not os.path.exists(os.path.join(images_dir, image)):
        labels_without_images.append(lbl)

print("Images without label:", images_without_labels[:5])
print("Labels without image:", labels_without_images[:5])


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'remove_duplicate_image/Cigarette-Detection/images'

# Train/Val Split

In [None]:
import shutil
from sklearn.model_selection import train_test_split

# Only keep images that have matching labels
images_with_labels = [f for f in image_files if os.path.exists(os.path.join(labels_dir, os.path.splitext(f)[0] + '.txt'))]

train_imgs, val_imgs = train_test_split(images_with_labels, test_size=0.2, random_state=42, shuffle=True)

print(f"Train: {len(train_imgs)}, Val: {len(val_imgs)}")

for split in ['train', 'val']:
    os.makedirs(f'dataset/images/{split}', exist_ok=True)
    os.makedirs(f'dataset/labels/{split}', exist_ok=True)

def move_split(img_list, split):
    for img in img_list:
        lbl = os.path.splitext(img)[0] + '.txt'
        shutil.copy(os.path.join(images_dir, img), f'dataset/images/{split}/{img}')
        shutil.copy(os.path.join(labels_dir, lbl), f'dataset/labels/{split}/{lbl}')

move_split(train_imgs, 'train')
move_split(val_imgs, 'val')


# Create data.yaml

In [None]:
yaml_content = """
path: dataset
train: images/train
val: images/val

names:
  0: cigarette
"""

with open('dataset/data.yaml', 'w') as f:
    f.write(yaml_content)

print("Created dataset/data.yaml!")


# Visualize Example Annotation

In [None]:
import cv2
import matplotlib.pyplot as plt

def show_annotated(idx=0, split='train'):
    img_folder = f'dataset/images/{split}'
    lbl_folder = f'dataset/labels/{split}'
    imgs = sorted(os.listdir(img_folder))
    if not imgs:
        print("No images found!")
        return
    img_path = os.path.join(img_folder, imgs[idx])
    lbl_path = os.path.join(lbl_folder, os.path.splitext(imgs[idx])[0] + '.txt')

    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = img.shape

    with open(lbl_path) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue
            c, x, y, bw, bh = map(float, parts)
            x1 = int((x - bw/2) * w)
            y1 = int((y - bh/2) * h)
            x2 = int((x + bw/2) * w)
            y2 = int((y + bh/2) * h)
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(img, 'cigarette', (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1)
    plt.figure(figsize=(6,6))
    plt.imshow(img)
    plt.axis('off')
    plt.show()

show_annotated(0, 'train')


# Train YOLOv8-MNC

In [None]:
from ultralytics import YOLO

model = YOLO('yolov8mnc.pt')  # Make sure you have the correct YOLOv8-MNC weights file

model.train(
    data='dataset/data.yaml',
    epochs=400,
    imgsz=640,
    batch=32,     # Adjust for your VRAM
    device=0      # 'cpu' if no GPU
)
