In [80]:
import os
import cv2
from ultralytics import YOLO
import pandas as pd
from sklearn.model_selection import train_test_split


In [81]:
# Root folder of each dataset split.
train_folder = "dataset/train"
test_folder = "dataset/test"
valid_folder = "dataset/valid"

# Every split stores its bounding-box annotations in `_annotations.csv`
# directly inside the split folder.
train_annotation, test_annotation, valid_annotation = (
    os.path.join(split_folder, "_annotations.csv")
    for split_folder in (train_folder, test_folder, valid_folder)
)


In [82]:
def load_annotations(csv_file):
    """Read an annotation CSV into a pandas DataFrame.

    Args:
        csv_file: Path (or any other source accepted by ``pd.read_csv``)
            of the annotation CSV.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    annotations = pd.read_csv(csv_file)
    return annotations

# Load the annotation table of each split (train, test, valid — in that order).
train_annotations = load_annotations(train_annotation)
test_annotations = load_annotations(test_annotation)
valid_annotations = load_annotations(valid_annotation)

# Preview the first training rows under a heading; one print call with a
# newline separator emits the same text as printing the two pieces separately.
print("Train Annotations:", train_annotations.head(), sep="\n")


Train Annotations:
                                           filename  width  height  \
0  101_jpeg.rf.0a1b2b7c9c5dc78d68e338c00628d072.jpg    640     640   
1  149_jpeg.rf.12791a08af9b198a4975c55cc72bd61c.jpg    640     640   
2  140_jpeg.rf.02c767f81d5d7d18f9c211d633a58f6f.jpg    640     640   
3  140_jpeg.rf.02c767f81d5d7d18f9c211d633a58f6f.jpg    640     640   
4  140_jpeg.rf.02c767f81d5d7d18f9c211d633a58f6f.jpg    640     640   

             class  xmin  ymin  xmax  ymax  
0  old-aged-person    22     3   400   640  
1  old-aged-person    26    70   309   489  
2  old-aged-person   464     3   622   303  
3  old-aged-person   335    34   442   297  
4  old-aged-person   221     5   338   224  


In [83]:
import yaml

base_path = 'dataset'

# Image folder of every split: <base_path>/<split>/image.
train_path, valid_path, test_path = (
    os.path.join(base_path, split_name, 'image')
    for split_name in ('train', 'valid', 'test')
)

# Class names; a class's position in this list is used as its index in `names`.
classes = ['old-aged-person', 'cane', 'wheelchair']

# Dataset description handed to the trainer: split locations, class count, class names.
data_dict = dict(
    train=train_path,
    val=valid_path,
    test=test_path,
    nc=len(classes),
    names=classes,
)

# Serialise the description next to the split folders.
yaml_file_path = os.path.join(base_path, 'datasett.yaml')
with open(yaml_file_path, 'w') as yaml_out:
    yaml.dump(data_dict, yaml_out)

print(f'datasett.yaml created at: {yaml_file_path}')


datasett.yaml created at: dataset/datasett.yaml


In [84]:
# Inputs for converting the validation annotations to YOLO label files.
csv_file = 'dataset/valid/_annotations.csv'
image_folder = 'dataset/valid/image'
# Label files are written into the same folder as the images.
output_label_folder = 'dataset/valid/image'

os.makedirs(output_label_folder, exist_ok=True)

# Class name -> integer class id written to the label files.
class_mapping = {
    class_name: class_index
    for class_index, class_name in enumerate(('old-aged-person', 'cane', 'wheelchair'))
}

df = pd.read_csv(csv_file)

In [85]:
# Convert every CSV row (pixel-space corner box) into a YOLO label line
#   "<class_id> <x_center> <y_center> <width> <height>"
# with all four box values normalised by the image width/height.
#
# Lines are first collected per label file and each file is then written once in
# 'w' mode. Opening the files in append mode instead would add a duplicate copy of
# every box each time this cell is re-run.
label_lines = {}

for index, row in df.iterrows():
    # `filename` is assigned before the class check so that, after the loop, it
    # still holds the filename of the last CSV row (a later cell reads it).
    filename = row['filename']

    class_id = class_mapping.get(row['class'])
    if class_id is None:
        # Class name not present in class_mapping: skip this row.
        continue

    width = row['width']
    height = row['height']
    xmin, ymin = row['xmin'], row['ymin']
    xmax, ymax = row['xmax'], row['ymax']

    x_center = (xmin + xmax) / 2 / width
    y_center = (ymin + ymax) / 2 / height
    bbox_width = (xmax - xmin) / width
    bbox_height = (ymax - ymin) / height

    yolo_annotation = f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}\n"

    # One label file per image, named after the image with a .txt extension.
    label_filename = os.path.join(output_label_folder, os.path.splitext(filename)[0] + '.txt')
    label_lines.setdefault(label_filename, []).append(yolo_annotation)

for label_filename, lines in label_lines.items():
    with open(label_filename, 'w') as f:
        f.writelines(lines)

print("Conversion complete.")


Conversion complete.


In [86]:
import albumentations as A
output_augmented_folder = 'dataset/valid/augmented_image'

os.makedirs(output_augmented_folder, exist_ok=True)

# Random augmentation pipeline; each transform fires with its own probability `p`.
# NOTE(review): the pipeline is built without `bbox_params` and only the image is
# passed in, so bounding boxes are not transformed together with the pixels. The
# geometric transforms (flip / rotate / shift-scale-rotate) therefore produce an
# image that the existing label file presumably no longer matches — confirm before
# using the augmented images for training.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Blur(blur_limit=3, p=0.2),
    A.ColorJitter(p=0.2),
    A.RandomBrightnessContrast(p=0.2),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=15, p=0.5)
])

# Select the image explicitly (last row of the annotation table) instead of relying
# on a `filename` variable left behind by a loop in another cell.
filename = df['filename'].iloc[-1]
image_path = os.path.join(image_folder, filename)

# cv2.imread does not raise on a missing/unreadable file, it returns None.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

augmented = transform(image=image)
augmented_image = augmented['image']

augmented_image_filename = f"aug_{filename}"
augmented_image_path = os.path.join(output_augmented_folder, augmented_image_filename)
# Last expression of the cell: displays True when the image was written.
cv2.imwrite(augmented_image_path, augmented_image)

True

In [87]:
# base_path = 'dataset'

# train_path = os.path.join(base_path, 'train', 'image')
# valid_path = os.path.join(base_path, 'valid', 'image')
# test_path = os.path.join(base_path, 'test', 'image')
# augmented_path = os.path.join(base_path, 'valid', 'augmented_image')

# classes = ['old-aged-person', 'cane', 'wheelchair']

# data_dict = {
#     'train': [train_path, augmented_path], 
#     'val': valid_path,
#     'test': test_path,
#     'nc': len(classes),
#     'names': classes,
# }

# yaml_file_path = os.path.join(base_path, 'datasett_augmented.yaml')
# with open(yaml_file_path, 'w') as file:
#     yaml.dump(data_dict, file)

# print(f'datasett_augmented.yaml created at: {yaml_file_path}')


In [88]:
# Dataset description used for training.
# 'dataset/datasett_augmented.yaml' is not written by any active cell of this
# notebook (the cell that would generate it is commented out), so on a fresh
# checkout it may not exist. Fall back to 'dataset/datasett.yaml', which the
# notebook does create, and report which file is actually used.
data_yaml = 'dataset/datasett_augmented.yaml'
if not os.path.exists(data_yaml):
    data_yaml = 'dataset/datasett.yaml'
print(f"Training with dataset config: {data_yaml}")

# Start from the pretrained yolov8n checkpoint and fine-tune for 50 epochs.
model = YOLO("yolov8n.pt")

model.train(data=data_yaml, epochs=50)

# Store the trained model under the name the inference cells load.
model.save('yolov8.pt')



New https://pypi.org/project/ultralytics/8.3.4 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.2 🚀 Python-3.12.2 torch-2.4.1 CPU (Apple M2)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset/datasett_augmented.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train53, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=Fa

[34m[1mtrain: [0mScanning /Users/hamooshaq/projek/sistem_disabilitas1/dataset/train/image.cache... 113 images, 1 backgrounds, 0 corrupt: 100%|██████████| 114/114 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /Users/hamooshaq/projek/sistem_disabilitas1/dataset/valid/image... 33 images, 0 backgrounds, 0 corrupt: 100%|██████████| 33/33 [00:00<00:00, 1828.36it/s]

[34m[1mval: [0mNew cache created: /Users/hamooshaq/projek/sistem_disabilitas1/dataset/valid/image.cache
Plotting labels to runs/detect/train53/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 63 weight(decay=0.0), 70 weight(decay=0.0005), 69 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train53[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G      1.952       3.53      2.083         58        640:  12%|█▎        | 1/8 [00:16<01:53, 16.22s/it]


KeyboardInterrupt: 

In [None]:
# Run validation with the `model` object already present in the kernel and keep
# whatever `model.val()` returns in `results`.
# NOTE(review): `model` is not defined in this cell; it comes from an earlier cell,
# presumably the training cell — confirm it holds the intended weights before
# trusting the reported metrics.
results = model.val()


Ultralytics 8.3.2 🚀 Python-3.12.2 torch-2.4.1 CPU (Apple M2)
Model summary (fused): 186 layers, 2,684,953 parameters, 0 gradients, 6.8 GFLOPs


[34m[1mval: [0mScanning /Users/hamooshaq/projek/sistem_disabilitas1/dataset/valid/image.cache... 33 images, 0 backgrounds, 0 corrupt: 100%|██████████| 33/33 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:10<00:00,  3.34s/it]


                   all         33         58      0.797      0.631      0.732       0.38
       old-aged-person         18         31      0.679      0.581      0.666      0.265
                  cane         15         20      0.711      0.616      0.721      0.334
            wheelchair          7          7          1      0.696      0.809       0.54
Speed: 1.9ms preprocess, 291.0ms inference, 0.0ms loss, 4.5ms postprocess per image
Results saved to [1mruns/detect/train242[0m


In [None]:
import cv2
from ultralytics import YOLO

# YOLO() loads exactly one weights file; its second positional parameter is the
# task name, not a second checkpoint, so only 'yolov8n.pt' is passed here.
# This cell therefore runs the pretrained yolov8n weights on the webcam stream.
model = YOLO('yolov8n.pt')

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even when
# the loop is stopped by a kernel interrupt or an inference error.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unavailable or stream ended).
            break

        results = model(frame)

        # Draw the detections of the single input frame.
        annotated_frame = results[0].plot()

        cv2.imshow("Webcam", annotated_frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 1 person, 80.0ms
Speed: 9.8ms preprocess, 80.0ms inference, 9.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 52.8ms
Speed: 2.9ms preprocess, 52.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.0ms
Speed: 2.4ms preprocess, 62.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 61.5ms
Speed: 2.0ms preprocess, 61.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 116.8ms
Speed: 2.8ms preprocess, 116.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 85.6ms
Speed: 2.3ms preprocess, 85.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.6ms
Speed: 2.5ms preprocess, 53.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 45.6ms
Speed: 1.9ms preprocess, 45.6ms inference, 0.4ms postprocess per image at shape (1, 3, 

In [None]:
# Run the fine-tuned weights saved by the training cell on the webcam stream.
model = YOLO("yolov8.pt")

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even when
# the loop is stopped by a kernel interrupt or an error.
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() asks the
        # kernel itself to shut down, discarding all notebook state.
        raise RuntimeError("Error: Could not open webcam.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        results = model(frame)

        # Draw the detections of the single input frame.
        frame_with_detections = results[0].plot()

        cv2.imshow("YOLOv8 Detection", frame_with_detections)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 (no detections), 60.1ms
Speed: 4.3ms preprocess, 60.1ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 86.8ms
Speed: 2.2ms preprocess, 86.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 124.4ms
Speed: 3.4ms preprocess, 124.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 127.7ms
Speed: 3.5ms preprocess, 127.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 78.3ms
Speed: 3.9ms preprocess, 78.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 79.3ms
Speed: 3.4ms preprocess, 79.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 81.9ms
Speed: 3.2ms preprocess, 81.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 78.4ms
Speed: 3.3ms preprocess, 78.4



0: 384x640 (no detections), 152.3ms
Speed: 3.6ms preprocess, 152.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 111.6ms
Speed: 7.6ms preprocess, 111.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 105.3ms
Speed: 3.5ms preprocess, 105.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 117.3ms
Speed: 3.4ms preprocess, 117.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 115.6ms
Speed: 3.6ms preprocess, 115.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 145.3ms
Speed: 3.7ms preprocess, 145.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 114.6ms
Speed: 7.5ms preprocess, 114.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 151.6ms
Speed: 4.8ms prepro

In [None]:
# Run two detectors on every webcam frame and show both sets of detections:
# model1 = pretrained yolov8n weights, model2 = weights saved by the training cell.
model1 = YOLO("yolov8n.pt")
model2 = YOLO("yolov8.pt")

cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even when
# the loop is stopped by a kernel interrupt or an inference error.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results1 = model1(frame)
        results2 = model2(frame)

        # Each call returns a list with one result for the single input frame.
        # Concatenating the two lists and plotting element [0] would draw only
        # model1's detections, so draw model1 first and then let model2 draw on
        # top of that already-annotated image.
        annotated_frame = results1[0].plot()
        annotated_frame = results2[0].plot(img=annotated_frame)

        cv2.imshow("Webcam", annotated_frame)

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
