In [None]:
# Use the %pip magic rather than !pip: it installs into the environment of the
# running kernel, whereas !pip runs whichever pip the subshell finds first.
%pip install supabase ultralytics scikit-learn opencv-python requests pandas tqdm



In [None]:
import os
import requests
import cv2
import pandas as pd
from supabase import create_client
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from tqdm import tqdm

# =========================
# SUPABASE CONFIG
# =========================
from getpass import getpass

# The project URL is not a secret, so the original value stays as the default;
# set the SUPABASE_URL environment variable to point at another project.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://zpezidrqlotoyequnywe.supabase.co")

# The API key is a credential and must not be stored in the notebook source.
# It is taken from the SUPABASE_KEY environment variable; when that is unset
# (e.g. an interactive Colab session) the user is prompted without echo.
SUPABASE_KEY = os.environ.get("SUPABASE_KEY") or getpass("Supabase API key: ")
if not SUPABASE_KEY:
    raise ValueError("No Supabase API key provided (set SUPABASE_KEY or enter it at the prompt).")

supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Bucket and table names (change if you used different ones)
BUCKET_NAME = "room_images"
LOGS_TABLE = "occupancy_logs"

print("Supabase client initialized.")


Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Supabase client initialized.


In [None]:
# Pull every row of the logs table into memory.
resp = supabase.table(LOGS_TABLE).select("*").execute()
rows = resp.data

print("Total rows in occupancy_logs:", len(rows))

df = pd.DataFrame(rows)

# Without an image_url column there is nothing to download, so stop here.
if "image_url" not in df.columns:
    raise ValueError("Column 'image_url' not found in occupancy_logs table!")

# Retain only the rows whose image_url is neither null nor the empty string,
# then renumber the index from 0.
has_url = df["image_url"].notnull() & (df["image_url"] != "")
df = df.loc[has_url].reset_index(drop=True)

print("Rows with image_url:", len(df))
df.head()


Total rows in occupancy_logs: 739
Rows with image_url: 735


Unnamed: 0,id,timestamp,room,count,status,jetson_fps,image_url,model_name
0,d32a72b7-bd34-4900-84e9-c30812a5d518,2025-12-01T12:19:15+00:00,GK407,1,occupied,0.51,https://zpezidrqlotoyequnywe.supabase.co/stora...,
1,75f57822-2ddb-4b8c-ab74-a1f5da9b1257,2025-12-01T12:22:31+00:00,GK407,1,occupied,0.53,https://zpezidrqlotoyequnywe.supabase.co/stora...,yolov8n.pt
2,750bcb82-2c36-4c43-b9ac-f5e69eb82ee8,2025-12-01T12:23:03+00:00,GK407,1,occupied,0.52,https://zpezidrqlotoyequnywe.supabase.co/stora...,yolov8n.pt
3,2ea1185a-1fa7-4c8f-97dd-d9e940e67442,2025-12-01T12:23:13+00:00,GK407,1,occupied,4.69,https://zpezidrqlotoyequnywe.supabase.co/stora...,yolov8n.pt
4,a8f1a5f3-f1a9-4690-8af7-fc7eab4834df,2025-12-01T12:23:24+00:00,GK407,1,occupied,4.41,https://zpezidrqlotoyequnywe.supabase.co/stora...,yolov8n.pt


In [None]:
# Root of the on-disk dataset, plus the two staging folders that hold every
# downloaded image and every generated label file.
BASE_DIR = "dataset"
IMG_ALL_DIR, LBL_ALL_DIR = (
    os.path.join(BASE_DIR, sub) for sub in ("images_all", "labels_all")
)

# exist_ok=True makes this cell safe to re-run.
for folder in (BASE_DIR, IMG_ALL_DIR, LBL_ALL_DIR):
    os.makedirs(folder, exist_ok=True)

print("Folders prepared:")
for folder in (IMG_ALL_DIR, LBL_ALL_DIR):
    print(" -", folder)


Folders prepared:
 - dataset/images_all
 - dataset/labels_all


In [None]:
# Paths of images that were downloaded AND could be decoded by OpenCV.
valid_image_paths = []

# All images are fetched through a single Session so the underlying HTTP
# connection is pooled and reused instead of being re-established per image.
with requests.Session() as session:
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        url = row["image_url"]

        # Sanity checks (isinstance also rejects None / NaN values)
        if not isinstance(url, str) or len(url) < 10:
            print("Skipping invalid URL at row", idx)
            continue

        # Save as a simple deterministic name derived from the row index
        img_name = f"image_{idx}.jpg"
        img_path = os.path.join(IMG_ALL_DIR, img_name)

        try:
            r = session.get(url, timeout=10)
            if r.status_code != 200:
                print("Download failed:", url, "status:", r.status_code)
                continue

            with open(img_path, "wb") as f:
                f.write(r.content)

            # Validate image is readable; cv2.imread returns None on failure
            img = cv2.imread(img_path)
            if img is None:
                print("Corrupt image, removed:", img_path)
                os.remove(img_path)
                continue

            valid_image_paths.append(img_path)

        except Exception as e:
            # Best effort: report the failure and move on to the next row.
            print("Error downloading", url, "->", e)

print("Valid images downloaded:", len(valid_image_paths))


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 735/735 [35:49<00:00,  2.92s/it]

Valid images downloaded: 735





In [None]:
# Make sure the label folder exists before any label file is written.
os.makedirs(LBL_ALL_DIR, exist_ok=True)

# Pretrained YOLOv8n weights act as the automatic annotator; the weights file
# is downloaded automatically if it is not already present.
auto_label_model = YOLO("yolov8n.pt")

def auto_label_image(img_path):
    """Detect people in one image and return YOLO-format label lines.

    Args:
        img_path: Path of the image file to annotate.

    Returns:
        None when the image cannot be read; otherwise a list of strings
        "0 x y w h" (class id 0 followed by the four values of each entry in
        the detector's `boxes.xywhn`). The list is empty when nothing is
        detected.
    """
    image = cv2.imread(img_path)
    if image is None:
        print("Unreadable image:", img_path)
        return None

    # classes=[0] restricts the detector to the "person" class.
    detections = auto_label_model.predict(image, classes=[0], verbose=False)
    if len(detections) == 0:
        return []

    # Only the first result is used, since a single image was passed in.
    first = detections[0]
    if first.boxes is None or first.boxes.xywhn is None:
        return []

    return [
        f"0 {x} {y} {w} {h}"
        for x, y, w, h in (box[:4].tolist() for box in first.boxes.xywhn)
    ]

# Number of images for which a label file was written.
num_labeled = 0

for img_path in tqdm(valid_image_paths):
    labels = auto_label_image(img_path)

    # None means the image was unreadable; an empty list means no person was
    # detected. In both cases no label file is written for this image.
    if not labels:
        continue

    # Label file shares the image's base name, with a .txt extension.
    label_name = os.path.basename(img_path).replace(".jpg", ".txt")
    label_path = os.path.join(LBL_ALL_DIR, label_name)

    with open(label_path, "w") as f:
        f.write("\n".join(labels))

    num_labeled += 1

print("Images with at least one person labeled:", num_labeled)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 6.2MB 102.4MB/s 0.1s


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 735/735 [02:21<00:00,  5.20it/s]

Images with at least one person labeled: 733





In [None]:
import shutil

# For every downloaded image, work out where its label file would live, then
# keep only the (image, label) pairs whose label file actually exists.
candidates = [
    (
        img_path,
        os.path.join(LBL_ALL_DIR, os.path.basename(img_path).replace(".jpg", ".txt")),
    )
    for img_path in valid_image_paths
]
pairs = [(img, lbl) for img, lbl in candidates if os.path.exists(lbl)]

paired_images = [img for img, lbl in pairs]
paired_labels = [lbl for img, lbl in pairs]

print("Pairs (image+label):", len(paired_images))

# 80/20 train/val split; images and labels are split together so they stay
# aligned, and the fixed random_state keeps the split repeatable.
split = train_test_split(paired_images, paired_labels, test_size=0.2, random_state=42)
train_imgs, val_imgs, train_lbls, val_lbls = split

# Final YOLO folders
IMAGES_TRAIN = os.path.join(BASE_DIR, "images/train")
IMAGES_VAL = os.path.join(BASE_DIR, "images/val")
LABELS_TRAIN = os.path.join(BASE_DIR, "labels/train")
LABELS_VAL = os.path.join(BASE_DIR, "labels/val")

for d in [IMAGES_TRAIN, IMAGES_VAL, LABELS_TRAIN, LABELS_VAL]:
    # Start each split folder empty. Files left over from an earlier run with
    # a different split would otherwise stay in place, letting the same image
    # appear in both train and val and inflating the counts printed below.
    shutil.rmtree(d, ignore_errors=True)
    os.makedirs(d, exist_ok=True)

def copy_pairs(img_paths, lbl_paths, img_dest, lbl_dest):
    """Copy images and their label files into the given destination folders.

    Args:
        img_paths: Image file paths.
        lbl_paths: Label file paths, positionally aligned with img_paths.
        img_dest: Existing folder that receives the image copies.
        lbl_dest: Existing folder that receives the label copies.

    Each file keeps its base name; shutil.copy2 is used for the copy.
    """
    for src_img, src_lbl in zip(img_paths, lbl_paths):
        # Image first, then its label, each into its own destination folder.
        for src, dest_dir in ((src_img, img_dest), (src_lbl, lbl_dest)):
            shutil.copy2(src, os.path.join(dest_dir, os.path.basename(src)))

# Populate the train and val folders with their image/label pairs.
copy_pairs(train_imgs, train_lbls, IMAGES_TRAIN, LABELS_TRAIN)
copy_pairs(val_imgs, val_lbls, IMAGES_VAL, LABELS_VAL)

# Report how many files each split folder now contains.
for caption, folder in (
    ("Train images:", IMAGES_TRAIN),
    ("Val images:", IMAGES_VAL),
    ("Train labels:", LABELS_TRAIN),
    ("Val labels:", LABELS_VAL),
):
    print(caption, len(os.listdir(folder)))


Pairs (image+label): 733
Train images: 586
Val images: 147
Train labels: 586
Val labels: 147


In [None]:
# Dataset description consumed by the training cell: image folders for the
# train and val splits, and a single class named "person".
data_yaml = f"""
train: {IMAGES_TRAIN}
val: {IMAGES_VAL}

nc: 1
names: ["person"]
"""

with open("data.yaml", "w") as f:
    f.write(data_yaml)

# Read the file back to show exactly what landed on disk; the with-block
# closes the handle instead of leaving it open until garbage collection.
with open("data.yaml") as f:
    print(f.read())



train: dataset/images/train
val: dataset/images/val

nc: 1
names: ["person"]



In [None]:
from ultralytics import YOLO

# Start from pretrained YOLOv8n
model = YOLO("yolov8n.pt")

# Train on our auto-labeled dataset
results = model.train(
    data="data.yaml",
    epochs=30,
    imgsz=640,
    batch=16,
    device="cpu"  # Colab CPU, ok for demo
)

# Report the directory this run actually wrote to. Ultralytics picks a fresh
# run folder (train, train2, ...) when an earlier one exists, so a hardcoded
# "runs/detect/train" would be wrong on any re-run.
print("Training complete. Check", model.trainer.save_dir, "for results.")


Ultralytics 8.3.233 üöÄ Python-3.12.12 torch-2.9.0+cu126 CPU (Intel Xeon CPU @ 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12

In [None]:
from google.colab import files

# Download the best checkpoint of the run that `model` just trained. Asking
# the trainer for the path avoids fetching stale (or missing) weights from a
# hardcoded runs/detect/train folder after the training cell has been re-run.
files.download(str(model.trainer.best))
