In [None]:
# ==========================================================
# 🧠 YOLOv8 Segmentation Training (Hugging Face Dataset)
# ==========================================================

# Install the third-party libraries imported below (%pip targets the running kernel's environment).
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
%pip install ultralytics datasets huggingface_hub --quiet

from datasets import load_dataset
from ultralytics import YOLO
import os, shutil
from huggingface_hub import login, snapshot_download
from huggingface_hub import HfApi

In [None]:
# ----------------------------------------------------------
# 1️⃣  CONFIG
# ----------------------------------------------------------

# Source dataset repository on the Hugging Face Hub and the local folder it is saved to.
HF_DATASET, SUBDIR = "duartepcruz/acacia_dataset", "data"

# Training settings consumed by the training cell.
EPOCHS, IMG_SIZE = 50, 640

# Access token and target model repository come from the environment (None when unset).
hf_token = os.getenv("HF_API_TOKEN")
hub_model_id = os.getenv("HUGGINGFACE_MODEL_ID")

# --- Authenticate ---
print("🔐 Logging in to Hugging Face Hub...")
login(token=hf_token)

Downloading the dataset

In [None]:
# -------------------------------
# 2️⃣  DOWNLOAD DATASET
# -------------------------------
# The snapshot is requested every time this cell runs; files are placed under
# SUBDIR and the returned local path is kept for the cells that follow.
print("⬇️ Downloading dataset from Hugging Face...")
download_kwargs = {
    "repo_id": HF_DATASET,
    "repo_type": "dataset",
    "token": hf_token,
    "local_dir": SUBDIR,
}
local_dataset_path = snapshot_download(**download_kwargs)


Edit data.yaml

In [None]:
# -------------------------------
# 3️⃣  CREATE data.yaml (rewritten on every run)
# -------------------------------
# Dataset description handed to the trainer: dataset root, train/val image
# folders and the class-id -> name mapping for the single 'acacias' class.
data_yaml_path = os.path.join(local_dataset_path, "data.yaml")

# Use the actual download location as the dataset root instead of a hard-coded
# Colab path, so the file stays valid wherever the notebook is executed.
dataset_root = os.path.abspath(local_dataset_path)

yaml_content = f"""path: {dataset_root}
train: images/train
val: images/val
nc: 1
names:
  0: acacias
"""
with open(data_yaml_path, "w", encoding="utf-8") as f:
    f.write(yaml_content)
print("✅ data.yaml created for single class 'acacias'.")

Converting masks to YOLO format

In [None]:
from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
import os

# Resolve the dataset root from the download step rather than hard-coding the
# Colab location, so the conversion follows the data wherever it was saved.
base_data_path = os.path.abspath(local_dataset_path)

# Masks are read from labels/<split>; the converter output is written into the
# same folders (input and output directories are intentionally identical).
train_masks_dir = os.path.join(base_data_path, "labels", "train")
val_masks_dir = os.path.join(base_data_path, "labels", "val")
train_output_dir = train_masks_dir
val_output_dir = val_masks_dir

# Number of classes handed to the converter.
# NOTE(review): data.yaml declares `nc: 1` while this value is 2 — confirm
# which one matches the pixel values actually present in the masks.
num_classes = 2

# One pass per split; the log messages are identical to running each split by hand.
for split_name, masks_dir, output_dir in (
    ("training", train_masks_dir, train_output_dir),
    ("validation", val_masks_dir, val_output_dir),
):
    print(f"Converting {split_name} masks to YOLO segmentation format...")
    convert_segment_masks_to_yolo_seg(masks_dir=masks_dir, output_dir=output_dir, classes=num_classes)
    print(f"✅ {split_name.capitalize()} masks converted.")

print("\nConversion complete. You can now try retraining your model.")

Training the model

In [None]:
# ----------------------------------------------------------
# 4️⃣  TRAIN YOLOv8 SEGMENTATION MODEL
# ----------------------------------------------------------
# Location of the dataset description, relative to the working directory.
data_yaml_path = os.path.join(SUBDIR, "data.yaml")
print(data_yaml_path)

# Nano segmentation checkpoint keeps the demo light; yolov8m-seg.pt is the stronger option.
model = YOLO("yolov8n-seg.pt")

# Collect the training arguments first, then launch the run.
train_settings = dict(data=data_yaml_path, epochs=EPOCHS, imgsz=IMG_SIZE)
model.train(**train_settings)

Saving to Google Drive

In [None]:
# ----------------------------------------------------------
# 5️⃣  SAVE TO GOOGLE DRIVE (optional)
# ----------------------------------------------------------
# from google.colab import drive
# drive.mount('/content/drive')
# !cp -r runs/segment /content/drive/MyDrive/

Saving the best run to HuggingFace

In [None]:
# --- 6. Upload the best checkpoint to the Hugging Face Hub ---
# Take the checkpoint location from the trainer of the run executed in this
# session: Ultralytics numbers its run folders (train, train2, ...), so a fixed
# "train3" only matches when exactly two earlier runs already exist.
try:
    best_weights_path = str(model.trainer.best)
except (NameError, AttributeError):
    # No trained model in this session: fall back to the previously used location.
    best_weights_path = "runs/segment/train3/weights/best.pt"

# Fail early with a clear message instead of a less obvious error from the Hub client.
if not os.path.isfile(best_weights_path):
    raise FileNotFoundError(f"Best weights not found at '{best_weights_path}'.")
if not hub_model_id:
    raise ValueError("HUGGINGFACE_MODEL_ID is not set; cannot choose a target repository.")

api = HfApi()
api.upload_file(
    path_or_fileobj=best_weights_path,
    path_in_repo="best.pt",
    repo_id=hub_model_id,
    token=hf_token
)

Plots

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [None]:
# Prefer the results file of the run trained in this session: Ultralytics
# increments the run folder name on each training, so a fixed "train3" is only
# correct by coincidence. When no trained model is available (for example when
# only re-plotting an older run) the previous hard-coded location is used.
_fallback_results_csv = '/content/runs/segment/train3/results.csv'
try:
    csv_filename = os.path.join(str(model.trainer.save_dir), 'results.csv')
except (NameError, AttributeError):
    csv_filename = _fallback_results_csv

In [None]:
"""
Loads and preprocesses the training data from a CSV file.
Returns a DataFrame and the number of epochs.
"""
try:
    # Read the CSV data into a pandas DataFrame
    df = pd.read_csv(csv_filename)
    # Convert all columns to numeric, ignoring errors for non-numeric
    for col in df.columns:
        if col.lower() != 'epoch':
             df[col] = pd.to_numeric(df[col], errors='ignore')
    # Check if 'epoch' column exists to set as index
    if 'epoch' in df.columns:
        df = df.set_index('epoch')
    else:
        print("Warning: 'epoch' column not found. Using default numeric index.")

    num_epochs = len(df)

except FileNotFoundError:
    print(f"Error: The file '{csv_filename}' was not found.")
    print("Please make sure the file is in the same directory or provide the full path.")

except Exception as e:
    print(f"An error occurred during data loading: {e}")


In [None]:
"""
Plots the training and validation losses.
"""
print("\nGenerating Plot 1: Training vs. Validation Losses...")
plt.figure(figsize=(12, 7)) # Create a new figure

train_loss_cols = ['train/box_loss', 'train/seg_loss', 'train/cls_loss', 'train/dfl_loss']
val_loss_cols = ['val/box_loss', 'val/seg_loss', 'val/cls_loss', 'val/dfl_loss']

available_train_loss = [col for col in train_loss_cols if col in df.columns]
available_val_loss = [col for col in val_loss_cols if col in df.columns]
# Plot available data
if available_train_loss:
    df[available_train_loss].plot(marker='o', linestyle='--')
if available_val_loss:
    df[available_val_loss].plot(marker='o', linestyle='-')

plt.title(f'Training vs. Validation Losses (Epochs 1-{num_epochs})')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.grid(True)
plt.legend(title='Metrics (Train = dashed, Val = solid)', bbox_to_anchor=(1.05, 1), loc='upper left')

# Ensure x-axis shows integer epochs
plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True))

# Adjust layout and save
plt.tight_layout(rect=[0, 0, 0.85, 1]) # Adjust layout to make space for legends
save_filename1 = 'training_losses.png'
plt.savefig(save_filename1)
plt.show() # Show the first plot
print(f"Successfully generated and saved plot to {save_filename1}")

In [None]:
"""
Plots the mAP performance metrics.
"""
print("\nGenerating Plot 2: Performance Metrics (mAP)...")
plt.figure(figsize=(12, 7)) # Create a new figure

map_cols = ['metrics/mAP50(B)', 'metrics/mAP50-95(B)', 'metrics/mAP50(M)', 'metrics/mAP50-95(M)']
available_map_cols = [col for col in map_cols if col in df.columns]

if available_map_cols:
    df[available_map_cols].plot(marker='o')
    plt.title(f'Performance Metrics (mAP) (Epochs 1-{num_epochs})')
    plt.ylabel('mAP Score')
    plt.xlabel('Epoch')
    plt.grid(True)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Ensure x-axis shows integer epochs
    plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    # Adjust layout and save
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    save_filename2 = 'map_metrics.png'
    plt.savefig(save_filename2)
    plt.show() # Show the second plot
    print(f"Successfully generated and saved plot to {save_filename2}")
else:
    plt.close() # Close the empty figure
    print("No mAP data found to plot.")

In [None]:
"""
Plots the classification loss instability.
"""
print("\nGenerating Plot 3: Classification Loss Instability...")
plt.figure(figsize=(12, 7)) # Create a new figure

cls_loss_cols = ['val/cls_loss', 'train/cls_loss']
available_cls_loss = [col for col in cls_loss_cols if col in df.columns]
if available_cls_loss:
    df[available_cls_loss].plot(marker='o')
    plt.title(f'Classification Loss (val/cls_loss) Instability (Epochs 1-{num_epochs})')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.grid(True)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    # Ensure x-axis shows integer epochs
    plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    # Adjust layout and save
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    save_filename3 = 'cls_loss.png'
    plt.savefig(save_filename3)
    plt.show() # Show the third plot
    print(f"Successfully generated and saved plot to {save_filename3}")
else:
     plt.close() # Close the empty figure
     print("No Classification Loss data found to plot.")