# First, let us set up a few dependencies

Don't forget to switch to a GPU-enabled colab runtime!

```
Runtime -> Change Runtime Type -> GPU
```

In [None]:
import os
import contextlib
import subprocess
import tempfile # Added here as it's used before SESSION_WORKSPACE cell in .py
import shutil   # Added here as it's used before SESSION_WORKSPACE cell in .py

@contextlib.contextmanager
def directory(name):
  """Temporarily switch the process working directory to ``name``.

  Intended for use as ``with directory(path): ...``. The directory that was
  current on entry is restored when the block exits, including when the
  block raises.

  Args:
    name: Path of the directory to change into.

  Yields:
    None.

  Raises:
    OSError: If ``name`` cannot be entered (raised by ``os.chdir``).
  """
  previous_dir = os.getcwd()
  os.chdir(name)
  try:
    yield None
  finally:
    # Without the finally, an exception inside the with-block would leave the
    # whole notebook session stuck in ``name``.
    os.chdir(previous_dir)

def run(command, exception_on_failure=False):
  """Run ``command`` in a shell and return its combined stdout/stderr text.

  Args:
    command: Shell command line; it is converted to ``str`` before running.
    exception_on_failure: When True, any failure is re-raised to the caller.
      When False (default), the failure is printed and its text is returned.

  Returns:
    The captured output on success. On failure (with
    ``exception_on_failure`` False) the output captured from the failed
    command, or the exception message when the failure carries no output.

  Raises:
    Exception: Whatever ``subprocess.check_output`` raised, but only when
      ``exception_on_failure`` is True.
  """
  try:
    program_output = subprocess.check_output(
        str(command), shell=True, universal_newlines=True, stderr=subprocess.STDOUT)
  except Exception as e:
    if exception_on_failure:
      raise
    # Only CalledProcessError carries ``output``; other failures (for example
    # a ValueError for an unrunnable command string) do not, so fall back to
    # the exception text instead of failing with AttributeError.
    captured = getattr(e, "output", None)
    program_output = captured if captured is not None else str(e)
    print(f"Error in run: {program_output}")
  return program_output

def prun(command, exception_on_failure=False):
  """Execute ``command`` through ``run``, print what it returned, and return it.

  Args:
    command: Shell command line, forwarded to ``run`` unchanged.
    exception_on_failure: Forwarded to ``run`` unchanged.

  Returns:
    The value returned by ``run``.
  """
  output = run(command, exception_on_failure)
  print(output)
  return output


# Next, we mount your Google Drive in this notebook. You might have to change the path to match your dataset folder inside your Drive.

Read the instructions printed by the cells below carefully!

In [None]:
# Create a temporary workspace
# tempfile.mkdtemp() creates a new, uniquely named directory and returns its
# path. Nothing in this cell deletes it, and re-running the cell creates
# another directory and rebinds SESSION_WORKSPACE to it.
SESSION_WORKSPACE = tempfile.mkdtemp()
print(f"Session workspace created at: {SESSION_WORKSPACE}")


In [None]:
# Mount the drive
# Sets DRIVE_PATH, the directory in which the dataset zip is looked up.
# If google.colab can be imported, Drive is mounted at /content/drive.
# Otherwise (ImportError) a local default is tried and, if that folder does
# not exist, the user is asked to type the directory.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_PATH = "/content/drive/My Drive"
    print("Google Drive mounted.")
except ImportError:
    print("Google Colab 'drive' not available. Assuming local execution or pre-mounted drive.")
    print("Please ensure your dataset is accessible at the specified DRIVE_PATH or modify script.")
    DRIVE_PATH = os.path.expanduser("~/GoogleDrive/My Drive") # Example for local
    if not os.path.exists(DRIVE_PATH):
         print(f"Warning: Default DRIVE_PATH {DRIVE_PATH} does not exist.")
         # The typed value is stored as-is; it is not checked for existence here.
         DRIVE_PATH = input(f"Please enter the path to your dataset zip file's directory (e.g., /path/to/drive/My Drive): ")


In [None]:
# Unzip the dataset
# (os and shutil are imported in the first code cell.)

# Folder name of the dataset; the archive is expected to be "<name>.zip".
DATASET_DIR_NAME = "duckietown_object_detection_dataset"
DATASET_ZIP_NAME = f"{DATASET_DIR_NAME}.zip"
# Destination folder for the unpacked archive, inside the session workspace.
DATASET_DIR_PATH = os.path.join(SESSION_WORKSPACE, DATASET_DIR_NAME)
# Sub-folder names combined by show_info() as <split>/<kind>.
TRAIN_DIR = "train"
VALIDATION_DIR = "val"
IMAGES_DIR = "images"
LABELS_DIR = "labels"

def show_info(base_path: str):
  """Print the entry count of every split/kind folder below ``base_path``.

  The folders inspected are train/val crossed with images/labels. A folder
  that does not exist is reported instead of counted.

  Args:
    base_path: Root directory of the unpacked dataset.
  """
  for split in (TRAIN_DIR, VALIDATION_DIR):
    for kind in (IMAGES_DIR, LABELS_DIR):
      folder = os.path.join(base_path, split, kind)
      if not os.path.exists(folder):
        print(f"#Path not found: {folder}")
        continue
      entry_count = len(os.listdir(folder))
      print(f"#Files in {split}/{kind}: {entry_count}")

def unzip_dataset():
  """Unpack the dataset archive into ``DATASET_DIR_PATH``.

  The archive is first looked up as ``DRIVE_PATH/DATASET_ZIP_NAME``. When it
  is not there, the user is prompted for an alternative full path.

  Returns:
    True once the archive has been unpacked and summarised, False when no
    existing archive path was provided.
  """
  archive = os.path.join(DRIVE_PATH, DATASET_ZIP_NAME)
  if not os.path.exists(archive):
    print(f"No zipped dataset found at {archive}! Please check the path and filename.")
    fallback = input(f"Enter the full path to '{DATASET_ZIP_NAME}' if it's elsewhere, or press Enter to abort: ")
    # An empty answer or a path that does not exist both mean "give up".
    if not fallback or not os.path.exists(fallback):
      print("Dataset zip file not found. Aborting.")
      return False
    archive = fallback

  print("Unpacking zipped data...")
  shutil.unpack_archive(archive, DATASET_DIR_PATH)
  print(f"Zipped dataset unpacked to {DATASET_DIR_PATH}")
  show_info(DATASET_DIR_PATH)
  return True

# unzip_dataset() returns False when no archive could be located; stop the
# notebook here with an explicit error in that case.
if not unzip_dataset():
    raise RuntimeError("Dataset unzipping failed. Cannot continue.") # Replace with a print if the notebook should keep running


In [None]:
# change working directory to the session workspace
os.chdir(SESSION_WORKSPACE)
print(f"PWD: {os.getcwd()}")

# install pytorch and torchvision
!pip3 install torch==1.13.0 torchvision==0.14.0


# Next, we will install YOLO

In [None]:
!pip3 install ultralytics

# We now inform the training process of the format and location of our dataset

In [None]:
%%writefile duckietown.yaml

# Dataset description for training: split locations, class count, class names.
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
# NOTE(review): both entries are relative paths. Confirm how the installed
# Ultralytics version resolves them (this file's folder vs. its own datasets directory).
train: duckietown_object_detection_dataset/train # Relative path; this file is written to the current working directory
val: duckietown_object_detection_dataset/val   # Relative path; this file is written to the current working directory

# number of classes (must equal the number of entries in `names`)
nc: 4

# class names, listed in class-index order
names: [ 'duckie', 'cone', 'truck', 'bus' ]


# And we're ready to train! This step will take about 5 minutes.

Notice that we're only training for 10 epochs. That's probably not enough!

In [None]:
from ultralytics import YOLO
import os # Already imported, but good for cell self-containment

# Path of the best checkpoint; stays None when training does not finish.
BEST_MODEL_PATH = None
try:
    # Load a pretrained YOLO model (e.g., 'yolo11n.pt')
    # Ultralytics will automatically download it if not found locally.
    model = YOLO('yolo11n.pt') # Or 'yolov8n.pt'

    # Pass the dataset config as an absolute path. The train/val entries in
    # duckietown.yaml are relative; with an absolute YAML path they are
    # resolved against the YAML file's own folder instead of depending on
    # Ultralytics' global datasets directory setting.
    dataset_config = os.path.abspath('duckietown.yaml')

    # Train the model
    results = model.train(
        data=dataset_config,           # Path to your dataset config
        epochs=10,                     # Number of epochs
        imgsz=416,                     # Image size
        batch=32,                      # Batch size
        project='runs/train',          # Project directory for saving results
        name='exp',                    # Experiment name
        # device can be 'cpu' or 'cuda' (or 0 for GPU). Auto-detection is default.
    )

    # Get the path to the best model
    BEST_MODEL_PATH = os.path.join(results.save_dir, 'weights/best.pt')
    print(f"Best model saved to: {BEST_MODEL_PATH}")
except Exception as e:
    # Deliberately best-effort: report the failure and let later cells detect
    # the missing model through BEST_MODEL_PATH being None.
    print(f"An error occurred during training: {e}")
    BEST_MODEL_PATH = None # Ensure it's None if training failed


In [None]:
# Report the checkpoint location, or say that no usable checkpoint exists.
if not BEST_MODEL_PATH or not os.path.exists(BEST_MODEL_PATH):
    print("Training did not complete successfully, or the best model path was not found.")
else:
    print(f"The best model path is: {BEST_MODEL_PATH}")


# Next, we can upload your model to Duckietown's cloud!

We will need our token to access our personal cloud space.

In [None]:
import getpass

# Provide the Duckietown token through the YOUR_DT_TOKEN environment variable,
# or type it at the prompt below.
YOUR_DT_TOKEN = os.getenv("YOUR_DT_TOKEN")
if not YOUR_DT_TOKEN:
    # getpass keeps the secret out of the saved notebook output, unlike input().
    YOUR_DT_TOKEN = getpass.getpass("Please enter your Duckietown Token (or set YOUR_DT_TOKEN env var): ")
# Drop whitespace that tends to come along when a token is pasted.
YOUR_DT_TOKEN = (YOUR_DT_TOKEN or "").strip()


Then, we choose the location of the trained model on disk and its name once uploaded to our cloud space. You should not change these values, or the robots will not be able to find the model to download.

In [None]:
import os # Ensure os is imported

# DO NOT CHANGE THESE
model_name = "yolo11n" # Or your chosen model variant name
model_local_path = BEST_MODEL_PATH # This will be None if training failed
model_remote_path = f"courses/mooc/objdet/data/nn_models/{model_name}.pt"

# install DCSS client
!pip3 install dt-data-api


We now open a pointer to our cloud space and upload the model.

In [None]:
import torch # Though not directly used here, often a dependency with model files
from dt_data_api import DataClient, Storage # Import here after pip install in previous cell
import os # Ensure os is imported

# An empty token, or one that still contains the placeholder text, counts as
# "no token": nothing is uploaded in that case.
if not YOUR_DT_TOKEN or "YOUR_TOKEN_HERE" in YOUR_DT_TOKEN:
    print("Duckietown token not provided or is placeholder. Skipping model upload.")
else:
    # open a pointer to our personal duckietown cloud space
    client = DataClient(YOUR_DT_TOKEN)
    storage = client.storage("user")

    # upload model, but only when the local checkpoint actually exists
    if not model_local_path or not os.path.exists(model_local_path):
        print(f"Skipping upload: model_local_path is not valid ('{model_local_path}'). Training might have failed or model file not found.")
    else:
        print(f"Uploading {model_local_path} to {model_remote_path}")
        upload = storage.upload(model_local_path, model_remote_path)
        # join() is called before reporting completion
        upload.join()
        print("Upload complete.")


# Done!

We're done training! You can now close this tab and go back to the `Training` notebook.