In [None]:
import kagglehub
import os
import cv2
import numpy as np
import shutil
import random
from pathlib import Path
from ultralytics import YOLO

In [1]:
# Download the animal dataset from kaggle (ONLY RUN THIS ONCE TO LOCALLY HAVE ANIMAL DATASET)
path = kagglehub.dataset_download("iamsouravbanerjee/animal-image-dataset-90-different-animals")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/iamsouravbanerjee/animal-image-dataset-90-different-animals?dataset_version_number=5...


100%|██████████| 656M/656M [00:36<00:00, 18.8MB/s] 

Extracting files...





Path to dataset files: C:\Users\17038\.cache\kagglehub\datasets\iamsouravbanerjee\animal-image-dataset-90-different-animals\versions\5


In [None]:
# Format the animal dataset to match the .yaml .txt training validation approach that YOLO Ultranalystics requires
# (ONLY RUN THIS ONCE DO NOT RUN IF MODEL HAS ALREADY BEEN TRAINED)
# Directories
base_dir = r"C:\\Users\\17038\\a_Fall_2024\\vision\\final_project\\animals"  # Original dataset path
output_dir = r"C:\\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset"  # Reorganized dataset path
images_dir = os.path.join(output_dir, "images")
labels_dir = os.path.join(output_dir, "labels")

# Create necessary directories
for split in ["train", "val"]:
    os.makedirs(os.path.join(images_dir, split), exist_ok=True)
    os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

# Split ratio
train_ratio = 0.8

# Class names
class_names = []

# Process each class folder
for idx, class_folder in enumerate(os.listdir(base_dir)):
    class_path = os.path.join(base_dir, class_folder)
    if os.path.isdir(class_path):
        class_names.append(class_folder)  # Collect class names
        
        # List images and annotations
        images = [f for f in os.listdir(class_path) if f.endswith(('.jpg', '.jpeg', '.png'))]
        random.shuffle(images)
        
        train_count = int(len(images) * train_ratio)
        train_images = images[:train_count]
        val_images = images[train_count:]
        
        # Copy files to train/val splits
        for img_list, split in [(train_images, "train"), (val_images, "val")]:
            for img_name in img_list:
                img_src = os.path.join(class_path, img_name)
                
                # Append class name to the image file name
                new_img_name = f"{class_folder}_{img_name}"
                img_dst = os.path.join(images_dir, split, new_img_name)
                shutil.copy(img_src, img_dst)
                
                # Generate corresponding annotation
                new_label_name = os.path.splitext(new_img_name)[0] + ".txt"
                label_dst = os.path.join(labels_dir, split, new_label_name)
                
                # Create .txt file with placeholder annotation if it doesn't exist
                with open(label_dst, "w") as f:
                    # Placeholder annotation: class index, center (0.5, 0.5), size (1.0, 1.0)
                    # Adjust bounding box annotations as necessary
                    f.write(f"{idx} 0.5 0.5 1.0 1.0\n")

# Generate YAML file
yaml_content = f"""
path: {output_dir}
train: images/train
val: images/val

nc: {len(class_names)}
names: {class_names}
"""
yaml_path = os.path.join(output_dir, "animal_dataset.yaml")
with open(yaml_path, "w") as f:
    f.write(yaml_content)

print(f"Dataset prepared successfully at {output_dir}")
print(f"YAML file saved at {yaml_path}")


Dataset prepared successfully at C:\\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset
YAML file saved at C:\\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset\animal_dataset.yaml


In [18]:
# Train the model using YOLO from ultralytics
# Load the YOLO model
model = YOLO('yolov5mu.pt')  # Improved YOLOv5 model

model.train(
    data=yaml_path,  # Path to the generated YAML file
    epochs=10,       # Number of training epochs
    batch=16,        # Batch size
    imgsz=640,       # Image size
    workers=4        # Number of workers for data loading
)

print("Training complete. Check 'runs/detect/train' for results.")

New https://pypi.org/project/ultralytics/8.3.49 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.38  Python-3.11.4 torch-2.5.0+cpu CPU (13th Gen Intel Core(TM) i7-13700H)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov5mu.pt, data=C:\\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset\animal_dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=4, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic

[34m[1mtrain: [0mScanning C:\Users\17038\a_Fall_2024\vision\final_project\yolo_dataset\labels\train.cache... 4320 images, 869 backgrounds, 0 corrupt: 100%|██████████| 5189/5189 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\17038\a_Fall_2024\vision\final_project\yolo_dataset\labels\val.cache... 1080 images, 869 backgrounds, 0 corrupt: 100%|██████████| 1949/1949 [00:00<?, ?it/s]

Plotting labels to runs\detect\train3\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000106, momentum=0.9) with parameter groups 91 weight(decay=0.0), 98 weight(decay=0.0005), 97 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train3[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G     0.3284      4.788      1.118          4        640: 100%|██████████| 325/325 [1:16:36<00:00, 14.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 61/61 [07:49<00:00,  7.70s/it]

                   all       1949       1080       0.28      0.287       0.16       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      0.103      2.973     0.8997          4        640: 100%|██████████| 325/325 [1:16:03<00:00, 14.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 61/61 [07:09<00:00,  7.04s/it]

                   all       1949       1080      0.276      0.424      0.269      0.268






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G    0.09636      2.276     0.8876          5        640: 100%|██████████| 325/325 [1:16:08<00:00, 14.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 61/61 [07:07<00:00,  7.01s/it]

                   all       1949       1080      0.258      0.566       0.32      0.319






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G    0.09649      1.868     0.8676         15        640:   1%|          | 4/325 [00:59<1:20:14, 15.00s/it]


KeyboardInterrupt: 

In [5]:
from ultralytics import YOLO

# Load the last checkpoint from train3
model = YOLO('runs/detect/train3/weights/last.pt')  # Path to the last checkpoint

# Resume training
model.train(
    data=r'C:\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset\\animal_dataset.yaml',  # Path to your dataset YAML file
    epochs=1,                           # Total number of epochs (including previous training)
    batch=16,                            # Adjust based on your GPU
    imgsz=640,                           # Image size
    workers=4,
    project='runs/detect',
    name='train4'
                                                          # Number of data loading workers
)


New https://pypi.org/project/ultralytics/8.3.49 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.38  Python-3.11.4 torch-2.5.0+cpu CPU (13th Gen Intel Core(TM) i7-13700H)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=runs/detect/train3/weights/last.pt, data=C:\Users\\17038\\a_Fall_2024\\vision\\final_project\\yolo_dataset\\animal_dataset.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=4, project=runs/detect, name=train42, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=

[34m[1mtrain: [0mScanning C:\Users\17038\a_Fall_2024\vision\final_project\yolo_dataset\labels\train.cache... 4320 images, 869 backgrounds, 0 corrupt: 100%|██████████| 5189/5189 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\17038\a_Fall_2024\vision\final_project\yolo_dataset\labels\val.cache... 1080 images, 869 backgrounds, 0 corrupt: 100%|██████████| 1949/1949 [00:00<?, ?it/s]

Plotting labels to runs\detect\train42\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000106, momentum=0.9) with parameter groups 91 weight(decay=0.0), 98 weight(decay=0.0005), 97 bias(decay=0.0)
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\17038\AppData\Local\Programs\Python\Python311\Lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\17038\AppData\Local\Temp\ipykernel_14004\2105492933.py", line 7, in <module>
    model.train(
  File "C:\Users\17038\AppData\Roaming\Python\Python311\site-packages\ultralytics\engine\model.py", line 802, in train
    self.trainer.train()
  File "C:\Users\17038\AppData\Roaming\Python\Python311\site-packages\ultralytics\engine\trainer.py", line 207, in train
    self._do_train(world_size)
  File "C:\Users\17038\AppData\Roaming\Python\Python311\site-packages\ultralytics\engine\trainer.py", line 330, in _do_train
    self.run_callbacks("on_train_start")
  File "C:\Users\17038\AppData\Roaming\Python\Python311\site-packages\ultralytics\engine\trainer.py", line 168, in run_callbacks
    callback(self)
  File "C:\Users\17038\AppData\Roaming\Python\Pytho