### Directory Setup, Data Loading, and EDA

In [None]:
import os
import shutil
import json
from collections import defaultdict
from tqdm import tqdm
from PIL import Image

# Setup directories
def setup_directories(base_dir):
    """Create a clean train/val directory tree for images and labels.

    Any of the four target directories that already exists is deleted
    (with its contents) before being recreated empty.

    Args:
        base_dir: Root directory under which the tree is created.

    Returns:
        Dict mapping ``'train_images'``, ``'train_labels'``, ``'val_images'``
        and ``'val_labels'`` to their directory paths.
    """
    layout = {
        f"{split}_{kind}": os.path.join(base_dir, f"{split}/{kind}")
        for split in ("train", "val")
        for kind in ("images", "labels")
    }

    for target in layout.values():
        stale = os.path.exists(target)
        if stale:
            # Start from an empty directory so files from earlier runs cannot leak in.
            shutil.rmtree(target)
            print(f"Removed existing directory: {target}")
        os.makedirs(target, exist_ok=True)
        print(f"Created directory: {target}")

    return layout

#  Load and validate COCO data
def load_and_validate_coco(json_path, images_dir):
    """Load a COCO annotation file and keep only images that exist and open cleanly.

    Also prints the per-class annotation counts and overall totals.

    Args:
        json_path: Path to the COCO-format JSON file.
        images_dir: Directory that holds the image files named in the JSON.

    Returns:
        Tuple ``(coco_data, image_id_to_file, valid_images, valid_annotations)``:
        the parsed JSON, a dict of image id -> file name (only the part after
        the last ``__`` of ``file_name`` is kept), the list of image ids whose
        file passed validation, and the annotations that belong to those ids.
    """
    print("Loading COCO annotations...")
    with open(json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Map image IDs to filenames.
    # NOTE(review): the '__' split presumably strips an export prefix — confirm.
    image_id_to_file = {img['id']: img['file_name'].split('__')[-1] for img in coco_data['images']}

    # Validate images: a missing file or one PIL cannot verify counts as invalid.
    valid_images = []
    invalid_images = []
    print("Validating images...")
    for img_id, filename in tqdm(image_id_to_file.items(), desc="Validating Images"):
        img_path = os.path.join(images_dir, filename)
        try:
            if not os.path.exists(img_path):
                invalid_images.append(img_id)
                continue
            with Image.open(img_path) as im:
                im.verify()  # Check if image is corrupt
            valid_images.append(img_id)
        except Exception as e:
            invalid_images.append(img_id)
            print(f"Invalid image {filename}: {str(e)}")

    # Filter annotations for valid images. A set keeps this linear in the
    # number of annotations instead of scanning the id list for each one.
    valid_id_set = set(valid_images)
    valid_annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] in valid_id_set]

    # Initial EDA: Class distribution
    class_counts = defaultdict(int)
    for ann in valid_annotations:
        class_counts[ann['category_id']] += 1

    print("\nClass distribution in full dataset:")
    category_id_to_name = {cat['id']: cat['name'] for cat in coco_data['categories']}
    for cat_id, count in sorted(class_counts.items()):
        print(f"{category_id_to_name[cat_id]} (ID {cat_id}): {count} annotations")

    print(f"\nTotal images: {len(coco_data['images'])}")
    print(f"Valid images: {len(valid_images)}")
    print(f"Invalid images: {len(invalid_images)}")
    print(f"Total annotations: {len(coco_data['annotations'])}")
    print(f"Valid annotations: {len(valid_annotations)}")

    return coco_data, image_id_to_file, valid_images, valid_annotations

# Usage
# NOTE: setup_directories() deletes any existing train/val image and label
# directories under base_dir before recreating them empty.
base_dir = "/home/jovyan/__ANIMALS/datasets/Y9_AUTO_FIN/"
images_dir = "/home/jovyan/__ANIMALS/images"
json_path = "result.json" 

# The results are kept as notebook-level variables and reused by later cells.
dataset_dirs = setup_directories(base_dir)
coco_data, image_id_to_file, valid_images, valid_annotations = load_and_validate_coco(json_path, images_dir)

In [None]:
import os
import shutil
import json
from collections import defaultdict
from tqdm import tqdm
from PIL import Image


#  Load and validate COCO data
def load_and_validate_coco(json_path, images_dir):
    """Load a COCO annotation file and keep only images that exist and open cleanly.

    Also prints the per-class annotation counts and overall totals.

    Args:
        json_path: Path to the COCO-format JSON file.
        images_dir: Directory that holds the image files named in the JSON.

    Returns:
        Tuple ``(coco_data, image_id_to_file, valid_images, valid_annotations)``:
        the parsed JSON, a dict of image id -> file name (only the part after
        the last ``__`` of ``file_name`` is kept), the list of image ids whose
        file passed validation, and the annotations that belong to those ids.
    """
    print("Loading COCO annotations...")
    with open(json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Map image IDs to filenames.
    # NOTE(review): the '__' split presumably strips an export prefix — confirm.
    image_id_to_file = {img['id']: img['file_name'].split('__')[-1] for img in coco_data['images']}

    # Validate images: a missing file or one PIL cannot verify counts as invalid.
    valid_images = []
    invalid_images = []
    print("Validating images...")
    for img_id, filename in tqdm(image_id_to_file.items(), desc="Validating Images"):
        img_path = os.path.join(images_dir, filename)
        try:
            if not os.path.exists(img_path):
                invalid_images.append(img_id)
                continue
            with Image.open(img_path) as im:
                im.verify()  # Check if image is corrupt
            valid_images.append(img_id)
        except Exception as e:
            invalid_images.append(img_id)
            print(f"Invalid image {filename}: {str(e)}")

    # Filter annotations for valid images. A set keeps this linear in the
    # number of annotations instead of scanning the id list for each one.
    valid_id_set = set(valid_images)
    valid_annotations = [ann for ann in coco_data['annotations'] if ann['image_id'] in valid_id_set]

    # Initial EDA: Class distribution
    class_counts = defaultdict(int)
    for ann in valid_annotations:
        class_counts[ann['category_id']] += 1

    print("\nClass distribution in full dataset:")
    category_id_to_name = {cat['id']: cat['name'] for cat in coco_data['categories']}
    for cat_id, count in sorted(class_counts.items()):
        print(f"{category_id_to_name[cat_id]} (ID {cat_id}): {count} annotations")

    print(f"\nTotal images: {len(coco_data['images'])}")
    print(f"Valid images: {len(valid_images)}")
    print(f"Invalid images: {len(invalid_images)}")
    print(f"Total annotations: {len(coco_data['annotations'])}")
    print(f"Valid annotations: {len(valid_annotations)}")

    return coco_data, image_id_to_file, valid_images, valid_annotations

# Usage
# This cell only reloads and re-validates the annotations; unlike the cell
# that calls setup_directories(), it does not touch the dataset directories.
# base_dir is assigned here but not used by the call below.
base_dir = "/home/jovyan/__ANIMALS/datasets/Y9_AUTO_FIN/"
images_dir = "/home/jovyan/__ANIMALS/images"
json_path = "result.json" 


coco_data, image_id_to_file, valid_images, valid_annotations = load_and_validate_coco(json_path, images_dir)

In [None]:
import os
import json
from collections import defaultdict
from tqdm import tqdm

def analyze_coco_class_distribution(json_path):
    """Print a per-class image/instance table and a summary for a COCO file.

    Classes are listed by instance count, highest first. Nothing is returned;
    all output goes to stdout.

    Args:
        json_path: Path to the COCO-format JSON file.
    """
    print("Loading COCO annotations...")
    with open(json_path, 'r') as handle:
        dataset = json.load(handle)

    # Category id -> display name
    names_by_id = {}
    for category in dataset['categories']:
        names_by_id[category['id']] = category['name']

    # Per class: the distinct images it appears in and how many boxes it has
    image_ids_by_class = defaultdict(set)
    instance_totals = defaultdict(int)

    print("\nCounting instances...")
    for annotation in tqdm(dataset['annotations'], desc="Processing annotations"):
        class_id = annotation['category_id']
        image_ids_by_class[class_id].add(annotation['image_id'])
        instance_totals[class_id] += 1

    total_instances = sum(instance_totals.values())

    # Table header
    print("\nClass Distribution:")
    print("{:<20} {:<10} {:<10} {:<10}".format("Class", "Images", "Instances", "% of Total"))
    print("-" * 50)

    # Most frequent class first
    row_template = "{:<20} {:<10} {:<10} {:<10.1f}%"
    ranked_ids = sorted(instance_totals, key=instance_totals.get, reverse=True)
    for class_id in ranked_ids:
        label = names_by_id[class_id]
        instances = instance_totals[class_id]
        if total_instances > 0:
            share = (instances / total_instances) * 100
        else:
            share = 0
        print(row_template.format(label, len(image_ids_by_class[class_id]), instances, share))

    # Dataset-wide totals
    print("\nDataset Summary:")
    print(f"Total classes: {len(dataset['categories'])}")
    print(f"Total images: {len(dataset['images'])}")
    print(f"Total annotations: {len(dataset['annotations'])}")
    print(f"Total instances: {total_instances}")

# Example usage
# Runs the analysis on result.json when this code executes as the main module.
if __name__ == "__main__":
    json_path = "result.json" 
    analyze_coco_class_distribution(json_path)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def analyze_class_distribution(coco_data):
    """Summarise and plot how many images and annotations each class has.

    Prints dataset totals plus the ten classes with the most and the fewest
    images, then shows a horizontal bar chart of images per class.

    Args:
        coco_data: Parsed COCO dict with ``'images'``, ``'annotations'`` and
            ``'categories'`` entries.

    Returns:
        DataFrame with columns ``'Class'``, ``'Image Count'`` and
        ``'Annotation Count'``, sorted by image count (descending). Every
        category in ``coco_data['categories']`` gets a row, including those
        without annotations.
    """
    # Create mapping from category ID to name
    id_to_name = {cat['id']: cat['name'] for cat in coco_data['categories']}

    # One pass over the annotations collects both statistics; the image-id sets
    # are seeded for every category so unannotated classes still report 0.
    class_counts = {}
    image_ids_per_class = {class_id: set() for class_id in id_to_name}
    for ann in coco_data['annotations']:
        class_id = ann['category_id']
        class_name = id_to_name[class_id]
        class_counts[class_name] = class_counts.get(class_name, 0) + 1
        image_ids_per_class[class_id].add(ann['image_id'])

    # Count unique images per class, keyed by name in category order
    images_per_class = {
        class_name: len(image_ids_per_class[class_id])
        for class_id, class_name in id_to_name.items()
    }

    # Create DataFrame for visualization
    df = pd.DataFrame({
        'Class': list(images_per_class.keys()),
        'Image Count': list(images_per_class.values()),
        'Annotation Count': [class_counts.get(cls, 0) for cls in images_per_class.keys()]
    }).sort_values('Image Count', ascending=False)

    # Print summary
    print("="*50)
    print("CLASS DISTRIBUTION ANALYSIS")
    print("="*50)
    print(f"\nTotal Images in Dataset: {len(coco_data['images'])}")
    print(f"Total Annotations: {len(coco_data['annotations'])}")
    print(f"Unique Classes: {len(df)}")

    print("\nImages per Class (Top 10):")
    print(df.head(10).to_string(index=False))

    print("\nClasses with Fewest Images:")
    print(df.tail(10).to_string(index=False))

    # Visualization
    plt.figure(figsize=(12, 8))
    sns.barplot(data=df, y='Class', x='Image Count', palette='viridis')
    plt.title('Number of Images per Class', fontsize=16)
    plt.xlabel('Number of Images', fontsize=12)
    plt.ylabel('Animal Class', fontsize=12)
    plt.tight_layout()
    plt.show()

    return df

# Run the analysis on the full annotation set held in the notebook-level
# coco_data variable (not only the validated images).
class_distribution = analyze_class_distribution(coco_data)

## Observations from EDA
- Total Images: 1960
- Total Annotations: 2192 (some images have multiple annotations)
- Classes Present: 18 out of 21 (missing "red_squirrel", "wolf", "wolverine")
### Imbalance:
- High-frequency classes: "tiger" (724), "roe_deer_female" (386), "sika_deer_female" (224)
- Low-frequency classes: "otter" (1), "brown_bear" (4), "hare" (7), "ussuri_bear" (7)

## Data Splitting

In [None]:
from collections import defaultdict
import random

def split_coco_dataset(coco_data, valid_images, valid_annotations, train_size=0.8, seed=None):
    """Split validated COCO images into train and val sets, class by class.

    Each category's images are shuffled and split on their own:

    * 1 image: it goes to train.
    * 2-4 images: all but one go to train, one goes to val.
    * 5 or more: the first ``int(n * train_size)`` go to train, the rest to val.

    An image that holds several categories can land in both sets. Such an
    image stays in train if any of its annotations belongs to a category with
    fewer than 5 images, otherwise it stays in val. Images without any
    annotation end up in neither split.

    Args:
        coco_data: Parsed COCO dict (``'images'``, ``'categories'`` and
            optionally ``'info'``).
        valid_images: Ids of the images that may be used.
        valid_annotations: Annotations belonging to those images.
        train_size: Fraction of a category's images sent to train when the
            category has at least 5 images.
        seed: Optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` state is used.

    Returns:
        Tuple ``(train_data, val_data)`` of COCO-style dicts with keys
        ``'info'``, ``'images'``, ``'annotations'`` and ``'categories'``.
    """
    # Set membership keeps the filters below linear in the number of images.
    valid_image_ids = set(valid_images)
    images = [img for img in coco_data['images'] if img['id'] in valid_image_ids]
    annotations = valid_annotations
    categories = coco_data['categories']
    # 'info' is not needed for the split, so a file without it still works.
    info = coco_data.get('info', {})

    # A private generator gives reproducible splits without touching global state.
    rng = random.Random(seed) if seed is not None else random

    # Map image IDs to their annotations
    image_to_anns = defaultdict(list)
    for ann in annotations:
        image_to_anns[ann['image_id']].append(ann)

    # Group images by category (based on annotations)
    category_to_images = defaultdict(set)
    for img in images:
        img_id = img['id']
        for ann in image_to_anns.get(img_id, ()):
            category_to_images[ann['category_id']].add(img_id)

    # Split images per category with rare class handling
    train_image_ids = set()
    val_image_ids = set()
    for category_id, img_ids in category_to_images.items():
        img_list = list(img_ids)
        rng.shuffle(img_list)
        num_images = len(img_list)

        if num_images == 1:
            train_image_ids.update(img_list)
            print(f"Class {category_id} has 1 image, assigned to train.")
        elif num_images < 5:
            split_index = max(1, num_images - 1)  # Exactly 1 image goes to val
            train_image_ids.update(img_list[:split_index])
            val_image_ids.update(img_list[split_index:])
            print(f"Class {category_id} has {num_images} images: {split_index} train, {num_images - split_index} val.")
        else:
            split_index = int(num_images * train_size)
            train_image_ids.update(img_list[:split_index])
            val_image_ids.update(img_list[split_index:])

    # Resolve overlaps: prioritize train for rare classes, move conflicts to val
    overlap = train_image_ids & val_image_ids
    if overlap:
        print(f"Found {len(overlap)} overlapping images. Resolving...")
        # Categories with fewer than 5 images, computed once for all overlaps.
        rare_category_ids = {
            cat['id'] for cat in categories
            if len(category_to_images.get(cat['id'], ())) < 5
        }
        for img_id in overlap:
            if any(ann['category_id'] in rare_category_ids for ann in image_to_anns[img_id]):
                val_image_ids.discard(img_id)  # Keep in train for rare classes
            else:
                train_image_ids.discard(img_id)  # Move to val otherwise

    # Finalize train and val sets
    train_data = {
        'info': info,
        'images': [img for img in images if img['id'] in train_image_ids],
        'annotations': [ann for ann in annotations if ann['image_id'] in train_image_ids],
        'categories': categories
    }
    val_data = {
        'info': info,
        'images': [img for img in images if img['id'] in val_image_ids],
        'annotations': [ann for ann in annotations if ann['image_id'] in val_image_ids],
        'categories': categories
    }

    # EDA: Class distribution
    category_id_to_name = {cat['id']: cat['name'] for cat in categories}

    def print_class_distribution(split_data, split_name):
        """Print per-class annotation counts and totals for one split."""
        class_counts = defaultdict(int)
        for ann in split_data['annotations']:
            class_counts[ann['category_id']] += 1
        print(f"\nClass distribution in {split_name} set:")
        for cat_id in sorted(class_counts.keys()):
            print(f"{category_id_to_name[cat_id]} (ID {cat_id}): {class_counts[cat_id]} annotations")
        print(f"Total {split_name} images: {len(split_data['images'])}")
        print(f"Total {split_name} annotations: {len(split_data['annotations'])}")

    print_class_distribution(train_data, "train")
    print_class_distribution(val_data, "val")

    return train_data, val_data

# Usage
# Relies on coco_data, valid_images and valid_annotations produced by the
# load_and_validate_coco() cell.
train_data, val_data = split_coco_dataset(coco_data, valid_images, valid_annotations, train_size=0.8)

### Copy Images and Generate YOLO Labels

In [None]:
import shutil
from tqdm import tqdm

# Function to copy images
def copy_images(images_dir, dataset_dirs, train_image_ids, val_image_ids, image_id_to_file):
    """Copy the train and val images into their dataset directories.

    Ids without a known file name, and files missing from ``images_dir``,
    are reported with a warning and skipped.

    Args:
        images_dir: Directory holding the source images.
        dataset_dirs: Dict with ``'train_images'`` and ``'val_images'`` paths.
        train_image_ids: Ids of the images belonging to the train split.
        val_image_ids: Ids of the images belonging to the val split.
        image_id_to_file: Mapping of image id to file name.
    """
    subsets = (("train", train_image_ids), ("val", val_image_ids))

    for subset_name, subset_ids in subsets:
        for image_id in tqdm(subset_ids, desc=f"Copying {subset_name} Images"):
            if image_id in image_id_to_file:
                filename = image_id_to_file[image_id]
                source_path = os.path.join(images_dir, filename)
                target_path = os.path.join(dataset_dirs[f"{subset_name}_images"], filename)
                if not os.path.exists(source_path):
                    print(f"Warning: {subset_name} image {filename} not found at {source_path}.")
                    continue
                shutil.copyfile(source_path, target_path)
            else:
                print(f"Warning: Image ID {image_id} not found in image_id_to_file.")

# Function to convert COCO to YOLO format
def prepare_yolo_labels(dataset_dirs, train_data, val_data, image_id_to_file):
    """Write one YOLO-format label file per image of the train and val splits.

    Each label line is ``<category_id> <x_center> <y_center> <width> <height>``
    with the box values divided by the image width/height. COCO category ids
    are written unchanged. An image without annotations gets an empty file.

    Args:
        dataset_dirs: Dict with ``'train_labels'`` and ``'val_labels'`` paths.
        train_data: COCO-style dict for the train split.
        val_data: COCO-style dict for the val split.
        image_id_to_file: Mapping of image id to file name; the label file
            takes the same base name with a ``.txt`` extension.
    """
    def convert_to_yolo(split_data, split_name):
        """Write the label files of a single split."""
        labels_dir = dataset_dirs[f"{split_name}_labels"]

        # Group annotations once instead of rescanning the list for every image.
        anns_by_image = {}
        for ann in split_data['annotations']:
            anns_by_image.setdefault(ann['image_id'], []).append(ann)

        # Image sizes come from the split's own records, so the function no
        # longer depends on a notebook-level coco_data variable.
        for img_info in tqdm(split_data['images'], desc=f"Preparing {split_name} labels"):
            img_id = img_info['id']
            filename = image_id_to_file[img_id]
            label_path = os.path.join(labels_dir, f"{os.path.splitext(filename)[0]}.txt")

            with open(label_path, 'w') as f:
                for ann in anns_by_image.get(img_id, ()):
                    # Box is unpacked as top-left x, y plus width, height.
                    x, y, w, h = ann['bbox']
                    img_w, img_h = img_info['width'], img_info['height']
                    x_center = (x + w / 2) / img_w
                    y_center = (y + h / 2) / img_h
                    width = w / img_w
                    height = h / img_h
                    f.write(f"{ann['category_id']} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

    convert_to_yolo(train_data, "train")
    convert_to_yolo(val_data, "val")

# Usage
# Relies on train_data, val_data, dataset_dirs and image_id_to_file created
# by earlier cells.
images_dir = "/home/jovyan/__ANIMALS/images"
train_image_ids = [img['id'] for img in train_data['images']]
val_image_ids = [img['id'] for img in val_data['images']]

# Copy the image files first, then write a label file for each of them.
copy_images(images_dir, dataset_dirs, train_image_ids, val_image_ids, image_id_to_file)
prepare_yolo_labels(dataset_dirs, train_data, val_data, image_id_to_file)

##  Oversampling

In [None]:
import os
import shutil
from collections import defaultdict
from tqdm import tqdm

def oversample_rare_classes(dataset_dirs, train_data, target_count=10):
    """Duplicate train images of under-represented classes on disk.

    For every class with fewer than ``target_count`` annotations in
    ``train_data``, ``target_count - count`` image/label pairs are copied,
    cycling through the images that contain the class. Copies are named
    ``<stem>_oversample_<i>`` and keep the source image's extension. The
    resulting class distribution is then read back from the label files and
    printed.

    Args:
        dataset_dirs: Dict with ``'train_images'`` and ``'train_labels'`` paths.
        train_data: COCO-style dict for the train split.
        target_count: Minimum number of annotations aimed for per class.
    """
    # Map image IDs to annotations and count annotations per class in train
    image_to_anns = defaultdict(list)
    class_counts = defaultdict(int)
    for ann in train_data['annotations']:
        image_to_anns[ann['image_id']].append(ann)
        class_counts[ann['category_id']] += 1

    # Identify classes to oversample
    rare_classes = {cat_id: count for cat_id, count in class_counts.items() if count < target_count}
    print("Rare classes to oversample:", rare_classes)

    # Oversample
    train_images_dir = dataset_dirs['train_images']
    train_labels_dir = dataset_dirs['train_labels']

    for cat_id, current_count in rare_classes.items():
        needed = target_count - current_count

        # Get images with this category
        candidate_images = [
            img for img in train_data['images']
            if any(ann['category_id'] == cat_id for ann in image_to_anns.get(img['id'], ()))
        ]
        if not candidate_images:
            # Annotations exist but none of their images is in the split;
            # without this guard the modulo below would divide by zero.
            print(f"Warning: no train images found for class {cat_id}, skipping.")
            continue
        print(f"Oversampling class {cat_id}: {current_count} → {target_count}, adding {needed} instances.")

        for i in range(needed):
            img = candidate_images[i % len(candidate_images)]  # Cycle through available images
            filename = os.path.basename(img['file_name'].split('__')[-1])
            stem, ext = os.path.splitext(filename)
            src_img = os.path.join(train_images_dir, filename)
            src_label = os.path.join(train_labels_dir, f"{stem}.txt")
            # Keep the source extension so the copy's name matches its real format.
            dst_img = os.path.join(train_images_dir, f"{stem}_oversample_{i}{ext}")
            dst_label = os.path.join(train_labels_dir, f"{stem}_oversample_{i}.txt")

            shutil.copyfile(src_img, dst_img)
            shutil.copyfile(src_label, dst_label)

    # Verify new counts by reading the class id at the start of every label line
    new_files = [f for f in os.listdir(train_labels_dir) if f.endswith('.txt')]
    new_counts = defaultdict(int)
    for f in tqdm(new_files, desc="Verifying oversampled train set"):
        with open(os.path.join(train_labels_dir, f), 'r') as label_file:
            for line in label_file:
                cat_id = int(line.split()[0])
                new_counts[cat_id] += 1

    print("\nNew class distribution in train set after oversampling:")
    category_id_to_name = {cat['id']: cat['name'] for cat in train_data['categories']}
    for cat_id in sorted(new_counts.keys()):
        print(f"{category_id_to_name[cat_id]} (ID {cat_id}): {new_counts[cat_id]} annotations")
    print(f"Total train images after oversampling: {len(os.listdir(train_images_dir))}")

# Usage
# Expects the train images and label files to already exist on disk, since
# the function copies them from the train directories.
oversample_rare_classes(dataset_dirs, train_data, target_count=10)

## Regenerate YOLO Labels with Remapped IDs

In [None]:
import os
from tqdm import tqdm

def prepare_yolo_labels(dataset_dirs, train_data, val_data, image_id_to_file):
    """Rewrite all YOLO label files using contiguous class ids.

    Existing files in the train and val label directories are deleted, then
    one label file per image is written with the COCO category id translated
    through a fixed mapping. Train images whose name contains
    ``_oversample_`` get a copy of the regenerated label of the image they
    were duplicated from, so they do not end up without a label.

    Args:
        dataset_dirs: Dict with ``'train_images'``, ``'train_labels'`` and
            ``'val_labels'`` paths.
        train_data: COCO-style dict for the train split.
        val_data: COCO-style dict for the val split.
        image_id_to_file: Mapping of image id to file name.

    Raises:
        KeyError: If an annotation uses a category id missing from the mapping.
    """
    # Define COCO to YOLO ID mapping. It skips COCO ids 9, 18 and 19.
    # NOTE(review): presumably those are the classes with no annotations — confirm.
    coco_to_yolo = {
        0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8,
        10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 20: 17
    }
    oversample_marker = "_oversample_"

    def clear_labels(labels_dir):
        """Delete every file in a label directory."""
        for label_file in os.listdir(labels_dir):
            os.remove(os.path.join(labels_dir, label_file))

    def convert_to_yolo(split_data, split_name):
        """Write the label files of one split and return their base names."""
        labels_dir = dataset_dirs[f"{split_name}_labels"]

        # Group annotations once instead of rescanning the list for every image.
        anns_by_image = {}
        for ann in split_data['annotations']:
            anns_by_image.setdefault(ann['image_id'], []).append(ann)

        written_stems = set()
        # Image sizes come from the split's own records, so the function no
        # longer depends on a notebook-level coco_data variable.
        for img_info in tqdm(split_data['images'], desc=f"Preparing {split_name} labels"):
            img_id = img_info['id']
            filename = image_id_to_file[img_id]
            stem = os.path.splitext(filename)[0]
            label_path = os.path.join(labels_dir, f"{stem}.txt")

            with open(label_path, 'w') as f:
                for ann in anns_by_image.get(img_id, ()):
                    x, y, w, h = ann['bbox']
                    img_w, img_h = img_info['width'], img_info['height']
                    x_center = (x + w / 2) / img_w
                    y_center = (y + h / 2) / img_h
                    width = w / img_w
                    height = h / img_h
                    yolo_id = coco_to_yolo[ann['category_id']]
                    f.write(f"{yolo_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")
            written_stems.add(stem)
        return written_stems

    def restore_oversampled_labels(split_name, written_stems):
        """Give each oversampled image a copy of its source image's new label."""
        images_dir = dataset_dirs[f"{split_name}_images"]
        labels_dir = dataset_dirs[f"{split_name}_labels"]
        restored = 0
        for image_file in os.listdir(images_dir):
            stem = os.path.splitext(image_file)[0]
            if stem in written_stems:
                continue  # A regular image that already has a fresh label.
            base_stem, marker, _ = stem.rpartition(oversample_marker)
            if not marker:
                continue
            base_label = os.path.join(labels_dir, f"{base_stem}.txt")
            if not os.path.exists(base_label):
                print(f"Warning: no label found for oversampled image {image_file}.")
                continue
            with open(base_label, 'r') as src:
                label_text = src.read()
            with open(os.path.join(labels_dir, f"{stem}.txt"), 'w') as dst:
                dst.write(label_text)
            restored += 1
        if restored:
            print(f"Restored {restored} oversampled {split_name} labels.")

    # Regenerate train labels (including oversampled files)
    clear_labels(dataset_dirs['train_labels'])
    train_stems = convert_to_yolo(train_data, "train")
    restore_oversampled_labels("train", train_stems)

    # Regenerate val labels
    clear_labels(dataset_dirs['val_labels'])
    convert_to_yolo(val_data, "val")

# Usage
# Replaces the label files written earlier with ones that use remapped class ids.
prepare_yolo_labels(dataset_dirs, train_data, val_data, image_id_to_file)

### YOLO MODEL 

#### YOLO 8S

In [None]:
from ultralytics import YOLO
import os

# Load model
model_name = "yolov8s.pt"
# The branch below only reports whether the weights file is already present in
# the working directory; it does not download anything itself.
# NOTE(review): presumably YOLO() fetches missing weights on its own — confirm.
if not os.path.exists(model_name):
    print(f"Downloading {model_name}...")
else:
    print(f"Loading {model_name}...")

model = YOLO(model_name)

# Train
# The dataset is described by animals.yaml, which is not created in this notebook.
results = model.train(
    data="/home/jovyan/__ANIMALS/datasets/Y9_AUTO_FIN/animals.yaml",
    epochs=100,
    batch=16,
    imgsz=640,
    device="cpu",  # Change to "0" if GPU is available
    patience=10,
    save_period=5,
    pretrained=True,
    optimizer="AdamW",
    lr0=0.001,
    cos_lr=True,
    augment=True,
    mosaic=1.0,      # Combine four images into one
    hsv_h=0.015,     # Hue adjustment
    hsv_s=0.7,       # Saturation adjustment
    hsv_v=0.4,       # Value (brightness) adjustment
    flipud=0.5,      # 50% chance of flipping upside down
    fliplr=0.5,      # 50% chance of flipping left-right (corrected from flip_lr)
    degrees=10.0,    # Random rotation up to ±10 degrees (corrected from rotate)
    translate=0.1,   # Translate by 10% of image size
    scale=0.5,       # Scale by up to 50%
    cls=0.5,         # Weight for classification loss (to help with imbalance)
    box=7.5,         # Weight for bounding box loss
    dfl=1.5          # Weight for distribution focal loss
)

print("Training completed. Results:", results)

In [None]:
  # Run validation
# Validate the model currently bound to `model` on the 'val' split.
# NOTE(review): the training cell passes an absolute path to animals.yaml while
# this call uses a relative one — confirm both resolve to the same file.
results = model.val(
        data='animals.yaml',
        split='val',
        save_json=True,
        save_conf=True,
        save_txt=True
)

In [None]:
# Load the best model from the training run
# NOTE(review): the run name "YOLO_5su_fin" is hard-coded and the path has a
# doubled slash before "weights" — confirm it points at the intended run.
model = YOLO("runs/detect/YOLO_5su_fin//weights/best.pt")

# Make predictions on a test image; save=True asks for the result to be saved
results = model.predict("test_tiger.jpg", save=True)

# Display the predicted image
%matplotlib inline
import os
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Path to the predict folder
# NOTE(review): "predict4" is a hard-coded run directory — confirm it is the
# folder the preceding prediction wrote to.
predict_dir = "runs/detect/predict4"

# List all files in the predict folder (image extensions only, case-insensitive)
predicted_files = [f for f in os.listdir(predict_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

# Plot each predicted image; a file that fails to load is reported and skipped
for file in predicted_files:
    pred_img_path = os.path.join(predict_dir, file)
    try:
        img = Image.open(pred_img_path)
        plt.figure(figsize=(18, 12))
        plt.imshow(img)
        plt.title(f"Predicted: {file}")
        plt.axis('off')
        plt.show()
    except Exception as e:
        print(f"Error loading {file}: {e}")

In [None]:
# Path to the train folder
train5_dir = "runs/detect/YOLO_5su_fin//"

# List of metrics to plot (curve images expected inside the run folder)
metrics = ["F1_curve.png", "PR_curve.png", "R_curve.png"]

# Plot each metric; a missing file only produces a warning
for metric in metrics:
    metric_path = os.path.join(train5_dir, metric)
    if os.path.exists(metric_path):
        img = Image.open(metric_path)
        plt.figure(figsize=(8, 6))
        plt.imshow(img)
        plt.title(f"Train5: {metric}")
        plt.axis('off')
        plt.show()
    else:
        print(f"Warning: {metric} not found in {train5_dir}.")


# Path to the val folder
# NOTE(review): the folder is named "train72" although it is used for the
# validation curves — confirm this is the directory written by model.val().
val_dir = "runs/detect/train72"

# List of metrics to plot
metrics = ["F1_curve.png", "PR_curve.png", "R_curve.png"]

# Plot each metric; a missing file only produces a warning
for metric in metrics:
    metric_path = os.path.join(val_dir, metric)
    if os.path.exists(metric_path):
        img = Image.open(metric_path)
        plt.figure(figsize=(18, 12))
        plt.imshow(img)
        plt.title(f"Validation: {metric}")
        plt.axis('off')
        plt.show()
    else:
        print(f"Warning: {metric} not found in {val_dir}.")

In [None]:
import os
import random

# Directory containing images
images_dir = "/home/jovyan/__ANIMALS/images"

# Select up to 50 random images (fewer if the directory holds fewer).
# The sample is drawn from the full image directory, not from the val split.
image_files = [f for f in os.listdir(images_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
random_images = random.sample(image_files, min(50, len(image_files)))

# Print selected images
print("Selected images for prediction:")
for img in random_images:
    print(img)

In [None]:
from ultralytics import YOLO

# Load the trained YOLO model (best weights of the hard-coded "YOLO_5su_fin" run)
model = YOLO("runs/detect/YOLO_5su_fin/weights/best.pt")

In [None]:
# Make predictions and save results, one predict() call per sampled image
# NOTE(review): the message below always reports "runs/detect/predict";
# confirm that save_dir is honoured and that this is where the files land.
for img in random_images:
    img_path = os.path.join(images_dir, img)
    results = model.predict(img_path, save=True, save_dir="runs/detect/predict")
    print(f"Prediction saved for {img} in runs/detect/predict.")

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

# Path to the predict folder
# NOTE(review): "predict6" is hard-coded, while the prediction cell reports
# saving to "runs/detect/predict" — confirm which folder holds the results.
predict_dir = "runs/detect/predict6"

# List all files in the predict folder (image extensions only, case-insensitive)
predicted_files = [f for f in os.listdir(predict_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

# Plot each predicted image; a file that fails to load is reported and skipped
for file in predicted_files:
    pred_img_path = os.path.join(predict_dir, file)
    try:
        img = Image.open(pred_img_path)
        plt.figure(figsize=(18, 12))
        plt.imshow(img)
        plt.title(f"Predicted: {file}")
        plt.axis('off')
        plt.show()
    except Exception as e:
        print(f"Error loading {file}: {e}")

#### YOLO 8M

## Set Up Directories

In [None]:
from ultralytics import YOLO
import os

# Load model
model_name = "yolov8m.pt"
# The branch below only reports whether the weights file is already present in
# the working directory; it does not download anything itself.
# NOTE(review): presumably YOLO() fetches missing weights on its own — confirm.
if not os.path.exists(model_name):
    print(f"Downloading {model_name}...")
else:
    print(f"Loading {model_name}...")

model = YOLO(model_name)

# Train
# Same settings as the yolov8s run, except that the augmentation and loss
# weight arguments are commented out and therefore not passed to train().
results = model.train(
    data="/home/jovyan/__ANIMALS/datasets/Y9_AUTO_FIN/animals.yaml",
    epochs=100,
    batch=16,
    imgsz=640,
    device="cpu",  # Change to "0" if GPU is available
    patience=10,
    save_period=5,
    pretrained=True,
    optimizer="AdamW",
    lr0=0.001,
    cos_lr=True,
    augment=True,
    mosaic=1.0,      # Combine four images into one
    # hsv_h=0.015,     # Hue adjustment
    # hsv_s=0.7,       # Saturation adjustment
    # hsv_v=0.4,       # Value (brightness) adjustment
    # flipud=0.5,      # 50% chance of flipping upside down
    # fliplr=0.5,      # 50% chance of flipping left-right (corrected from flip_lr)
    # degrees=10.0,    # Random rotation up to ±10 degrees (corrected from rotate)
    # translate=0.1,   # Translate by 10% of image size
    # scale=0.5,       # Scale by up to 50%
    # cls=0.5,         # Weight for classification loss (to help with imbalance)
    # box=7.5,         # Weight for bounding box loss
    # dfl=1.5          # Weight for distribution focal loss
)

print("Training completed. Results:", results)