In [None]:
import os
import torch
from ultralytics import YOLO

unzipped_folder = r"V:\temp\_yolo_train\mouss_source_data"  # Use raw string for Windows paths
print(torch.cuda.is_available())  # Should return True if CUDA is properly set up
print(torch.version.cuda)  # Prints the version of CUDA PyTorch is using
print(f"Available GPUs: {torch.cuda.device_count()}")
print(f"Current GPU: {torch.cuda.current_device()}")
print(f"GPU Name: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Step 3: Check if CUDA is available and load the large model (YOLOv8x) to CUDA if possible
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Step 4: Create folders for the new dataset (images and labels)
base_path = r"V:\temp\20240923_yolo_train\lgrds_fish_dataset"

# Step 11: Train the YOLOv8n model using the generated dataset
small_model = YOLO("yolov8n.pt")  # Load the smaller YOLOv8n model

# Train the model using the generated fish-only dataset
small_model.train(data=yaml_file_path, epochs=50, imgsz=416, batch=16, lr0=0.001)

print("Training complete!")
# Save the model
small_model.save(r"V:\temp\20240923_yolo_train\yolov8n_fish_trained_lgrds.pt")

# Evaluate model performance
metrics = small_model.val(data=yaml_file_path)  # Evaluate precision, recall, and mAP

In [None]:
import os
import torch
from ultralytics import YOLO

unzipped_folder = r"V:\temp\20161014_192048_1"  # Use raw string for Windows paths

# Step 3: Check if CUDA is available and load the large model (YOLOv8x) to CUDA if possible
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Load the large model (YOLOv8x) that was fine-tuned for VME detection
large_model = YOLO(r"V:\temp\models\best.pt")  # Update to your model path
large_model = large_model.to(device)  # Move the model to GPU (CUDA) if available

# Step 4: Create folders for the new dataset (images and labels)
base_path = r"V:\temp\lg_fish_dataset"


# Step 10: Create a YAML file for the dataset
fish_dataset_yaml = f"""
train: "V:/temp/lg_fish_dataset/images/train"
val: "V:/temp/lg_fish_dataset/images/val"

# Number of classes
nc: 1

# Class names
names: ['Fish']
"""

# Save the YAML file
yaml_file_path = os.path.join(base_path, "fish_dataset.yaml")
with open(yaml_file_path, "w") as yaml_file:
    yaml_file.write(fish_dataset_yaml)

print(f"YAML file created: {yaml_file_path}")


# Step 11: Train the YOLOv8n model using the generated dataset
small_model = YOLO("yolov8n.pt")  # Load the smaller YOLOv8n model

# Train the model using the generated fish-only dataset
small_model.train(data=yaml_file_path, epochs=50, imgsz=416, batch=16, lr0=0.001)

print("Training complete!")
# Save the model
small_model.save(r"V:\temp\yolov8n_fish_trained_lgds.pt")

# Evaluate model performance
metrics = small_model.val(data=yaml_file_path)  # Evaluate precision, recall, and mAP

# Yolo8n Training -
## Unsupervised training of Yolo8n (nano) model with Yolo8x large model and dataset

In [None]:
import os
import py7zr
import torch
from ultralytics import YOLO

#!pip install ultralytics
#!pip install py7zr  # Make sure py7zr is installed for extracting .7z files

# Step 2: Unzip the original images from the .7z archive
# Define the path to the .7z file containing the images
seven_zip_file_path = "/content/original.7z"  # Replace with your .7z file path
unzipped_folder = "/content"

# Extract the .7z file into the specified directory
with py7zr.SevenZipFile(seven_zip_file_path, mode='r') as archive:
    archive.extractall(unzipped_folder)

unzipped_folder = "/content/original/original"

# Step 3: Check if CUDA is available and load the large model (YOLOv8x) to CUDA if possible
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Load the large model (YOLOv8x) that was fine-tuned for VME detection
large_model = YOLO("/content/best.pt")  # Replace with your model path
large_model = large_model.to(device)  # Move the model to GPU (CUDA) if available

# Step 4: Create folders for the new dataset (images and labels)
base_path = "/content/fish_dataset"

# Create directories for train/val images and labels
os.makedirs(f"{base_path}/images/train", exist_ok=True)
os.makedirs(f"{base_path}/images/val", exist_ok=True)
os.makedirs(f"{base_path}/labels/train", exist_ok=True)
os.makedirs(f"{base_path}/labels/val", exist_ok=True)

# Step 5: Define the input folder containing the original images (unzipped)
input_images_folder = unzipped_folder
image_paths = [os.path.join(input_images_folder, img) for img in os.listdir(input_images_folder) if img.endswith(('.jpg', '.png'))]

# Split images into train and validation (80% train, 20% validation)
train_images = image_paths[:int(0.8 * len(image_paths))]
val_images = image_paths[int(0.8 * len(image_paths)):]

# Function to save YOLO format labels (class_id x_center y_center width height)
def save_yolo_labels(label_path, class_id, bbox, image_width, image_height):
    if bbox is not None and len(bbox) > 0:  # Ensure there are bounding boxes
        x_center = (bbox[0] + bbox[2]) / 2 / image_width
        y_center = (bbox[1] + bbox[3]) / 2 / image_height
        width = (bbox[2] - bbox[0]) / image_width
        height = (bbox[3] - bbox[1]) / image_height
        with open(label_path, "a") as f:
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")
        print(f"Label saved for {label_path}: {class_id} {x_center} {y_center} {width} {height}")
    else:
        print(f"No bounding boxes found for {label_path}, skipping label.")

# Function to process images, run inference, and save results
def process_images(image_paths, split):
    for image_path in image_paths:
        # Step 6: Run the large model to detect objects in the image (with GPU if available)
        results = large_model(image_path)

        # Check if any instances were detected
        print(f"Processing {image_path}, found {len(results[0].boxes)} instances.")

        # Extract image dimensions
        img_name = os.path.basename(image_path)
        img = results[0].orig_img
        img_height, img_width = img.shape[:2]

        # Save the image to the appropriate split folder (train/val)
        output_image_path = f"{base_path}/images/{split}/{img_name}"
        os.rename(image_path, output_image_path)

        # Step 7: Filter results to only include "fish" category (class_id = 2)
        fish_class_index = 2  # Assuming fish is class 2 in the large model
        for result in results:
            for i, cls in enumerate(result.boxes.cls):
                if cls == fish_class_index:  # Only keep fish detections
                    bbox = result.boxes.xyxy[i].cpu().numpy()  # Bounding box (x1, y1, x2, y2)
                    print(f"Detected fish with bounding box: {bbox}")
                    print(f"Image dimensions: {img_width}x{img_height}")

                    # Step 8: Save label file in YOLO format
                    label_name = img_name.replace(".jpg", ".txt").replace(".png", ".txt")
                    label_path = f"{base_path}/labels/{split}/{label_name}"

                    # Save the bounding box in YOLO format (class_id = 0 for fish)
                    save_yolo_labels(label_path, class_id=0, bbox=bbox, image_width=img_width, image_height=img_height)

# Step 9: Process all training and validation images
print("Processing all training images...")
process_images(train_images, "train")

print("Processing all validation images...")
process_images(val_images, "val")

# Step 10: Create a YAML file for the dataset
fish_dataset_yaml = """
train: /content/fish_dataset/images/train
val: /content/fish_dataset/images/val

# Number of classes
nc: 1

# Class names
names: ['fish']
"""

# Save the YAML file
with open("/content/fish_dataset.yaml", "w") as yaml_file:
    yaml_file.write(fish_dataset_yaml)

print("YAML file created: /content/fish_dataset.yaml")

# Step 11: Train the YOLOv8n model using the generated dataset
small_model = YOLO("yolov8n.pt")  # Load the smaller YOLOv8n model

# Train the model using the generated fish-only dataset
small_model.train(data="/content/fish_dataset.yaml", epochs=50, imgsz=416, batch=16, lr0=0.001)

print("Training complete!")
# model save
small_model.save("/content/yolov8n_fish_trained.pt")
# model val stats
metrics = small_model.val(data="/content/fish_dataset.yaml")  # This will evaluate precision, recall, and mAP

### Start Model Metrics

In [None]:
metrics = small_model.val(data="/content/fish_dataset.yaml")  # This will evaluate precision, recall, and mAP

### Download Files

In [None]:
import shutil
from google.colab import files

# 1. Save and Download the Trained YOLOv8n Model
small_model_path = '/content/yolov8n_fish_trained.pt'
small_model.save(small_model_path)  # Save the trained model
files.download(small_model_path)  # Download the trained model

# 2. Zip and Download the Training Logs (runs folder)
# YOLOv8 stores training results and logs in the 'runs' folder, which may contain weights, metrics, and other logs
runs_folder = '/content/runs'
runs_zip = '/content/runs.zip'

# Zip the 'runs' folder if it exists
if os.path.exists(runs_folder):
    shutil.make_archive(runs_zip.replace('.zip', ''), 'zip', runs_folder)
    files.download(runs_zip)  # Download the zipped 'runs' folder

# 3. Zip and Download the fish_dataset Folder
fish_dataset_folder = '/content/fish_dataset'
fish_dataset_zip = '/content/fish_dataset.zip'

# Zip the 'fish_dataset' folder
shutil.make_archive(fish_dataset_zip.replace('.zip', ''), 'zip', fish_dataset_folder)
files.download(fish_dataset_zip)  # Download the zipped 'fish_dataset' folder

# 4. Download the Custom fish_dataset.yaml File
yaml_file_path = '/content/fish_dataset.yaml'
files.download(yaml_file_path)  # Download the dataset YAML file

print("All relevant files and folders have been zipped and are ready for download!")


## Test Script

In [9]:
import os
import py7zr
import torch
from ultralytics import YOLO

# Step 1: Install Ultralytics YOLOv8 and py7zr (if not already installed)
#!pip install ultralytics py7zr

# Define the folder path
unzipped_folder = "/content/original/original"

# Step 3: Check if CUDA is available and load the large model (YOLOv8x) to CUDA if possible
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Load the large model (YOLOv8x) that was fine-tuned for VME detection
large_model = YOLO("/content/best.pt")  # Replace with your model path
large_model = large_model.to(device)  # Move the model to GPU (CUDA) if available

# Step 4: Create folders for the new dataset (images and labels)
base_path = "/content/fish_dataset"

# Create directories for train/val images and labels
os.makedirs(f"{base_path}/images/train", exist_ok=True)
os.makedirs(f"{base_path}/images/val", exist_ok=True)
os.makedirs(f"{base_path}/labels/train", exist_ok=True)
os.makedirs(f"{base_path}/labels/val", exist_ok=True)

# Step 5: Define the input folder containing the original images (unzipped)
input_images_folder = unzipped_folder
image_paths = [os.path.join(input_images_folder, img) for img in os.listdir(input_images_folder) if img.endswith(('.jpg', '.png'))]

# Split images into train and validation (80% train, 20% validation)
train_images = image_paths[:int(0.8 * len(image_paths))]
val_images = image_paths[int(0.8 * len(image_paths)):]

# Function to save YOLO format labels (class_id x_center y_center width height)
def save_yolo_labels(label_path, class_id, bbox, image_width, image_height):
    if bbox is not None and len(bbox) > 0:  # Ensure there are bounding boxes
        x_center = (bbox[0] + bbox[2]) / 2 / image_width
        y_center = (bbox[1] + bbox[3]) / 2 / image_height
        width = (bbox[2] - bbox[0]) / image_width
        height = (bbox[3] - bbox[1]) / image_height
        with open(label_path, "a") as f:
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")
        print(f"Label saved for {label_path}: {class_id} {x_center} {y_center} {width} {height}")
    else:
        print(f"No bounding boxes found for {label_path}, skipping label.")

# Function to process images, run inference, and save results
def process_images(image_paths, split):
    for image_path in image_paths:
        # Step 6: Run the large model to detect objects in the image (with GPU if available)
        results = large_model(image_path)

        # Check if any instances were detected
        print(f"Processing {image_path}, found {len(results[0].boxes)} instances.")

        # Extract image dimensions
        img_name = os.path.basename(image_path)
        img = results[0].orig_img
        img_height, img_width = img.shape[:2]

        # Save the image to the appropriate split folder (train/val)
        output_image_path = f"{base_path}/images/{split}/{img_name}"
        os.rename(image_path, output_image_path)

        # Step 7: Filter results to only include "fish" category (class 2 based on your provided example)
        fish_class_index = 2  # Update to class 2 for fish
        for result in results:
            for i, cls in enumerate(result.boxes.cls):
                if cls == fish_class_index:  # Only keep fish detections
                    bbox = result.boxes.xyxy[i].cpu().numpy()  # Bounding box (x1, y1, x2, y2)
                    print(f"Detected fish with bounding box: {bbox}")
                    print(f"Image dimensions: {img_width}x{img_height}")

                    # Step 8: Save label file in YOLO format
                    label_name = img_name.replace(".jpg", ".txt").replace(".png", ".txt")
                    label_path = f"{base_path}/labels/{split}/{label_name}"

                    # Save the bounding box in YOLO format (class_id = 0 for fish)
                    save_yolo_labels(label_path, class_id=0, bbox=bbox, image_width=img_width, image_height=img_height)

# Step 9: Process train and validation images (use a small sample for testing)
print("Processing a small sample of training images to test label generation...")
sample_images = train_images[:5]  # Test with a few images first
process_images(sample_images, "train")

# Check if labels were generated
train_labels = os.listdir(f"{base_path}/labels/train")
print(f"Generated labels for training set: {train_labels}")

print("Test complete!")


Using device: cuda
Processing a small sample of training images to test label generation...

image 1/1 /content/original/original/01649.jpg: 512x640 1 Fish, 69.2ms
Speed: 3.2ms preprocess, 69.2ms inference, 1.5ms postprocess per image at shape (1, 3, 512, 640)
Processing /content/original/original/01649.jpg, found 1 instances.
Detected fish with bounding box: [     175.25      150.98      378.53      304.93]
Image dimensions: 968x728
Label saved for /content/fish_dataset/labels/train/01649.txt: 0 0.286039967182254 0.31312393356155566 0.21000108640056012 0.21146348806527945

image 1/1 /content/original/original/00381.jpg: 512x640 2 Fishs, 43.9ms
Speed: 2.2ms preprocess, 43.9ms inference, 1.4ms postprocess per image at shape (1, 3, 512, 640)
Processing /content/original/original/00381.jpg, found 2 instances.
Detected fish with bounding box: [      103.2      167.75      311.31      326.53]
Image dimensions: 968x728
Label saved for /content/fish_dataset/labels/train/00381.txt: 0 0.2141046