# YOLO Label Conversion and Visualization

This script demonstrates how to convert label files from LabelMe to YOLO format, and then visualize the results using matplotlib. The following libraries are used:

- `labelme2yolo`: A module to convert LabelMe annotations to YOLO format.
- `json` and `os`: Built-in Python libraries for handling JSON data and operating system functionalities, respectively.
- `ultralytics.YOLO`: The model class from the `ultralytics` package, used to load, train, and run YOLO (You Only Look Once) models.
- `matplotlib.pyplot`: A plotting library used for visualization.
- `PIL.Image`: A library for image processing.

In [None]:
import labelme2yolo
import json
import os
from ultralytics import YOLO
from matplotlib import pyplot as plt
import matplotlib.image as img
from matplotlib.pyplot import figure
from PIL import Image

print(os.getcwd())

# JSON File Processing and Image Path Modification

This script processes JSON files in a specified folder to modify the `imagePath` attribute. The following steps are performed:

1. Iterate over all JSON files in the specified folder.
2. Open and read each JSON file.
3. Check if the `imagePath` attribute exists.
4. Modify the `imagePath` attribute to only include the filename, removing any preceding path.
5. Save the modified data back to the JSON file.
6. Print the contents of a specific JSON file to verify the changes.

In [None]:
folder_path = 'Intersecting_Batches-4'

# Reduce the `imagePath` of every JSON file in the folder to a bare filename.
for filename in os.listdir(folder_path):
    if not filename.endswith('.json'):  # Only JSON files are processed
        continue
    file_path = os.path.join(folder_path, filename)

    # Open and read the JSON file
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Check if 'imagePath' exists in the dictionary and modify it
    if 'imagePath' in data:
        # Normalise backslashes first: a path written with Windows separators
        # (e.g. "..\\images\\tile.jpg") contains no '/' and would otherwise be
        # left untouched by the split.
        data['imagePath'] = data['imagePath'].replace('\\', '/').split('/')[-1]

    # Save the (possibly modified) data back to the JSON file
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=4)

print("Processing complete. Image paths have been updated.")

file_path = 'Intersecting_Batches-4/tile_0_30.json'

# Re-read one file to verify the change
with open(file_path, 'r') as file:
    data = json.load(file)

# Last expression of the cell: the notebook displays the updated imagePath
data['imagePath']


In [None]:
# Open and read the JSON file again (file_path is the value set in the previous cell)
with open(file_path, 'r') as file:
    data = json.load(file)
# Last expression of the cell: the notebook displays the full parsed JSON content
data

# TIFF to JPEG Conversion Script

This script converts all TIFF files in a specified input directory to JPEG format and saves them to an output directory. The following steps are performed:

1. Check if the output directory exists, and create it if it doesn't.
2. Iterate through all files in the input directory.
3. For each TIFF file, open and convert it to JPEG format.
4. Save the converted JPEG file to the output directory.

In [None]:
from PIL import Image
import os

def convert_tif_to_jpeg(input_dir, output_dir):
    """
    Converts all TIFF files in the input directory to JPEG format,
    saving them to the output directory.

    Args:
        input_dir: Directory scanned (non-recursively) for files whose name
            ends in ``.tif`` or ``.tiff``; the match is case-insensitive.
        output_dir: Directory that receives the ``.jpg`` files. It is created
            if it does not exist.

    A file that cannot be converted is reported on stdout and skipped, so one
    bad file does not stop the rest of the batch.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Iterate through all files in the input directory
    for filename in os.listdir(input_dir):
        if not filename.lower().endswith((".tif", ".tiff")):
            continue

        # Construct the full file paths; the output keeps the base name
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename.rsplit('.', 1)[0] + '.jpg')

        # Open the image, convert and save as JPEG
        try:
            with Image.open(input_path) as image:
                # Convert to RGB before writing, as the original code did
                image.convert('RGB').save(output_path, "JPEG")
        except Exception as e:
            # Best-effort batch: report the failure and carry on
            print(f"Failed to convert {filename}: {e}")

# Example usage
# The JPEGs are written to a `jpeg` sub-folder inside the folder that holds the TIFFs.
input_directory = 'Intersecting_Batches-4'
output_directory = 'Intersecting_Batches-4/jpeg'
convert_tif_to_jpeg(input_directory, output_directory)


In [None]:
!labelme2yolo --json_dir ./Intersecting_Batches-4 --val_size 0.2

# Checking for balanced classes

# Label Extraction and Visualization Script

This script scans a directory for `.txt` files, extracts the first token from each line (assumed to be a label), counts the occurrences of each label, and plots a bar chart to visualize the label counts. The following steps are performed:

1. Traverse the directory to find all `.txt` files.
2. For each `.txt` file, extract the first token from each line and count its occurrences.
3. Print the count of each label.
4. Plot a bar chart of the label counts using `matplotlib`.

In [None]:
import os
from collections import Counter

def scan_and_extract_labels_from_txt(directory):
    """Count the first token of every line in the ``.txt`` files under *directory*.

    The directory tree is walked recursively. For each non-blank line of each
    ``.txt`` file, the first whitespace-separated token (assumed to be the
    class label) is counted. The totals are printed and returned.

    Args:
        directory: Root directory to scan.

    Returns:
        A ``collections.Counter`` mapping each label token (a string) to the
        number of lines that start with it. Empty if nothing was found.
    """
    label_counts = Counter()
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r') as f:
                    for line in f:
                        tokens = line.split()
                        # Skip blank lines: indexing [0] on them raised
                        # IndexError, which aborted the rest of the file.
                        if tokens:
                            label_counts[tokens[0]] += 1
            except (OSError, UnicodeDecodeError) as e:
                # Unreadable file: report it and continue with the others
                print(f"Error processing file {file_path}: {e}")

    # Print the count of each label
    for label, count in label_counts.items():
        print(f"Label {label}: {count}")

    return label_counts

# Directory containing the label .txt files to analyse
directory_path = 'Post_Event_Grids_In_JPEG/YOLODataset/labels'
label_counts = scan_and_extract_labels_from_txt(directory_path)

# Plotting the bar chart
if label_counts:
    labels, counts = zip(*label_counts.items())
    plt.figure(figsize=(10, 6))
    plt.bar(labels, counts, color='skyblue')
    plt.xlabel('Labels')
    plt.ylabel('Counts')
    plt.title('Label Counts in Dataset')
    plt.xticks(rotation=45)
    plt.show()
else:
    # zip(*...) over an empty Counter cannot be unpacked into two names,
    # so report the empty result instead of raising ValueError.
    print(f"No labels found under {directory_path}; nothing to plot.")


# Image Augmentation Pipeline

This script defines an image augmentation pipeline using the `albumentations` library. The pipeline includes various augmentations such as horizontal flip, vertical flip, random rotation, brightness/contrast adjustment, Gaussian noise, and blur. The following steps are performed:

1. Import necessary augmentation functions from `albumentations`.
2. Define a function to create and return a composition of augmentations.

In [None]:
%pip install albumentations


In [None]:
from albumentations import Compose, HorizontalFlip, VerticalFlip, RandomRotate90, RandomBrightnessContrast, GaussNoise, Blur

def get_augmentation_pipeline():
    """Build and return the albumentations ``Compose`` used for augmentation.

    The transforms are applied in the order listed, each with its own
    probability ``p``.

    NOTE(review): ``Compose`` is created without any bounding-box parameters,
    so presumably only the image is transformed and annotations are not
    updated. Confirm this is intended for the detection dataset.
    """
    transforms = [
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        RandomRotate90(p=0.5),
        RandomBrightnessContrast(p=0.5),
        GaussNoise(p=0.2),
        Blur(blur_limit=3, p=0.2),
        # Further augmentations can be appended here.
    ]
    return Compose(transforms)


# Image Loading, Saving, and Augmentation Functions

This script provides functions to load an image, save an image, and apply an augmentation pipeline to an image using `opencv` and `albumentations` libraries. The following steps are performed:

1. Load an image from a given path.
2. Save an image to a given path.
3. Apply an augmentation pipeline to an image.

In [None]:
def load_image(image_path):
    """Loads an image from a given path and converts it from BGR to RGB.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The image returned by ``cv2.imread``, converted with
        ``cv2.COLOR_BGR2RGB``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` because the file
            is missing or cannot be decoded.
    """
    image = cv2.imread(image_path)
    if image is None:
        # imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

def save_image(image, path):
    """Saves an RGB image to a given path.

    Args:
        image: Image in RGB channel order; it is converted to BGR before
            writing.
        path: Destination file path passed to ``cv2.imwrite``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # imwrite returns False on failure instead of raising; surface that so
    # callers do not report a file as saved when it was not.
    if not cv2.imwrite(path, bgr_image):
        raise OSError(f"Could not write image: {path}")


def augment_image(image, augmentation_pipeline):
    """Run *image* through *augmentation_pipeline* and return the result.

    The pipeline is called with the keyword argument ``image=`` and must
    return a mapping; the value stored under ``'image'`` is returned.
    """
    return augmentation_pipeline(image=image)['image']


# Batch Image Augmentation

This script applies the augmentation pipeline defined above to every image in a directory, using the `load_image`, `save_image`, and `augment_image` helper functions. The following steps are performed:

1. Create the output directory if it doesn't exist.
2. Iterate over all image files (`.jpg`, `.jpeg`, `.png`, `.tif`, `.tiff`) in the input directory.
3. Load each image, apply the augmentations, and save the result to the output directory with an `aug_` filename prefix.
4. Print a message for each image reporting whether it was processed successfully.


In [None]:
import os
from PIL import Image
import cv2

# Assuming the load_image and save_image functions are already defined as per previous examples.

def process_directory(input_dir, output_dir):
    """Augment every image in *input_dir* and write the results to *output_dir*.

    Files are selected by extension (case-insensitive). Each augmented image
    is saved under the original filename prefixed with ``aug_``. A failure on
    one file is printed and does not stop the remaining files.
    """
    # Make sure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    pipeline = get_augmentation_pipeline()
    image_suffixes = (".jpg", ".jpeg", ".png", ".tif", ".tiff")

    for name in os.listdir(input_dir):
        # Guard clause: ignore anything that is not a supported image type
        if not name.lower().endswith(image_suffixes):
            continue

        source = os.path.join(input_dir, name)
        target = os.path.join(output_dir, f"aug_{name}")

        try:
            # load -> augment -> save
            save_image(augment_image(load_image(source), pipeline), target)
            print(f"Processed and saved {name} as {target}")
        except Exception as e:
            print(f"Failed to process {name}: {e}")

# Example usage
# Input and output point at the same folder, so each augmented copy is written
# next to its source image under an "aug_" filename prefix.
# NOTE(review): re-running this cell will also pick up earlier "aug_*" files
# and augment them again. Confirm that is intended.
if __name__ == "__main__":
    input_directory = 'Intersecting_Batches-4/jpeg'
    output_directory = 'Intersecting_Batches-4/jpeg'
    process_directory(input_directory, output_directory)


# Model

In [None]:
# Root folders for the dataset images and their label files
images_path, labels_path = 'datasets/images', 'datasets/labels'

# YOLOv8 Model Initialization

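This line initializes a YOLOv8 model by loading the pre-trained weights from the "yolov8s.pt" file. This is the small object-detection checkpoint and serves as the starting point for training the detector (segmentation would require a separate `-seg` checkpoint).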


In [None]:
# Create the model from the pre-trained weights file "yolov8s.pt"
model = YOLO("yolov8s.pt")

# Model Training

This line trains the YOLOv8 model using the dataset specified in the `dataset.yaml` file. The training process runs for 12 epochs with an image size of 512x512 pixels.


In [None]:
# Train on the dataset described by ./dataset.yaml for 12 epochs at image size 512
results = model.train(data='./dataset.yaml', epochs=12, imgsz=512)

# Install Packages for Hyperparameter Tuning

This script imports necessary functions for hyperparameter tuning from the `hyperopt` library. The imported functions include:

- `fmin`: Function to minimize the objective function.
- `tpe`: Tree-structured Parzen Estimator, a sequential model-based optimization algorithm.
- `hp`: Hyperparameter search space.
- `STATUS_OK`: Status flag for successful trials.
- `Trials`: Object to keep track of the optimization process and results.


In [None]:
%pip install hyperopt

In [None]:
# Import the hyperopt names for hyperparameter tuning (the package itself is installed in the previous cell)
# NOTE(review): none of these names is referenced in the visible cells. Confirm they are still needed.
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [None]:
!python /Users/elenalickel/Desktop/EY_Challengev2/venv/lib/python3.8/site-packages/ultralytics/models/yolo/classify/train.py --img 512 --batch 16 --epochs 30 --data dataset.yaml --weights /runs/detect/train23/weights/last.pt

# Hyperparameter Tuning with Ray Tune

This script performs hyperparameter tuning using the `ray[tune]` library. The process involves defining a search space for hyperparameters and running multiple trials to find the optimal configuration. The following steps are performed:

1. Import necessary modules from `ray.tune`.
2. Define a function `train_model_with_ray_tune` to train the model with parameters provided by Ray Tune.
3. Define the search space for hyperparameters.
4. Start the hyperparameter tuning process with `tune.run`.
5. Extract and print the best hyperparameters.

In [None]:
%pip install ray

In [None]:
%pip install "ray[tune]"

In [None]:
from ray import tune
import numpy as np


def train_model_with_ray_tune(config):
    """Run one training trial with the hyperparameters in *config*.

    Reads ``config["lr"]`` and ``config["batch_size"]``, trains for 30 epochs
    via ``train_model_with_dynamic_config`` and reports the returned
    ``'validation_metric'`` to Ray Tune.

    NOTE(review): this relies on ``train_model_with_dynamic_config`` and a
    module-level ``latest_ckpt_path`` existing when the trial runs; neither is
    defined in this cell.
    """
    # Pull the sampled hyperparameters out of the trial config
    learning_rate, batch = config["lr"], config["batch_size"]

    # Delegate to the existing training function
    outcome = train_model_with_dynamic_config(
        ckpt_path=latest_ckpt_path,  # must be defined before the trial runs
        epochs=30,  # fixed here; could be made tunable as well
        lr=learning_rate,
        batch_size=batch,
    )

    # Hand the validation metric back to Ray Tune
    tune.report(validation_metric=outcome['validation_metric'])

# Hyperparameter search space: log-uniform learning rate, categorical batch size
search_space = dict(
    lr=tune.loguniform(1e-5, 1e-1),
    batch_size=tune.choice([8, 16, 32, 64]),
)

# Launch the tuning run
analysis = tune.run(
    train_model_with_ray_tune,
    config=search_space,
    metric="validation_metric",  # metric reported by the trainable
    mode="max",  # higher metric is better
    num_samples=100,  # number of trials
    resources_per_trial={"cpu": 1, "gpu": 1},  # adjust to the available hardware
)

# Read back the best configuration found
best_config = analysis.best_config
best_lr, best_batch_size = best_config["lr"], best_config["batch_size"]

print(f"Best hyperparameters found were: lr = {best_lr}, batch_size = {best_batch_size}")



In [None]:
# Install the ray module
!pip install ray

import numpy as np
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def train_model_with_dynamic_config(ckpt_path, epochs, lr, batch_size):
    """Placeholder training function that does not train anything.

    All four arguments are accepted but ignored. The return value is a dict
    with the single key ``"validation_metric"`` holding a random number from
    ``np.random.random()``, so tuning results built on this stub are
    meaningless until real training logic is added.
    """
    fake_metric = np.random.random()
    return {"validation_metric": fake_metric}

def train_model_with_ray_tune(config):
    """Run one training trial with the hyperparameters in *config*.

    Reads ``config["lr"]`` and ``config["batch_size"]``, sets the module-level
    ``latest_ckpt_path`` to a placeholder string, trains for 30 epochs via
    ``train_model_with_dynamic_config`` and reports the returned
    ``'validation_metric'`` to Ray Tune.
    """
    global latest_ckpt_path

    # Pull the sampled hyperparameters out of the trial config
    learning_rate, batch = config["lr"], config["batch_size"]

    # Placeholder checkpoint location, stored globally as in the original cell
    latest_ckpt_path = "path/to/latest/checkpoint"

    # Delegate to the existing training function
    outcome = train_model_with_dynamic_config(
        ckpt_path=latest_ckpt_path,
        epochs=30,  # fixed here; could be made tunable as well
        lr=learning_rate,
        batch_size=batch,
    )

    # Hand the validation metric back to Ray Tune
    tune.report(validation_metric=outcome['validation_metric'])

# Define the search space for hyperparameters
search_space = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "batch_size": tune.choice([8, 16, 32, 64])
}

# Define a scheduler for early stopping.
# `metric` and `mode` are deliberately NOT set here: they are passed to
# tune.run() below, and Ray Tune raises a ValueError when both the scheduler
# and tune.run() specify them.
# NOTE(review): the trainable reports a single result per trial, so the
# scheduler presumably has little to stop early. Confirm once real training
# logic reports per-epoch metrics.
scheduler = ASHAScheduler(
    max_t=30,
    grace_period=1,
    reduction_factor=2
)

# Start the hyperparameter tuning process
analysis = tune.run(
    train_model_with_ray_tune,
    resources_per_trial={"cpu": 1, "gpu": 1},  # Adjust resources per your availability
    config=search_space,
    num_samples=100,  # Number of trials to run
    metric="validation_metric",  # Name of the metric to optimize
    mode="max",  # "max" if higher metric is better, "min" otherwise
    scheduler=scheduler
)

# Extract the best hyperparameters (uses the metric/mode given to tune.run)
best_config = analysis.best_config
best_lr = best_config["lr"]
best_batch_size = best_config["batch_size"]

print(f"Best hyperparameters found were: lr = {best_lr}, batch_size = {best_batch_size}")


# Training with Tuned Parameters

After determining the best hyperparameters via tuning, the model is trained using these optimal settings. The following steps are performed:

1. Train the model using the best parameters found during hyperparameter tuning.
2. Track the training results with an experiment tracking system.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as img

# Assuming train_results is a dictionary or similar structure containing your results and metrics

# After determining the best parameters via tuning
# NOTE(review): `best_params`, `find_latest_checkpoint`, `checkpoint_dir` and
# `track_experiment_results` are not defined in any visible cell. The tuning
# cells expose `best_config` / `best_lr` / `best_batch_size`, and their search
# space has no 'epochs' entry. Confirm where these names come from before
# running this cell.
train_results = train_model_with_dynamic_config(
    ckpt_path=find_latest_checkpoint(checkpoint_dir),
    epochs=best_params['epochs'],
    lr=best_params['lr'],
    batch_size=best_params['batch_size'],
    # Potentially other parameters tuned or adjusted based on past performance
)

# Track the results with your experiment tracking system
track_experiment_results(train_results)

# Display the training results image
plt.figure(figsize=(15, 10), dpi=80)
# Reading the image (this rebinds `results`, which earlier held the model.train() output)
results = img.imread('runs/detect/train_results/results.png')
# Displaying the image
plt.imshow(results)
plt.axis('off')  # Hide the axes for better image display
plt.show()


# List Image Files in a Folder

This script lists all image files in a specified folder. It supports various image file extensions such as `.png`, `.jpg`, `.jpeg`, `.gif`, and `.bmp`. The following steps are performed:

1. Import necessary modules `os` and `glob`.
2. Define a function `list_images` to scan the folder for image files and collect their paths.
3. Specify the folder path to scan for images.
4. Print the list of image paths.


In [None]:
import os
import glob

def list_images(folder_path):
    """Return the paths of the image files found directly in *folder_path*.

    Files are matched with ``glob`` against the patterns ``*.png``, ``*.jpg``,
    ``*.jpeg``, ``*.gif`` and ``*.bmp``. Results are grouped by pattern in
    that order; sub-folders are not searched.
    """
    patterns = ('*.png', '*.jpg', '*.jpeg', '*.gif', '*.bmp')
    return [
        match
        for pattern in patterns
        for match in glob.glob(os.path.join(folder_path, pattern))
    ]

# Folder to scan for images (this rebinds the `folder_path` set in an earlier cell)
folder_path = 'datasets/images/test'
images = list_images(folder_path)
print(images)


# Load the Model

This line loads the YOLOv8 model using the best weights (`best.pt`) saved by the training run at the specified path. The model is ready for inference or further training.


In [None]:
# Load the Model
# NOTE(review): the path assumes the run was saved under runs/detect/train_results,
# but the model.train(...) call in this notebook passes no explicit run name.
# Confirm the folder before running.
model = YOLO('./runs/detect/train_results/weights/best.pt')

# Predict and Generate Output for Submission Data

This cell runs the YOLO model on every image in the submission data directory and writes one output text file per image containing the predicted class, confidence, and bounding box of each detection. The script follows these steps:

1. Define the decoding of predictions according to class names in the `.yaml` file.
2. Set the directory paths for input images and output results.
3. Create the results directory if it doesn't exist.
4. Loop through each image file in the submission data directory and perform predictions.
5. Decode the predictions and save the results to text files in the results directory.


In [None]:
# Predict on the submission data and write one result file per image.
# Decoding according to the .yaml file class names order
# NOTE(review): this mapping must match the `names` order in dataset.yaml. Confirm.
decoding_of_predictions ={2: 'undamagedcommercialbuilding', 0: 'undamagedresidentialbuilding', 1: 'damagedresidentialbuilding', 3: 'damagedcommercialbuilding'}

directory = 'Submission data'
# Directory to store outputs
results_directory = 'Validation_Data_Results'

# Create the results directory if it doesn't exist
os.makedirs(results_directory, exist_ok=True)

# Loop through each file in the directory
for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    # Only regular files with a JPEG extension (.jpg or .jpeg, any case)
    if not (os.path.isfile(file_path) and filename.lower().endswith(('.jpg', '.jpeg'))):
        continue

    print(file_path)
    print("Making a prediction on ", filename)
    results = model.predict(file_path, save=True, iou=0.5, save_txt=True, conf=0.25)
    if not results:
        print("Error: no prediction result returned for ", filename)
        continue

    # predict() is given a single image path, so one result is expected
    r = results[0]

    # Move tensors to the CPU first: .numpy() fails on tensors that live on a
    # CUDA device.
    bounding_boxes = r.boxes.xyxy.cpu().numpy()
    confidences = r.boxes.conf.cpu().numpy().tolist()
    class_names = [
        decoding_of_predictions[int(class_id)]
        for class_id in r.boxes.cls.cpu().numpy().tolist()
    ]

    text_file_name = os.path.splitext(filename)[0]
    # Creating a new .txt file for each image in the results directory
    with open(os.path.join(results_directory, f"{text_file_name}.txt"), "w") as file:
        for class_name, confidence, box in zip(class_names, confidences, bounding_boxes):
            # Corner coordinates of the bounding box (xyxy order)
            left, top, right, bottom = box
            # Write content to file in desired format
            file.write(f"{class_name} {confidence} {left} {top} {right} {bottom}\n")
    print("Output files generated successfully.")
