<a href="https://colab.research.google.com/github/21cs076/Real-Time-Landslide-Detection-and-Buried-Vehicle-Identification-using-YOLOv8/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Ultralytics package
!pip install ultralytics
!pip install kagglehub

from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO('yolov8n.pt')

# Perform inference on an image from Google Drive
results = model('https://ultralytics.com/images/bus.jpg')

# Iterate through the results and display each
for result in results:
    result.show()

In [None]:
import shutil
import os

# Define the path to the dataset
dataset_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge'

# Check if the directory exists
if os.path.exists(dataset_path):
    # Remove the directory and its contents
    shutil.rmtree(dataset_path)
    print(f"The dataset at {dataset_path} has been deleted.")
else:
    print(f"The dataset at {dataset_path} does not exist.")

import kagglehub

# Replace the dataset with Landslide4Sense Challenge
path = kagglehub.dataset_download("pypiahmad/landslide4sense-challenge")

print("Path to dataset files:", path)

In [None]:
import h5py
import numpy as np
from PIL import Image
import os
import shutil

# Function to convert .h5 images to .jpg
def convert_h5_images_to_jpg(h5_file_path, save_dir):
    with h5py.File(h5_file_path, 'r') as h5f:
        img_data = h5f['img'][:]  # Assuming 'img' is the key for image data

        # Print the shape of the data for debugging
        #print(f"Original shape: {img_data.shape}, dtype: {img_data.dtype}")

        # Select the first three channels for RGB
        img_data = img_data[:, :, :3]

        # Normalize and convert the data to uint8
        img_data = (img_data * 255).astype(np.uint8)

        # Ensure data is in RGB format
        img = Image.fromarray(img_data, mode='RGB')

        # Create the images directory if it doesn't exist
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        img_name = os.path.splitext(os.path.basename(h5_file_path))[0] + '.jpg'
        img.save(os.path.join(save_dir, img_name))

# Function to convert .h5 masks to .txt in YOLO format
def convert_h5_masks_to_txt(h5_file_path, save_dir, img_nm):
    with h5py.File(h5_file_path, 'r') as h5f:
        mask_data = h5f['mask'][:]  # Assuming 'mask' is the key for mask data
        txt_name = img_nm + '.txt'
        #label_dir = os.path.join(save_dir, 'labels')

        # Create the labels directory if it doesn't exist
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        with open(os.path.join(save_dir, txt_name), 'w') as f:
            for mask in mask_data:
                # Example: Convert mask to YOLO format and write to txt
                # Assuming mask is [class, x_center, y_center, width, height]
                yolo_format = ' '.join(map(str, mask))
                f.write(yolo_format + '\n')

# Define input paths (updated dataset locations)
train_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TrainData/img'
train_mask_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TrainData/mask'
test_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TestData/img'
valid_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/ValidData/img'

# Define output paths for each dataset (images and labels separately)
train_output_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train'
train_output_path1 = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train'
#test_output_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test'
#valid_output_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val'

# Process TrainData img and mask
img_files = [f for f in os.listdir(train_img_path) if f.startswith('image') and f.endswith('.h5')]
mask_files = [f for f in os.listdir(train_mask_path) if f.startswith('mask') and f.endswith('.h5')]

# Ensure both lists are sorted for consistent pairing
img_files.sort()
mask_files.sort()

for img_file, mask_file in zip(img_files, mask_files):
    # Convert images from .h5 to .jpg
    convert_h5_images_to_jpg(os.path.join(train_img_path, img_file), train_output_path)

    # Convert masks from .h5 to .txt
    img_nm = os.path.splitext(os.path.basename(os.path.join(train_img_path, img_file)))[0]
    convert_h5_masks_to_txt(os.path.join(train_mask_path, mask_file), train_output_path1, img_nm)

'''
# Process TestData img
for file_name in os.listdir(test_img_path):
    if file_name.startswith('image') and file_name.endswith('.h5'):
        convert_h5_images_to_jpg(os.path.join(test_img_path, file_name), test_output_path)

# Process ValidData img
for file_name in os.listdir(valid_img_path):
    if file_name.startswith('image') and file_name.endswith('.h5'):
        convert_h5_images_to_jpg(os.path.join(valid_img_path, file_name), valid_output_path)

'''

In [30]:
train_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TrainData'
test_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TestData'
valid_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/ValidData'

shutil.rmtree(train_img_path)
shutil.rmtree(test_img_path)
shutil.rmtree(valid_img_path)


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define the paths
train_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train'
train_label_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train'
val_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val'
val_label_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/val'
test_img_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test'
test_label_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/test'

# Create directories if they don't exist
os.makedirs(val_img_path, exist_ok=True)
os.makedirs(val_label_path, exist_ok=True)
os.makedirs(test_img_path, exist_ok=True)
os.makedirs(test_label_path, exist_ok=True)

# Get list of all images and corresponding labels
images = [f for f in os.listdir(train_img_path) if f.endswith('.jpg')]
labels = [f for f in os.listdir(train_label_path) if f.endswith('.txt')]

# Ensure both lists are sorted for consistent pairing
images.sort()
labels.sort()

# Split the dataset into train, val, and test
train_imgs, temp_imgs, train_labels, temp_labels = train_test_split(images, labels, test_size=0.4, random_state=42)
val_imgs, test_imgs, val_labels, test_labels = train_test_split(temp_imgs, temp_labels, test_size=0.5, random_state=42)

def move_files(file_list, src_dir, dest_dir):
    for file in file_list:
        shutil.move(os.path.join(src_dir, file), os.path.join(dest_dir, file))

# Move validation and test images and labels to their respective directories
move_files(val_imgs, train_img_path, val_img_path)
move_files(val_labels, train_label_path, val_label_path)
move_files(test_imgs, train_img_path, test_img_path)
move_files(test_labels, train_label_path, test_label_path)

print("Dataset split and moved successfully.")

In [None]:
print("Dataset: ", os.listdir(path))

print("Train Image: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train'))
print("Valid Image: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val'))
print("Test Image: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test'))

print("Train Label: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train'))
print("Valid Label: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/val'))
print("Test Label: ", os.listdir('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/test'))

In [None]:
import shutil
from google.colab import files

# Define the source and target paths
dataset_path = "/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1"
zip_file_path = "/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/landslide4sense_dataset.zip"  # Adjust as needed for your environment

# Compress the dataset folder into a zip file
shutil.make_archive(base_name=zip_file_path.replace('.zip', ''), format='zip', root_dir=dataset_path)
print(f"Dataset compressed and saved to: {zip_file_path}")

# Download the ZIP file
files.download(zip_file_path)


In [None]:
import os
from PIL import Image

# Define the paths to the image directories
image_dirs = [
    '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train',
    '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val',
    '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test'
]

corrupt_images = []

for image_dir in image_dirs:
    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg'):
            try:
                img_path = os.path.join(image_dir, filename)
                img = Image.open(img_path)
                img.verify()  # Verify that the image is not corrupted
            except (IOError, SyntaxError) as e:
                print(f'Corrupt image found: {img_path}')
                corrupt_images.append(img_path)

print(f'Total corrupt images: {len(corrupt_images)}')

# Define the directories
train_image_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train'
train_label_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train'
val_image_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val'
val_label_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/val'
test_image_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test'
test_label_dir = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/test'

# Check for missing labels in train dataset
train_images = [f for f in os.listdir(train_image_dir) if f.endswith('.jpg')]
for image in train_images:
    label_file = os.path.splitext(image)[0] + '.txt'
    if not os.path.exists(os.path.join(train_label_dir, label_file)):
        print(f"Missing label for train image: {image}")

# Check for missing labels in validation dataset
val_images = [f for f in os.listdir(val_image_dir) if f.endswith('.jpg')]
for image in val_images:
    label_file = os.path.splitext(image)[0] + '.txt'
    if not os.path.exists(os.path.join(val_label_dir, label_file)):
        print(f"Missing label for validation image: {image}")

# Check for missing labels in test dataset
test_images = [f for f in os.listdir(test_image_dir) if f.endswith('.jpg')]
for image in test_images:
    label_file = os.path.splitext(image)[0] + '.txt'
    if not os.path.exists(os.path.join(test_label_dir, label_file)):
        print(f"Missing label for test image: {image}")

# Print all valid images and labels in validation and test datasets
print("Valid Image: ", os.listdir(val_image_dir))
print("Valid Label: ", os.listdir(val_label_dir))
print("Test Image: ", os.listdir(test_image_dir))
print("Test Label: ", os.listdir(test_label_dir))

yaml_content = """
train: /root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/train
val: /root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/val
test: /root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/images/test

nc: 2  # Number of classes (e.g., landslide, non-landslide)
names: ['landslide', 'non-landslide']
"""

# Save the content to a .yaml file
with open("/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/dataset.yaml", "w") as file:
    file.write(yaml_content)

# Verify the file was created successfully
with open("/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/dataset.yaml", "r") as file:
    print(file.read())


In [35]:
from ultralytics import YOLO

# Initialize model
model = YOLO('yolov8n.pt')  # Use pre-trained weights

# Define the training configuration
epochs = 100  # You can adjust this as needed
data_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/dataset.yaml'
imgsz = 640

# Train the model
#model.train(data=data_path, epochs=epochs, imgsz=imgsz)
model.train(data=data_path, epochs=epochs, imgsz=imgsz, val=False)

# Save the trained model in PyTorch format
model.save('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/best_model.pt')

# Export the model to ONNX format
model.export(format='onnx', path='/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/best_model.onnx')

Ultralytics 8.3.55 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/dataset.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=False, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, 

[34m[1mtrain: [0mScanning /root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train... 2279 images, 0 backgrounds, 2279 corrupt: 100%|██████████| 2279/2279 [00:03<00:00, 614.64it/s]






[34m[1mtrain: [0mNew cache created: /root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/labels/train.cache


ValueError: not enough values to unpack (expected 3, got 0)

In [None]:
import onnxruntime as ort
import numpy as np

# Load the exported ONNX model using onnxruntime for inference
onnx_model_path = '/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/best_model.onnx'
onnx_session = ort.InferenceSession(onnx_model_path)

# Inference example using ONNX model (dummy input)
# Assuming input is a 640x640 image
dummy_input = np.random.rand(1, 3, 640, 640).astype(np.float32)

# Run inference
outputs = onnx_session.run(None, {'images': dummy_input})

# Print the outputs (you can process it based on your needs)
print(outputs)


In [None]:

# Perform inference
results = model('/root/.cache/kagglehub/datasets/pypiahmad/landslide4sense-challenge/versions/1/TestData/image_1.jpg')

# Display results
results.show()


In [None]:
# Clone YOLOv8 repository
!git clone https://github.com/ultralytics/ultralytics.git
%cd ultralytics
