# Lunar Crater Dataset Preparation

## Introduction
In this notebook, we prepare the Chandrayaan-2 OHRC (Orbiter High Resolution Camera) Lunar Crater Dataset for training a deep learning model. The steps are: loading the annotation files, inspecting them, pre-processing the images, and building data generators.


## Create and Load the Dataset

In [None]:
import pandas as pd

# Root folder of the dataset export. Every split lives underneath it, so this
# is the single place to edit when the dataset is moved to another location.
DATASET_DIR = 'E:/SIH1732/Chandrayaan_2_OHRC_Lunar_Crater_Dataset.v4i.tensorflow'

# Load the annotation CSV file of each split
train_annotations = pd.read_csv(f'{DATASET_DIR}/train/annotations.csv')
valid_annotations = pd.read_csv(f'{DATASET_DIR}/valid/annotations.csv')

# Display the first few rows of the annotations to understand the structure
print("Training Annotations:")
print(train_annotations.head())
print("\nValidation Annotations:")
print(valid_annotations.head())


## Pre-processing Images

In [None]:
import cv2
import numpy as np

def preprocess_image(image_path, target_size=(640, 640)):
    """Load an image as grayscale, resize it, and scale its pixels to [0, 1].

    Args:
        image_path: Path of the image file to read.
        target_size: Output size handed to ``cv2.resize``, which interprets
            the tuple as ``(width, height)``.

    Returns:
        The resized grayscale image divided by 255.0 (a 2-D float array).

    Raises:
        FileNotFoundError: If OpenCV could not read ``image_path`` (the file
            is missing or is not a decodable image).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising; without
    # this check the problem would only surface as an opaque cv2.resize error.
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable file): {image_path}"
        )
    image = cv2.resize(image, target_size)
    return image / 255.0  # Normalize to [0, 1]

# Test preprocessing on a sample image
sample_image_path = 'E:/SIH1732/Chandrayaan_2_OHRC_Lunar_Crater_Dataset.v4i.tensorflow/train/2_jpg.rf.0ffdaf7fc98b8a0a16a3f00c88bb7faa.jpg'
processed_image = preprocess_image(sample_image_path)
print(processed_image.shape)  # Should print (640, 640)
# Enforce the expectation so a Restart & Run All fails loudly if the
# preprocessing output ever stops matching the default target size.
assert processed_image.shape == (640, 640), processed_image.shape


## Extracting Paths and Labels for the Train/Validation Splits

In [None]:
import os

def extract_paths_and_labels(annotations, data_dir):
    """Turn an annotations table into parallel paths, boxes, and labels.

    Args:
        annotations: DataFrame with the columns ``filename``, ``xmin``,
            ``ymin``, ``xmax``, ``ymax`` and ``class``.
        data_dir: Directory that each ``filename`` entry is joined onto.

    Returns:
        A tuple ``(image_paths, bboxes, labels)`` with one entry per
        annotation row: a list of joined paths, the values of the four box
        columns in ``xmin, ymin, xmax, ymax`` order, and the values of the
        ``class`` column.
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']

    # One full path per annotation row, in row order.
    full_paths = []
    for name in annotations['filename']:
        full_paths.append(os.path.join(data_dir, name))

    boxes = annotations[box_columns].values
    classes = annotations['class'].values
    return full_paths, boxes, classes

# Define data directories
train_data_dir = 'E:/SIH1732/Chandrayaan_2_OHRC_Lunar_Crater_Dataset.v4i.tensorflow/train'
valid_data_dir = 'E:/SIH1732/Chandrayaan_2_OHRC_Lunar_Crater_Dataset.v4i.tensorflow/valid'

# Extract paths and labels
train_image_paths, train_bboxes, train_labels = extract_paths_and_labels(train_annotations, train_data_dir)
valid_image_paths, valid_bboxes, valid_labels = extract_paths_and_labels(valid_annotations, valid_data_dir)

# The extracted lists hold one entry per annotation row (one bounding box), so
# their length counts boxes, not images. De-duplicate the paths to count
# images, which stays correct whether or not a file appears in several rows.
print(f"Number of training images: {len(set(train_image_paths))}")
print(f"Number of training annotations: {len(train_image_paths)}")
print(f"Number of validation images: {len(set(valid_image_paths))}")
print(f"Number of validation annotations: {len(valid_image_paths)}")


## Creating Data Generators

In [None]:
from tensorflow.keras.utils import Sequence

class LunarCraterDataGenerator(Sequence):
    """Keras ``Sequence`` serving batches of preprocessed images and annotations.

    The three inputs are parallel: position ``i`` of ``image_paths``,
    ``bboxes`` and ``labels`` belong to the same entry. Batches are served in
    the stored order; this class performs no shuffling.

    Args:
        image_paths: Image file paths, one per entry.
        bboxes: Bounding boxes, one per entry, sliceable along the first axis.
        labels: Class labels, one per entry, sliceable along the first axis.
        batch_size: Maximum number of entries per batch; must be positive.
        image_size: Target size forwarded to ``preprocess_image``.
        **kwargs: Passed through to the Keras base-class constructor.

    Raises:
        ValueError: If the three inputs differ in length or ``batch_size``
            is not positive.
    """

    def __init__(self, image_paths, bboxes, labels, batch_size=32, image_size=(640, 640), **kwargs):
        # Newer Keras versions warn when a Sequence subclass skips the
        # base-class constructor, so always call it.
        super().__init__(**kwargs)
        if not (len(image_paths) == len(bboxes) == len(labels)):
            raise ValueError(
                "image_paths, bboxes and labels must have the same length, got "
                f"{len(image_paths)}, {len(bboxes)} and {len(labels)}"
            )
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.image_paths = image_paths
        self.bboxes = bboxes
        self.labels = labels
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self):
        """Return the number of batches; the last one may be smaller."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(images, {'bboxes': ..., 'labels': ...})``.

        Raises:
            IndexError: If ``index`` is outside ``range(len(self))``.
        """
        # Fail on out-of-range indices instead of silently returning an empty
        # batch; this also lets plain ``for`` iteration terminate.
        if not 0 <= index < len(self):
            raise IndexError(f"batch index {index} out of range for {len(self)} batches")

        batch = slice(index * self.batch_size, (index + 1) * self.batch_size)

        # Honour the configured image_size rather than preprocess_image's default.
        images = [preprocess_image(path, self.image_size) for path in self.image_paths[batch]]
        # NOTE(review): boxes are returned unchanged while images are resized;
        # this presumably assumes the files already have image_size -- confirm.
        return np.array(images), {
            'bboxes': np.array(self.bboxes[batch]),
            'labels': np.array(self.labels[batch]),
        }

    def on_epoch_end(self):
        """Hook called at the end of each epoch; intentionally a no-op."""
        # Shuffle data at the end of each epoch (if needed)
        pass

# Create data generators
train_generator = LunarCraterDataGenerator(train_image_paths, train_bboxes, train_labels)
valid_generator = LunarCraterDataGenerator(valid_image_paths, valid_bboxes, valid_labels)

# Test the generator (optional): fetch the first batch directly rather than
# starting a loop that is broken out of after one iteration.
images, labels = train_generator[0]
print(images.shape)  # Should print (batch_size, 640, 640)
# Each entry carries exactly one box (one annotation row), so the targets have
# no per-image "number of boxes" axis.
print(labels['bboxes'].shape)  # Should print (batch_size, 4)
print(labels['labels'].shape)  # Should print (batch_size,)
