In [4]:
import kagglehub

# Fetch the most recent published version of the dataset and keep the
# location that kagglehub reports for the downloaded files.
path = kagglehub.dataset_download(
    "aqibrehmanpirzada/nwpuresisc45"
)

# Show where the files ended up
print("Path to dataset files:", path)

Path to dataset files: /Users/Hassaan/.cache/kagglehub/datasets/aqibrehmanpirzada/nwpuresisc45/versions/1


### Creating train set for SwinIR model

In [4]:
import cv2
from pathlib import Path

# Settings that drive the HR/LR dataset generation
config = dict(
    # Downsampling factor: with 4, each HR side is 4 times the LR side
    scale_factor=4,
    # Cap on images taken from each class; None keeps all of them
    images_per_class=50,
    # Input folder holding one sub-directory per class
    hr_base_path='HR/Dataset/train/train',
    # Flat output folders for the HR images and their LR counterparts
    hr_output_path='HR_flat/train',
    lr_output_path='LR_flat/x4/train',
    # File suffixes treated as images
    image_extensions=['.jpg', '.jpeg', '.png', '.bmp'],
)

def _list_class_images(class_dir, extensions, limit=None):
    """Return the image files directly inside ``class_dir`` in sorted order.

    Matching on the file-name ending is case-insensitive, so ``.JPG`` is found
    when ``.jpg`` is configured. Sorting makes the ``limit`` subset reproducible
    (directory listing order is otherwise arbitrary).
    """
    endings = tuple(ext.lower() for ext in extensions)
    files = sorted(
        p for p in class_dir.iterdir()
        if p.is_file() and p.name.lower().endswith(endings)
    )
    return files if limit is None else files[:limit]


def _write_hr_lr_pair(img_file, out_name, hr_dir, lr_dir, scale):
    """Write the HR image and its bicubic LR version; return True on success."""
    hr_img = cv2.imread(str(img_file))
    if hr_img is None:
        print(f"Warning: Could not read {img_file}")
        return False

    h, w = hr_img.shape[:2]
    lr_h, lr_w = h // scale, w // scale
    if lr_h == 0 or lr_w == 0:
        # cv2.resize rejects a zero-sized target, so skip instead of crashing.
        print(f"Warning: {img_file} ({w}x{h}) is too small for scale {scale}, skipped")
        return False

    # Crop HR to a multiple of the scale so that HR is exactly scale x LR.
    # Images whose sides are already divisible by the scale are left untouched.
    crop_h, crop_w = lr_h * scale, lr_w * scale
    if (crop_h, crop_w) != (h, w):
        hr_img = hr_img[:crop_h, :crop_w].copy()

    # NOTE(review): cv2.resize with INTER_CUBIC applies no anti-aliasing filter,
    # unlike MATLAB's imresize; confirm this matches the LR degradation the
    # SwinIR setup expects.
    lr_img = cv2.resize(hr_img, (lr_w, lr_h), interpolation=cv2.INTER_CUBIC)

    # The HR file is re-encoded by imwrite (not copied); imwrite signals
    # failure through its return value rather than by raising.
    hr_ok = cv2.imwrite(str(hr_dir / out_name), hr_img)
    lr_ok = cv2.imwrite(str(lr_dir / out_name), lr_img)
    if not (hr_ok and lr_ok):
        print(f"Warning: Could not write outputs for {img_file}")
        return False
    return True


def generate_flat_dataset(config):
    """Generate flat HR and LR directories without class subdirectories.

    Every class sub-directory of ``config['hr_base_path']`` is scanned for
    images. Each readable image is written to ``config['hr_output_path']`` and a
    bicubic-downsampled copy to ``config['lr_output_path']``, both under the
    name ``<class>_<original file name>``. Progress is printed per class.

    Args:
        config: dict with the keys
            ``scale_factor`` (positive integer downsampling factor),
            ``images_per_class`` (int cap per class, or None for all images),
            ``hr_base_path`` (input directory with one folder per class),
            ``hr_output_path`` / ``lr_output_path`` (output directories,
            created if missing),
            ``image_extensions`` (iterable of suffixes such as ``'.jpg'``).

    Raises:
        ValueError: if ``scale_factor`` is not a positive whole number.
        FileNotFoundError: if ``hr_base_path`` is not an existing directory.
    """
    scale = config['scale_factor']
    if scale != int(scale) or scale < 1:
        raise ValueError(f"scale_factor must be a positive integer, got {scale!r}")
    scale = int(scale)

    hr_input_path = Path(config['hr_base_path'])
    hr_output_path = Path(config['hr_output_path'])
    lr_output_path = Path(config['lr_output_path'])

    # Validate the input before touching the filesystem, so a wrong path does
    # not leave empty output directories behind.
    if not hr_input_path.is_dir():
        raise FileNotFoundError(f"HR input directory not found: {hr_input_path}")

    # Create output directories if they don't exist
    hr_output_path.mkdir(parents=True, exist_ok=True)
    lr_output_path.mkdir(parents=True, exist_ok=True)

    # Get all class directories
    class_dirs = sorted(d for d in hr_input_path.iterdir() if d.is_dir())

    print(f"Found {len(class_dirs)} classes")

    total_images = 0

    for class_dir in class_dirs:
        class_name = class_dir.name
        print(f"\nProcessing class: {class_name}")

        image_files = _list_class_images(
            class_dir, config['image_extensions'], config['images_per_class']
        )

        print(f"Processing {len(image_files)} images...")

        # Count only the images that were actually read and written.
        processed = 0
        for img_file in image_files:
            # Class prefix keeps names unique once all classes share one folder
            new_filename = f"{class_name}_{img_file.name}"
            if _write_hr_lr_pair(img_file, new_filename,
                                 hr_output_path, lr_output_path, scale):
                processed += 1

        total_images += processed
        print(f"Completed {class_name}: {processed} images processed")

    print("\nDataset generation complete!")
    print(f"Total images: {total_images}")
    print(f"HR directory: {hr_output_path}")
    print(f"LR directory: {lr_output_path}")

# Execute the generation: builds the flat HR and LR folders named in `config`
# and prints per-class progress plus a final summary.
generate_flat_dataset(config)

Found 45 classes

Processing class: airplane
Processing 50 images...
Completed airplane: 50 images processed

Processing class: airport
Processing 50 images...
Completed airport: 50 images processed

Processing class: baseball_diamond
Processing 50 images...
Completed baseball_diamond: 50 images processed

Processing class: basketball_court
Processing 50 images...
Completed basketball_court: 50 images processed

Processing class: beach
Processing 50 images...
Completed beach: 50 images processed

Processing class: bridge
Processing 50 images...
Completed bridge: 50 images processed

Processing class: chaparral
Processing 50 images...
Completed chaparral: 50 images processed

Processing class: church
Processing 50 images...
Completed church: 50 images processed

Processing class: circular_farmland
Processing 50 images...
Completed circular_farmland: 50 images processed

Processing class: cloud
Processing 50 images...
Completed cloud: 50 images processed

Processing class: commercial_are