In [None]:
!pip install rasterio



In [None]:
import numpy as np
from pathlib import Path
from scipy import ndimage
from scipy.stats import mode
import glob
import rasterio
from rasterio.enums import Resampling

In [None]:

def majority_filter(image, kernel_size=3):
    """
    Apply majority filter - replaces each pixel with the most common value
    in its neighborhood. Excellent for classification maps.

    The neighborhood of pixel ``(i, j)`` is the ``kernel_size`` x
    ``kernel_size`` window that starts ``kernel_size // 2`` pixels above and
    to the left of it; borders are handled by replicating the edge pixels.
    When several values are equally frequent the smallest one wins, and NaN
    is counted as a value of its own (it sorts last, so it loses ties).

    The window counts are computed once per distinct value with an integral
    image rather than once per pixel in a Python loop, so the run time grows
    with the number of distinct values in ``image`` (small for
    classification maps) instead of requiring one ``scipy.stats.mode`` call
    per pixel.

    Args:
        image: 2-D array of class labels.
        kernel_size: Side length of the square window (>= 1).

    Returns:
        Array with the same shape and dtype as ``image``.

    Raises:
        ValueError: If ``image`` is not 2-D or ``kernel_size`` is < 1.
    """
    image = np.asarray(image)
    if image.ndim != 2:
        raise ValueError(
            f"majority_filter expects a 2-D array, got {image.ndim}-D"
        )
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")

    filtered = np.zeros_like(image)
    if image.size == 0:
        # np.pad(mode='edge') cannot extend an empty axis; nothing to filter.
        return filtered

    rows, cols = image.shape
    k = kernel_size
    pad = k // 2
    padded = np.pad(image, pad, mode='edge')

    # A window count can never exceed padded.size, so int32 is enough for
    # any realistic tile and halves the memory of the work arrays.
    count_dtype = (
        np.int32 if padded.size <= np.iinfo(np.int32).max else np.int64
    )
    integral = np.zeros(
        (padded.shape[0] + 1, padded.shape[1] + 1), dtype=count_dtype
    )
    best_count = np.zeros(image.shape, dtype=count_dtype)

    # np.unique returns values in ascending order; combined with the strict
    # '>' below this keeps the smallest value whenever counts are tied.
    for cls in np.unique(image):
        # NaN never compares equal to itself, so it needs its own test.
        mask = np.isnan(padded) if cls != cls else (padded == cls)

        # integral[r, c] == number of matches in padded[:r, :c]
        integral[1:, 1:] = mask.cumsum(axis=0, dtype=count_dtype).cumsum(
            axis=1, dtype=count_dtype
        )

        # Matches inside padded[i:i+k, j:j+k] for every output pixel (i, j).
        counts = (
            integral[k:k + rows, k:k + cols]
            - integral[:rows, k:k + cols]
            - integral[k:k + rows, :cols]
            + integral[:rows, :cols]
        )

        better = counts > best_count
        filtered[better] = cls
        best_count[better] = counts[better]

    return filtered

def morphological_filter(image, kernel_size=3):
    """
    Apply morphological opening (erosion + dilation) to remove
    isolated pixels and smooth boundaries.

    Each class is opened independently with ``kernel_size // 2`` iterations
    of scipy's default binary structuring element. Pixels that no class
    keeps after opening are filled from a 3x3 majority filter of the
    original image.

    Args:
        image: 2-D array of class labels.
        kernel_size: Nominal kernel size; ``kernel_size // 2`` erosion and
            dilation iterations are applied. Values below 2 leave the image
            unchanged.

    Returns:
        Array with the same shape and dtype as ``image``.
    """
    from scipy.ndimage import binary_erosion, binary_dilation

    iterations = kernel_size // 2
    if iterations < 1:
        # scipy repeats the operation "until nothing changes" when
        # iterations < 1, which would erode every class away completely.
        return np.array(image, copy=True)

    result = np.zeros_like(image)
    # Track assigned pixels explicitly: using the value 0 as a "hole"
    # marker is ambiguous whenever 0 is itself a valid class label.
    assigned = np.zeros(np.shape(image), dtype=bool)

    # Apply morphological operations per class
    for cls in np.unique(image):
        mask = (image == cls)
        # Opening: removes small isolated pixels
        opened = binary_erosion(mask, iterations=iterations)
        opened = binary_dilation(opened, iterations=iterations)
        result[opened] = cls
        assigned |= opened

    # Fill the pixels dropped by every class with the local majority
    unassigned = ~assigned
    if unassigned.any():
        result[unassigned] = majority_filter(image, kernel_size=3)[unassigned]

    return result

def median_filter(image, kernel_size=3):
    """
    Smooth an image with a square median window (useful against
    salt-and-pepper noise).

    Delegates to ``scipy.ndimage.median_filter`` with
    ``size=kernel_size`` and returns its result unchanged.
    """
    smoothed = ndimage.median_filter(image, size=kernel_size)
    return smoothed

def adaptive_filter(image, min_size=5, max_size=15):
    """
    Two-pass majority filtering with a small and a large window.

    The image is first smoothed with a ``min_size`` majority filter. Every
    pixel whose (integer-cast) value was changed by that pass is then
    replaced by the result of a ``max_size`` majority filter of the
    original image; all other pixels keep the small-window result.
    """
    small_pass = majority_filter(image, kernel_size=min_size)

    # Pixels altered by the small window are treated as noisy.
    changed = np.abs(image.astype(int) - small_pass.astype(int)) > 0
    if not np.any(changed):
        return small_pass

    # Only those pixels take their value from the larger window.
    large_pass = majority_filter(image, kernel_size=max_size)
    small_pass[changed] = large_pass[changed]
    return small_pass

def _filtered_output_name(file_path):
    """Return the output file name for a classification tile path."""
    name = Path(file_path).name
    # The tiles matched by the glob use '_classification'; the hyphenated
    # spelling is kept as well so older naming schemes are still renamed.
    name = name.replace('_classification.tif', '_filtered.tif')
    name = name.replace('-classification.tif', '-filtered.tif')
    return name


def process_classification_tiles(input_dir, output_dir, method='majority', kernel_size=5):
    """
    Filter every ``tile_*_classification.tif[f]`` raster in ``input_dir``
    and write the result to ``output_dir``.

    Band 1 of each tile is read, smoothed with the selected filter, cast
    back to the source dtype and written as a single-band raster that
    reuses the source profile (CRS, transform, dtype, ...). The output file
    name is the input name with ``classification`` replaced by ``filtered``.
    A tile that fails is reported and skipped so the rest of the batch
    still runs.

    Args:
        input_dir: Directory containing the classification tiles.
        output_dir: Directory for the filtered tiles (created if missing).
        method: One of ``'majority'``, ``'morphological'``, ``'median'`` or
            ``'adaptive'``.
        kernel_size: Kernel size passed to the filter. Ignored by
            ``'adaptive'``, which uses its own default window sizes.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    filters = {
        'majority': lambda img: majority_filter(img, kernel_size),
        'morphological': lambda img: morphological_filter(img, kernel_size),
        'median': lambda img: median_filter(img, kernel_size),
        'adaptive': lambda img: adaptive_filter(img),
    }
    # Validate once up front: a bad method name is a caller error, not a
    # per-tile failure to be swallowed and repeated for every file.
    if method not in filters:
        raise ValueError(f"Unknown method: {method}")
    apply_filter = filters[method]

    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Find all classification files. The trailing '*' is there to accept
    # both .tif and .tiff, so drop sidecars such as '.tif.aux.xml'.
    pattern = str(input_path / "tile_*_classification.tif*")
    files = sorted(
        f for f in glob.glob(pattern)
        if Path(f).suffix.lower() in ('.tif', '.tiff')
    )

    if not files:
        print(f"No classification files found in {input_dir}")
        return

    print(f"Found {len(files)} classification tiles")
    print(f"Processing with {method} filter (kernel size: {kernel_size})...")

    failed = 0
    for i, file_path in enumerate(files):
        try:
            with rasterio.open(file_path) as src:
                img = src.read(1)  # Read first band
                profile = src.profile.copy()

                # Print CRS info for first file
                if i == 0:
                    print(f"CRS: {src.crs}")
                    print(f"Transform: {src.transform}")
                    print(f"Shape: {img.shape}")
                    print(f"Data type: {img.dtype}")

            # Ensure filtered image has same dtype as input
            filtered = apply_filter(img).astype(profile['dtype'])

            # Only one band is written, so the profile must say so even if
            # the source raster carried more bands.
            profile.update(count=1)

            output_file = output_path / _filtered_output_name(file_path)
            with rasterio.open(output_file, 'w', **profile) as dst:
                dst.write(filtered, 1)

        except Exception as e:
            # Deliberately broad: one unreadable tile must not stop the batch.
            failed += 1
            print(f"Error processing {file_path}: {e}")
            continue

        if (i + 1) % 10 == 0:
            print(f"Processed {i + 1}/{len(files)} tiles")

    if failed:
        print(f"{failed} of {len(files)} tiles failed; see errors above.")
    print(f"Done! Processed files saved to {output_dir}")
    print("All geospatial metadata (CRS, transform, bounds) preserved.")

def compare_filters(image_path, output_dir='comparison'):
    """
    Run every filter variant on one raster and save each result as a
    separate GeoTIFF in ``output_dir``.

    Band 1 of ``image_path`` is read once. The unfiltered band, majority
    (3/5/7), morphological (3/5), median (3/5) and adaptive results are all
    computed first and then written as ``<variant>.tif`` using the source
    profile and dtype.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Load the band and the profile that every output will reuse.
    with rasterio.open(image_path) as src:
        img = src.read(1)
        profile = src.profile.copy()

    # Build all variants before writing anything.
    variants = {'original': img}
    for size in (3, 5, 7):
        variants[f'majority_{size}x{size}'] = majority_filter(img, size)
    for size in (3, 5):
        variants[f'morphological_{size}x{size}'] = morphological_filter(img, size)
    for size in (3, 5):
        variants[f'median_{size}x{size}'] = median_filter(img, size)
    variants['adaptive'] = adaptive_filter(img)

    out_dtype = profile['dtype']
    for label, data in variants.items():
        destination = target_dir / f'{label}.tif'
        with rasterio.open(destination, 'w', **profile) as dst:
            dst.write(data.astype(out_dtype), 1)

    print(f"Comparison images saved to {output_dir}")
    print("All files preserve the original CRS and geotransform")

def verify_metadata(original_path, filtered_path):
    """
    Print the CRS, transform, bounds and shape of two rasters, followed by
    whether their CRS and transform are equal.
    """
    with rasterio.open(original_path) as src_orig, \
            rasterio.open(filtered_path) as src_filt:
        report = (
            ("Original file:", src_orig),
            ("\nFiltered file:", src_filt),
        )
        for heading, dataset in report:
            print(heading)
            print(f"  CRS: {dataset.crs}")
            print(f"  Transform: {dataset.transform}")
            print(f"  Bounds: {dataset.bounds}")
            print(f"  Shape: {dataset.shape}")

        # The transform is only compared when the CRS already matches.
        preserved = (src_orig.crs == src_filt.crs and
                     src_orig.transform == src_filt.transform)
        print("\nMetadata preserved:", preserved)

# Example usage: smooth every tile in the results folder with a 7x7 median
# filter and write the output to the post-process folder.
if __name__ == "__main__":
    process_classification_tiles(
        '/content/drive/MyDrive/AGRI/Planting_Method/results',
        '/content/drive/MyDrive/AGRI/Planting_Method/post-process',
        method='median',
        kernel_size=7,
    )

Found 4 classification tiles
Processing with median filter (kernel size: 5)...
CRS: EPSG:4326
Transform: | 0.00, 0.00, 120.76|
| 0.00,-0.00, 16.32|
| 0.00, 0.00, 1.00|
Shape: (5888, 5888)
Data type: float32
Done! Processed files saved to /content/drive/MyDrive/AGRI/Planting_Method/post-process
All geospatial metadata (CRS, transform, bounds) preserved.
