In [None]:
pip install tqdm

In [None]:
#!/usr/bin/env python3
import os
import pandas as pd
import rasterio
from rasterio.windows import Window
from rasterio.warp import calculate_default_transform, reproject, Resampling
from shapely.geometry import Point, box
import numpy as np
from pathlib import Path
from tqdm import tqdm

def reproject_raster(src_path, dst_path, dst_crs='EPSG:3035'):
    """
    Reproject a raster to EPSG:3035 (or other specified CRS) and save as a new file.

    Every band is warped individually with nearest-neighbour resampling, so
    pixel values are copied and never interpolated. The output grid
    (transform, width, height) is the default one computed by rasterio for
    the source extent in the target CRS.

    Parameters:
    -----------
    src_path : str
        Path to the source raster
    dst_path : str
        Path to save the reprojected raster
    dst_crs : str, default='EPSG:3035'
        Target CRS (default is EPSG:3035 as specified)

    Returns:
    --------
    str
        ``dst_path``, unchanged, for convenient chaining.

    Raises:
    -------
    ValueError
        If the source raster has no CRS defined; without a source CRS there
        is nothing to reproject from.
    """
    with rasterio.open(src_path) as src:
        # Fail early with an actionable message instead of an opaque error
        # from deep inside the warp machinery.
        if src.crs is None:
            raise ValueError(
                f"Cannot reproject {src_path}: the source raster has no CRS defined"
            )

        # Calculate the ideal dimensions and transformation for the new raster
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)

        # Start from the source metadata and override only the georeferencing
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Create the reprojected raster
        with rasterio.open(dst_path, 'w', **kwargs) as dst:
            # Band indexes are 1-based in rasterio
            band_indexes = range(1, src.count + 1)
            for i in tqdm(band_indexes, desc="Reprojecting bands", unit="band"):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest
                )

    print(f"Reprojected raster saved to: {dst_path}")
    return dst_path

def _parse_midpoint(value):
    """
    Convert one 'Midpoint' cell into an ``(x, y)`` tuple of floats.

    Accepted inputs are WKT-like text ("POINT (x y)", "POINT(x y)"),
    tuple-like text ("(x, y)"), plain "x,y" or "x y" text, and list/tuple
    values with at least two items. Anything else raises ValueError.
    """
    if isinstance(value, str):
        text = value.strip()
        if "(" in text:
            # Keep only what sits between the outermost parentheses so that
            # both "POINT (x y)" and "POINT(x y)" are handled.
            # rindex raises ValueError when the closing parenthesis is missing.
            text = text[text.index("(") + 1:text.rindex(")")]

        coords = text.split(",") if "," in text else text.split()
        if len(coords) < 2:
            raise ValueError(f"Cannot parse midpoint: {value}")
        return float(coords[0]), float(coords[1])

    if isinstance(value, (list, tuple)) and len(value) >= 2:
        # Already a list or tuple
        return float(value[0]), float(value[1])

    raise ValueError(f"Cannot parse midpoint: {value}")


def _clip_points_from_raster(src, points, raster_name, output_dir, pixel_buffer):
    """
    Write one square clip per midpoint that lies fully inside the open raster.

    ``points`` is a list of ``(midpoint_id, x, y)`` tuples. Returns a
    ``(clips_written, points_skipped)`` tuple.
    """
    raster_bounds = box(*src.bounds)
    window_size = pixel_buffer * 2 + 1  # +1 to include the center pixel
    written = 0
    skipped = 0

    progress = tqdm(points, total=len(points),
                    desc=f"Processing midpoints for {raster_name}", unit="point")
    for midpoint_id, x, y in progress:
        # refresh=False: let tqdm redraw on its own schedule instead of
        # forcing a redraw for every single point.
        progress.set_postfix(point_id=str(midpoint_id), refresh=False)

        if not Point(x, y).intersects(raster_bounds):
            # Midpoint does not overlap with raster
            skipped += 1
            continue

        # Per-point best effort: a failure on one clip must not abort the run.
        try:
            # Convert map coordinates to pixel coordinates (row, col)
            py, px = src.index(x, y)

            fits_inside = (px - pixel_buffer >= 0 and py - pixel_buffer >= 0 and
                           px + pixel_buffer < src.width and
                           py + pixel_buffer < src.height)
            if not fits_inside:
                # Midpoint is too close to image edge
                skipped += 1
                continue

            window = Window(px - pixel_buffer, py - pixel_buffer,
                            window_size, window_size)
            clipped_data = src.read(window=window)

            # Same profile as the source, but sized and georeferenced for the window
            out_profile = src.profile.copy()
            out_profile.update({
                'height': window.height,
                'width': window.width,
                'transform': rasterio.windows.transform(window, src.transform)
            })

            output_path = os.path.join(output_dir, f"{midpoint_id}_{raster_name}.tif")
            with rasterio.open(output_path, 'w', **out_profile) as dst:
                dst.write(clipped_data)

            written += 1
        except Exception as e:
            skipped += 1
            tqdm.write(f"Error processing midpoint {midpoint_id}: {e}")

    return written, skipped


def clip_around_midpoints(raster_dir, csv_path, output_dir, pixel_buffer=7, target_crs='EPSG:3035'):
    """
    Process satellite imagery based on midpoints from a CSV file.

    1. Read the CSV with midpoints and parse every coordinate once
    2. Find all raster files (*.tif) in the directory
    3. For each raster:
       - Reproject to the target CRS into a temporary file if needed
       - For each midpoint inside the raster, write a square clip of
         (2 * pixel_buffer + 1) pixels per side centred on the midpoint
       - Remove the temporary reprojected file, even if processing failed

    Midpoint coordinates are used as-is against the (re)projected raster, so
    they must already be expressed in ``target_crs``.

    Clips are written as "<midpoint id>_<raster name>.tif", where the id comes
    from the CSV 'id' column when present and from the row index otherwise.
    The skipped counter is incremented once per (raster, midpoint) pair that
    produced no clip.

    Parameters:
    -----------
    raster_dir : str
        Directory containing satellite imagery files (.tif)
    csv_path : str
        Path to the CSV file containing midpoint coordinates
    output_dir : str
        Directory to save clipped images
    pixel_buffer : int, default=7
        Number of pixels to extend in each direction from the midpoint
    target_crs : str, default='EPSG:3035'
        Target CRS for reprojection

    Raises:
    -------
    ValueError
        If ``pixel_buffer`` is negative, the CSV has no 'Midpoint' column, or
        no .tif files are found in ``raster_dir``.
    """
    if pixel_buffer < 0:
        raise ValueError(f"pixel_buffer must be non-negative, got {pixel_buffer}")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the CSV file with midpoints
    print(f"Reading midpoints from: {csv_path}")
    df = pd.read_csv(csv_path)

    if 'Midpoint' not in df.columns:
        raise ValueError("CSV file must contain a 'Midpoint' column with coordinates")

    print(f"Found {len(df)} midpoints in CSV file")

    # Get all raster files in the directory
    print(f"Scanning for raster files in: {raster_dir}")
    raster_files = list(Path(raster_dir).glob("*.tif"))

    if not raster_files:
        raise ValueError(f"No .tif files found in directory: {raster_dir}")

    print(f"Found {len(raster_files)} raster files")

    # Parse every midpoint once up front rather than once per raster.
    midpoint_ids = df['id'] if 'id' in df.columns else df.index
    points = []
    unparsable = 0
    for idx, raw_value, midpoint_id in zip(df.index, df['Midpoint'], midpoint_ids):
        try:
            x, y = _parse_midpoint(raw_value)
        except (ValueError, TypeError) as e:
            unparsable += 1
            print(f"Error parsing midpoint at index {idx}: {e}")
            continue
        points.append((midpoint_id, x, y))

    # Compare CRS objects, not strings: the same CRS can be spelled in
    # several ways ('EPSG:3035', 'epsg:3035', WKT, ...).
    target = rasterio.crs.CRS.from_user_input(target_crs)

    # Track overall statistics
    total_processed = 0
    total_skipped = 0

    # Main progress bar for raster files
    for raster_path in tqdm(raster_files, desc="Processing raster files", unit="file"):
        raster_name = raster_path.stem

        with rasterio.open(raster_path) as original:
            needs_reprojection = original.crs is None or original.crs != target

        # The temporary path is tracked explicitly so that cleanup can never
        # touch the input raster.
        temp_reprojected = None
        try:
            working_path = raster_path
            if needs_reprojection:
                print(f"CRS mismatch or not defined. Reprojecting raster to {target_crs}...")
                temp_reprojected = os.path.join(output_dir, f"temp_reprojected_{raster_name}.tif")
                working_path = reproject_raster(str(raster_path), temp_reprojected, target_crs)
                print("Reprojection complete.")

            with rasterio.open(working_path) as src:
                written, skipped = _clip_points_from_raster(
                    src, points, raster_name, output_dir, pixel_buffer)

            total_processed += written
            # Unparsable rows count as skipped for every raster they were
            # (not) matched against.
            total_skipped += skipped + unparsable
        finally:
            # Clean up temporary reprojected file if it was created
            if temp_reprojected is not None and os.path.exists(temp_reprojected):
                os.remove(temp_reprojected)
                print(f"Removed temporary reprojected file: {temp_reprojected}")

    # Print summary statistics
    print("\nProcessing complete!")
    print(f"Total clips created: {total_processed}")
    print(f"Total midpoints skipped: {total_skipped}")
    print(f"Output directory: {output_dir}")

if __name__ == "__main__":
    # Projection every raster is brought into before clipping (European LAEA).
    target_crs = 'EPSG:3035'  # European projection as specified

    # Half-width of each clip in pixels: 7 on every side of the centre pixel
    # yields 15x15 output images.
    pixel_buffer = 7  # This will create 15x15 images (7 pixels on each side plus the center)

    # Locations of the imagery, the midpoint table and the clip destination.
    raster_dir = r"C:\SatelliteImagery"
    csv_path = r"C:\SatelliteImagery\Dataset_Malaga.csv"  # Update with your actual CSV path
    output_dir = r"C:\SatelliteImagery\clipped_outputs"

    # Run the clipping workflow with the settings above.
    clip_around_midpoints(
        raster_dir=raster_dir,
        csv_path=csv_path,
        output_dir=output_dir,
        pixel_buffer=pixel_buffer,
        target_crs=target_crs,
    )

Reading midpoints from: C:\SatelliteImagery\Dataset_Malaga.csv
Found 15987 midpoints in CSV file
Scanning for raster files in: C:\SatelliteImagery
Found 1 raster files


Processing raster files:   0%|                                                                 | 0/1 [00:00<?, ?file/s]

CRS mismatch or not defined. Reprojecting raster to EPSG:3035...



Reprojecting bands:   0%|                                                                      | 0/8 [00:00<?, ?band/s][A
Reprojecting bands:  12%|███████▊                                                      | 1/8 [01:18<09:11, 78.72s/band][A
Reprojecting bands:  25%|███████████████▎                                             | 2/8 [04:23<14:06, 141.09s/band][A
Reprojecting bands:  38%|██████████████████████▉                                      | 3/8 [07:44<14:03, 168.68s/band][A
Reprojecting bands:  50%|██████████████████████████████▌                              | 4/8 [12:08<13:44, 206.02s/band][A
Reprojecting bands:  62%|██████████████████████████████████████▏                      | 5/8 [17:47<12:42, 254.11s/band][A
Reprojecting bands:  75%|█████████████████████████████████████████████▊               | 6/8 [23:40<09:35, 287.84s/band][A
Reprojecting bands:  88%|█████████████████████████████████████████████████████▍       | 7/8 [29:36<05:09, 309.86s/band][A
Reprojecting ba

Reprojected raster saved to: C:\SatelliteImagery\clipped_outputs\temp_reprojected_Test1.tif
Reprojection complete.



Processing midpoints for Test1:   0%|                                                     | 0/15987 [00:00<?, ?point/s][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=0][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=1][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=2][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=3][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=4][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=5][A
Processing midpoints for Test1:   0%|                                         | 0/15987 [00:00<?, ?point/s, point_id=6][A
Processing midp