<a href="https://colab.research.google.com/github/ulfboge/temporal-landcover-vectorizer/blob/main/scripts/python/raster_to_vector_with_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Raster to Vector Conversion Tool

This notebook converts multi-band raster files into vector formats (points and polygons) and CSV files. It processes each pixel in the raster and creates corresponding geometries while preserving the band values.

## Features:
- Converts raster pixels to point and polygon shapefiles
- Generates CSV files with pixel values (with and without coordinates)
- Handles multi-band rasters (up to 6 bands)
- Skips pixels where all bands are 0
- Maintains spatial reference and coordinates

## Setup
First, let's mount Google Drive and install required packages.

In [None]:
from google.colab import drive

# Mount Google Drive so the notebook can read and write files under it.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
%%capture
# Install required packages.
# NOTE: %%capture is a cell magic, so it has to be the very first line of the
# cell; placed after a comment it is not recognised and the cell fails.
# NOTE(review): because output is captured, installation errors are not shown;
# temporarily remove %%capture when debugging the environment.
!apt-get update
!apt-get install -y gdal-bin python3-gdal
!pip install pandas numpy==1.24.3 gdal==3.4.3

## Import Libraries and Set Up Directories

In [None]:
from osgeo import gdal, ogr, osr
import os
import pandas as pd
import glob

# Root folder on the mounted Drive that holds the inputs and outputs.
base_directory = "/content/drive/MyDrive/earthengine/conversion"

# One sub-folder each for input rasters, output shapefiles and output CSVs.
raster_folder, vector_folder, csv_folder = (
    os.path.join(base_directory, subfolder)
    for subfolder in ("raster", "vector", "csv")
)

# Make sure all three folders exist (existing folders are left untouched).
for folder in (raster_folder, vector_folder, csv_folder):
    os.makedirs(folder, exist_ok=True)
    print(f"Created directory: {folder}")

## Find and List Raster Files

In [None]:
# Find all raster files.
# The extension check is case-insensitive and accepts both ".tif" and ".tiff",
# so files such as "scene.TIF" are not silently skipped. The list is sorted
# because os.listdir() returns entries in arbitrary order, which would make
# the processing order differ between runs.
raster_extensions = ('.tif', '.tiff')
raster_files = sorted(
    os.path.join(raster_folder, entry)
    for entry in os.listdir(raster_folder)
    if entry.lower().endswith(raster_extensions)
    and os.path.isfile(os.path.join(raster_folder, entry))
)

# Stop early with a clear message when there is nothing to convert.
if not raster_files:
    raise FileNotFoundError(f"No raster files found in '{raster_folder}'.")

print("Found the following raster files:")
for file in raster_files:
    print(f"  - {os.path.basename(file)}")

# Names used for the band value fields/columns, in band order.
band_names = ["y2013", "y2015", "y2017", "y2019", "y2021", "y2023"]

## Step 1: Load and Prepare Raster Data

In [None]:
def load_raster_data(raster_path):
    """Open a raster with GDAL and gather everything the later steps need.

    Args:
        raster_path: Path of the raster file to open.

    Returns:
        A dict with these keys:
            'raster_ds'    - the open GDAL dataset
            'transform'    - the dataset's geotransform tuple
            'num_bands'    - number of raster bands
            'width'        - raster width in pixels
            'height'       - raster height in pixels
            'origin_x'     - first geotransform entry
            'origin_y'     - fourth geotransform entry
            'pixel_width'  - second geotransform entry
            'pixel_height' - sixth geotransform entry
            'band_arrays'  - one array per band, in band order
            'spatial_ref'  - osr.SpatialReference built from the dataset's
                             projection WKT

    Raises:
        ValueError: If GDAL returns no dataset for ``raster_path``.
    """
    dataset = gdal.Open(raster_path)
    if dataset is None:
        raise ValueError(f"Could not open raster file: {raster_path}")

    geotransform = dataset.GetGeoTransform()
    band_count = dataset.RasterCount
    n_cols = dataset.RasterXSize
    n_rows = dataset.RasterYSize

    # The third and fifth geotransform entries are not used by this tool.
    x_origin, x_step, _, y_origin, _, y_step = geotransform

    # GDAL band numbers start at 1.
    arrays = []
    for band_number in range(1, band_count + 1):
        arrays.append(dataset.GetRasterBand(band_number).ReadAsArray())

    srs = osr.SpatialReference()
    srs.ImportFromWkt(dataset.GetProjection())

    return dict(
        raster_ds=dataset,
        transform=geotransform,
        num_bands=band_count,
        width=n_cols,
        height=n_rows,
        origin_x=x_origin,
        origin_y=y_origin,
        pixel_width=x_step,
        pixel_height=y_step,
        band_arrays=arrays,
        spatial_ref=srs,
    )

## Step 2: Create Vector Layers

In [None]:
def create_vector_layers(raster_name, spatial_ref, num_bands, actual_band_names):
    """Create the point and polygon shapefiles and define their attribute fields.

    Existing shapefiles with the same names are deleted first. Both layers
    get the fields ``pixel_id`` (integer), ``x_coord`` and ``y_coord`` (real),
    plus one integer field per entry of ``actual_band_names``.

    Args:
        raster_name: Base name used for the output files
            (``<raster_name>_points.shp`` / ``<raster_name>_polygons.shp``).
        spatial_ref: Spatial reference assigned to both layers.
        num_bands: Unused; kept so existing callers keep working.
        actual_band_names: Names of the per-band integer fields to create.

    Returns:
        Tuple ``(point_layer, polygon_layer, point_ds, polygon_ds)``. The
        caller must keep the data sources alive while writing and release
        them afterwards.

    Raises:
        RuntimeError: If the shapefile driver is unavailable, or a data
            source or layer could not be created.
    """
    driver = ogr.GetDriverByName("ESRI Shapefile")
    if driver is None:
        raise RuntimeError("OGR driver 'ESRI Shapefile' is not available.")

    # Define output paths
    output_point_shapefile = os.path.join(vector_folder, f"{raster_name}_points.shp")
    output_polygon_shapefile = os.path.join(vector_folder, f"{raster_name}_polygons.shp")

    # Remove existing files if they exist
    for output_file in (output_point_shapefile, output_polygon_shapefile):
        if os.path.exists(output_file):
            driver.DeleteDataSource(output_file)

    # Create new datasources. When GDAL exceptions are not enabled, a failure
    # is reported by returning None, so check explicitly instead of failing
    # later with an unrelated AttributeError.
    point_ds = driver.CreateDataSource(output_point_shapefile)
    if point_ds is None:
        raise RuntimeError(f"Could not create shapefile: {output_point_shapefile}")
    polygon_ds = driver.CreateDataSource(output_polygon_shapefile)
    if polygon_ds is None:
        raise RuntimeError(f"Could not create shapefile: {output_polygon_shapefile}")

    # Create layers
    point_layer = point_ds.CreateLayer(f"{raster_name}_points", spatial_ref, ogr.wkbPoint)
    if point_layer is None:
        raise RuntimeError(f"Could not create layer in: {output_point_shapefile}")
    polygon_layer = polygon_ds.CreateLayer(f"{raster_name}_polygons", spatial_ref, ogr.wkbPolygon)
    if polygon_layer is None:
        raise RuntimeError(f"Could not create layer in: {output_polygon_shapefile}")

    # Create the same fields on both layers
    for layer in (point_layer, polygon_layer):
        layer.CreateField(ogr.FieldDefn("pixel_id", ogr.OFTInteger))
        layer.CreateField(ogr.FieldDefn("x_coord", ogr.OFTReal))
        layer.CreateField(ogr.FieldDefn("y_coord", ogr.OFTReal))

        for band_name in actual_band_names:
            layer.CreateField(ogr.FieldDefn(band_name, ogr.OFTInteger))

    return point_layer, polygon_layer, point_ds, polygon_ds

## Step 3: Process Pixels and Create Features

In [None]:
def process_pixels(raster_data, point_layer, polygon_layer, actual_band_names):
    """Write one point and one polygon feature per non-empty pixel.

    Pixels whose value equals 0 in every band are skipped. Every other pixel
    gets a sequential ``pixel_id`` (starting at 1), a point feature and a
    rectangular polygon feature, and one row in the returned table.

    Args:
        raster_data: Dict as returned by ``load_raster_data``; the keys
            'height', 'width', 'origin_x', 'origin_y', 'pixel_width',
            'pixel_height', 'num_bands' and 'band_arrays' are read.
        point_layer: OGR layer receiving the point features.
        polygon_layer: OGR layer receiving the polygon features.
        actual_band_names: Field names for the band values, in band order.

    Returns:
        List of rows ``[pixel_id, x, y, band values...]`` with the
        coordinates rounded to 6 decimals and band values converted to int.
    """
    # Read the per-raster constants once instead of on every pixel.
    n_rows = raster_data['height']
    n_cols = raster_data['width']
    x_origin = raster_data['origin_x']
    y_origin = raster_data['origin_y']
    x_step = raster_data['pixel_width']
    y_step = raster_data['pixel_height']
    band_count = raster_data['num_bands']
    bands = raster_data['band_arrays']

    def write_feature(layer, geometry, fid, x, y, values):
        """Create one feature on ``layer`` with the shared attribute set."""
        feature = ogr.Feature(layer.GetLayerDefn())
        feature.SetGeometry(geometry)
        feature.SetField("pixel_id", fid)
        feature.SetField("x_coord", x)
        feature.SetField("y_coord", y)
        for position, field_name in enumerate(actual_band_names):
            feature.SetField(field_name, int(values[position]))
        layer.CreateFeature(feature)

    rows = []
    next_id = 1

    cells = ((r, c) for r in range(n_rows) for c in range(n_cols))
    for row, col in cells:
        values = [bands[b][row, col] for b in range(band_count)]

        # Nothing to record when every band is 0 at this pixel.
        if not any(v != 0 for v in values):
            continue

        # NOTE(review): these are origin + index * pixel size, i.e. the
        # pixel's origin-side corner rather than its centre - confirm that
        # this is the intended location for the point features.
        x = x_origin + col * x_step
        y = y_origin + row * y_step
        x_far = x + x_step
        y_far = y + y_step

        point = ogr.Geometry(ogr.wkbPoint)
        point.AddPoint(x, y)

        # Closed ring around the pixel footprint (first vertex repeated).
        ring = ogr.Geometry(ogr.wkbLinearRing)
        for vx, vy in ((x, y), (x_far, y), (x_far, y_far), (x, y_far), (x, y)):
            ring.AddPoint(vx, vy)

        polygon = ogr.Geometry(ogr.wkbPolygon)
        polygon.AddGeometry(ring)

        write_feature(point_layer, point, next_id, x, y, values)
        write_feature(polygon_layer, polygon, next_id, x, y, values)

        rows.append([next_id, round(x, 6), round(y, 6), *(int(v) for v in values)])
        next_id += 1

    return rows

## Step 4: Create CSV Outputs

In [None]:
def create_csv_output(csv_data, raster_name, actual_band_names):
    """Write the pixel table to two CSV files in the CSV output folder.

    ``<raster_name>_vectorized.csv`` holds every column;
    ``<raster_name>_vectorized_no_coords.csv`` is the same table without the
    ``x_coord`` / ``y_coord`` columns. Neither file contains the index.

    Args:
        csv_data: Rows of ``[pixel_id, x_coord, y_coord, band values...]``.
        raster_name: Base name used for both output files.
        actual_band_names: Column names for the band values, in order.

    Returns:
        Tuple ``(path_with_coordinates, path_without_coordinates)``.
    """
    with_coords_path = os.path.join(csv_folder, f"{raster_name}_vectorized.csv")
    without_coords_path = os.path.join(csv_folder, f"{raster_name}_vectorized_no_coords.csv")

    coordinate_columns = ["x_coord", "y_coord"]
    header = ["pixel_id", *coordinate_columns, *actual_band_names]

    # Full table first, then the same table minus the coordinate columns.
    table = pd.DataFrame(csv_data, columns=header)
    table.to_csv(with_coords_path, index=False)
    table.drop(coordinate_columns, axis=1).to_csv(without_coords_path, index=False)

    return with_coords_path, without_coords_path

## Main Processing Loop

In [None]:
# Process each raster file. A failure in one raster is reported and the loop
# moves on to the next one.
for raster_path in raster_files:
    raster_name = os.path.splitext(os.path.basename(raster_path))[0]
    print(f"\nProcessing: {raster_name}")

    # Pre-bind every GDAL/OGR handle so the cleanup in ``finally`` is safe
    # regardless of which step fails.
    raster_data = None
    point_layer = polygon_layer = None
    point_ds = polygon_ds = None
    succeeded = False

    try:
        # Step 1: Load raster data
        print("Loading raster data...")
        raster_data = load_raster_data(raster_path)
        band_count = raster_data['num_bands']
        actual_band_names = band_names[:band_count]
        if band_count > len(band_names):
            # Every band needs a name: otherwise the CSV rows would have more
            # values than columns and the export would fail after the
            # shapefiles were already written.
            actual_band_names += [
                f"band{number}"
                for number in range(len(band_names) + 1, band_count + 1)
            ]
            print(f"  Note: raster has {band_count} bands; "
                  f"using generic names for bands beyond {len(band_names)}.")

        # Step 2: Create vector layers
        print("Creating vector layers...")
        point_layer, polygon_layer, point_ds, polygon_ds = create_vector_layers(
            raster_name, raster_data['spatial_ref'], band_count, actual_band_names)

        # Step 3: Process pixels
        print("Processing pixels...")
        csv_data = process_pixels(raster_data, point_layer, polygon_layer, actual_band_names)

        # Step 4: Create CSV outputs
        print("Creating CSV outputs...")
        output_csv_path, output_csv_no_coords_path = create_csv_output(
            csv_data, raster_name, actual_band_names)

        succeeded = True

    except Exception as e:
        print(f"Error processing {raster_name}: {str(e)}")

    finally:
        # Clean up: dropping the references is what closes the GDAL/OGR
        # objects and flushes the shapefiles to disk. Doing it here means it
        # also happens when a step above raised. Layers go first so no layer
        # object outlives its data source.
        point_layer = polygon_layer = None
        point_ds = polygon_ds = None
        if raster_data is not None:
            raster_data['raster_ds'] = None

    # Report only after the outputs have been closed.
    if succeeded:
        print(f"Successfully processed {raster_name}:")
        print(f"  - Point shapefile: {raster_name}_points.shp")
        print(f"  - Polygon shapefile: {raster_name}_polygons.shp")
        print(f"  - CSV files:")
        print(f"    * {os.path.basename(output_csv_path)} (with coordinates)")
        print(f"    * {os.path.basename(output_csv_no_coords_path)} (without coordinates)")

print("\nProcessing complete for all rasters.")

## Results

The script has created the following outputs in your Google Drive:
1. Point shapefiles (`*_points.shp`) in the 'vector' folder
2. Polygon shapefiles (`*_polygons.shp`) in the 'vector' folder
3. Two CSV files in the 'csv' folder for each raster:
   - `*_vectorized.csv` (with coordinates)
   - `*_vectorized_no_coords.csv` (without coordinates)

You can find these files in the following location:
```
/content/drive/MyDrive/earthengine/conversion
    ├── raster/
    ├── vector/
    └── csv/
```