<a href="https://colab.research.google.com/github/ulfboge/temporal-landcover-vectorizer/blob/main/scripts/python/raster_to_vector_with_csv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Raster to Vector Conversion Tool

This notebook converts multi-band raster files into vector formats (points and polygons) and CSV files. It processes each pixel in the raster and creates corresponding geometries while preserving the band values.

## Features:
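- Converts raster pixels to point and polygon shapefiles (each point sits at the pixel's origin corner, i.e. the upper-left corner for a north-up raster; each polygon covers the pixel footprint)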
- Generates CSV files with pixel values
- Handles multi-band rasters (up to 6 bands)
- Skips pixels where all bands are 0
- Maintains spatial reference and coordinates

## Setup
First, let's mount Google Drive and install required packages.

In [3]:
from google.colab import drive

# Attach Google Drive so the notebook can read and write under /content/drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install required packages
%%capture
!apt-get update
!apt-get install -y gdal-bin python3-gdal
!pip install pandas numpy==1.24.3 gdal==3.4.3

## Import Libraries and Set Up Directories

In [None]:
from osgeo import gdal, ogr, osr
import os
import pandas as pd
import glob

# Root of the conversion workspace on the mounted Google Drive
base_directory = "/content/drive/MyDrive/earthengine/conversion"

# Working folders: rasters are read from raster/, results go to vector/ and csv/
raster_folder, vector_folder, csv_folder = (
    os.path.join(base_directory, subdir) for subdir in ("raster", "vector", "csv")
)

# Make sure every working folder exists (existing folders are left untouched)
for folder in (raster_folder, vector_folder, csv_folder):
    os.makedirs(folder, exist_ok=True)
    print(f"Created directory: {folder}")

## Find and List Raster Files

In [None]:
# Find all raster files
# GeoTIFF extensions are matched case-insensitively so that e.g. "scene.TIF"
# or "scene.tiff" is not silently ignored. The list is sorted because
# os.listdir() returns entries in arbitrary order, which would otherwise make
# the processing order differ between runs.
raster_extensions = ('.tif', '.tiff')
raster_files = sorted(
    os.path.join(raster_folder, entry)
    for entry in os.listdir(raster_folder)
    if entry.lower().endswith(raster_extensions)
    and os.path.isfile(os.path.join(raster_folder, entry))
)

# Fail early with a clear message when there is nothing to process
if not raster_files:
    raise FileNotFoundError(f"No raster files found in '{raster_folder}'.")

print("Found the following raster files:")
for raster_file in raster_files:
    print(f"  - {os.path.basename(raster_file)}")

# Define band names
# One attribute/CSV column per raster band, in band order (band 1 -> "y2013", ...)
band_names = ["y2013", "y2015", "y2017", "y2019", "y2021", "y2023"]

## Process Raster Files

This cell processes each raster file and creates:
1. Point shapefile
2. Polygon shapefile
3. Raw CSV file
4. Cleaned CSV file

In [None]:
# Process each raster file
#
# For every raster in raster_files this writes:
#   * <name>_points.shp    - one point per kept pixel (at the pixel's origin corner)
#   * <name>_polygons.shp  - one rectangle per kept pixel (the pixel footprint)
#   * <name>_vectorized.csv and <name>_vectorized_cleaned.csv
# A pixel is kept unless its value is 0 in every processed band.
for raster_path in raster_files:
    raster_name = os.path.splitext(os.path.basename(raster_path))[0]
    print(f"\nProcessing: {raster_name}")

    # Define output paths
    output_point_shapefile = os.path.join(vector_folder, f"{raster_name}_points.shp")
    output_polygon_shapefile = os.path.join(vector_folder, f"{raster_name}_polygons.shp")
    output_csv_path = os.path.join(csv_folder, f"{raster_name}_vectorized.csv")
    cleaned_csv_path = os.path.join(csv_folder, f"{raster_name}_vectorized_cleaned.csv")

    # Load raster
    raster_ds = gdal.Open(raster_path)
    if raster_ds is None:
        print(f"Skipping {raster_name}: Could not open raster file.")
        continue

    # Get raster properties
    transform = raster_ds.GetGeoTransform()
    raster_width = raster_ds.RasterXSize
    raster_height = raster_ds.RasterYSize
    # The rotation/shear terms of the geotransform are ignored here.
    origin_x, pixel_width, _, origin_y, _, pixel_height = transform

    # Only bands that have a name in band_names can be stored as attributes or
    # CSV columns. Capping the band count keeps every row the same width as
    # the column list; without the cap a raster with extra bands would fail
    # when the DataFrame is built.
    total_bands = raster_ds.RasterCount
    num_bands = min(total_bands, len(band_names))
    if total_bands > num_bands:
        print(f"Warning: {raster_name} has {total_bands} bands; "
              f"only the first {num_bands} are processed.")
    actual_band_names = band_names[:num_bands]

    # Read raster data (one 2-D array per processed band)
    band_arrays = [raster_ds.GetRasterBand(b).ReadAsArray() for b in range(1, num_bands + 1)]

    # Create shapefiles
    driver = ogr.GetDriverByName("ESRI Shapefile")

    # Remove outputs of a previous run so they can be recreated
    for output_file in [output_point_shapefile, output_polygon_shapefile]:
        if os.path.exists(output_file):
            driver.DeleteDataSource(output_file)

    point_ds = driver.CreateDataSource(output_point_shapefile)
    polygon_ds = driver.CreateDataSource(output_polygon_shapefile)
    if point_ds is None or polygon_ds is None:
        # Same policy as for an unreadable raster: report and move on
        print(f"Skipping {raster_name}: Could not create output shapefiles.")
        point_ds = None
        polygon_ds = None
        raster_ds = None
        continue

    # Set spatial reference (copied from the raster's projection WKT)
    spatial_ref = osr.SpatialReference()
    spatial_ref.ImportFromWkt(raster_ds.GetProjection())

    # Create layers
    point_layer = point_ds.CreateLayer(f"{raster_name}_points", spatial_ref, ogr.wkbPoint)
    polygon_layer = polygon_ds.CreateLayer(f"{raster_name}_polygons", spatial_ref, ogr.wkbPolygon)

    # Create fields (identical schema on both layers)
    for layer in [point_layer, polygon_layer]:
        layer.CreateField(ogr.FieldDefn("pixel_id", ogr.OFTInteger))
        layer.CreateField(ogr.FieldDefn("x_coord", ogr.OFTReal))
        layer.CreateField(ogr.FieldDefn("y_coord", ogr.OFTReal))

        for band_name in actual_band_names:
            layer.CreateField(ogr.FieldDefn(band_name, ogr.OFTInteger))

    # Process pixels
    csv_data = []
    pixel_id = 1  # sequential id shared by the point, the polygon and the CSV row

    for row in range(raster_height):
        y_coord = origin_y + row * pixel_height
        for col in range(raster_width):
            x_coord = origin_x + col * pixel_width
            pixel_values = [band[row, col] for band in band_arrays]

            # Skip pixels that are 0 in every band
            if all(v == 0 for v in pixel_values):
                continue

            # Convert once; the same ints go to both shapefiles and the CSV
            band_values = [int(v) for v in pixel_values]

            # Create geometries and features.
            # AddPoint_2D is used because AddPoint() adds a z value and turns
            # the geometry into 2.5D, which does not match the 2-D layers.
            point = ogr.Geometry(ogr.wkbPoint)
            point.AddPoint_2D(x_coord, y_coord)

            # Closed ring around the pixel footprint, starting at its origin corner
            ring = ogr.Geometry(ogr.wkbLinearRing)
            ring.AddPoint_2D(x_coord, y_coord)
            ring.AddPoint_2D(x_coord + pixel_width, y_coord)
            ring.AddPoint_2D(x_coord + pixel_width, y_coord + pixel_height)
            ring.AddPoint_2D(x_coord, y_coord + pixel_height)
            ring.AddPoint_2D(x_coord, y_coord)

            polygon = ogr.Geometry(ogr.wkbPolygon)
            polygon.AddGeometry(ring)

            # Add features to layers
            for layer, geom in [(point_layer, point), (polygon_layer, polygon)]:
                feature = ogr.Feature(layer.GetLayerDefn())
                feature.SetGeometry(geom)
                feature.SetField("pixel_id", pixel_id)
                feature.SetField("x_coord", x_coord)
                feature.SetField("y_coord", y_coord)

                for band_name, band_value in zip(actual_band_names, band_values):
                    feature.SetField(band_name, band_value)

                layer.CreateFeature(feature)

            csv_data.append([pixel_id, round(x_coord, 6), round(y_coord, 6)] + band_values)
            pixel_id += 1

    # Clean up: dropping the references closes the datasets and flushes them to disk
    point_layer = None
    polygon_layer = None
    point_ds = None
    polygon_ds = None
    raster_ds = None

    # Create CSVs
    csv_columns = ["pixel_id", "x_coord", "y_coord"] + actual_band_names
    df = pd.DataFrame(csv_data, columns=csv_columns)
    df.to_csv(output_csv_path, index=False)

    # The band values are already plain ints and never null, so an integer
    # cast gives the same "cleaned" file as the former element-wise conversion
    # while avoiding the deprecated DataFrame.applymap.
    if actual_band_names:
        df[actual_band_names] = df[actual_band_names].astype("int64")
    df.to_csv(cleaned_csv_path, index=False)

    print(f"Created outputs for {raster_name}:")
    print(f"  - Point shapefile: {os.path.basename(output_point_shapefile)}")
    print(f"  - Polygon shapefile: {os.path.basename(output_polygon_shapefile)}")
    print(f"  - CSV files: {os.path.basename(output_csv_path)} and {os.path.basename(cleaned_csv_path)}")

print("\nProcessing complete for all rasters.")

## Results

The script has created the following outputs in your Google Drive:
1. Point shapefiles (.shp) in the 'vector' folder
2. Polygon shapefiles (.shp) in the 'vector' folder
3. Raw CSV files in the 'csv' folder
4. Cleaned CSV files in the 'csv' folder

You can find these files in the following location:
```
/content/drive/MyDrive/earthengine/conversion
    ├── raster/
    ├── vector/
    └── csv/
```