# NDVI Vector Data Processing

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/YOUR_USERNAME/YOUR_REPO/blob/main/combine_ndvi_vectors.ipynb)

This notebook processes NDVI vector files stored in Google Drive, combining data from multiple years (every second year from 2013 to 2023) into a single GeoPackage file per area.

## Input Data Structure
- Location: `/content/drive/MyDrive/earthengine/conversion/vector`
- File naming pattern: `NDVI_JanMay_YYYY_Area_N_polygons` where:
  - YYYY: Year (2013, 2015, 2017, 2019, 2021, 2023)
  - N: Area number

## Output
- One geopackage (.gpkg) file per area
- Combined attributes across all years
- Output columns: fid, pixel_id, x_coord_left, y_coord_left, y2013, y2015, y2017, y2019, y2021, y2023, x_coord_right, y_coord_right

## Setup
First, install and import the required libraries, then mount Google Drive.

In [None]:
# Install required packages
# The leading "!" hands the line to a shell, so this cell only runs inside an
# IPython/Colab notebook, not as a plain Python script.
!pip install geopandas pandas numpy

In [None]:
# Import required libraries
import os
import glob
import pandas as pd
import geopandas as gpd
from google.colab import drive

# Mount Google Drive
# The input and output paths used later in this notebook live under
# /content/drive/MyDrive, so the mount must succeed before any processing cell runs.
drive.mount('/content/drive')

## Define Processing Functions

In [None]:
def get_area_files(base_path, area_number):
    """Return the NDVI files for one area across all years, sorted by path.

    Args:
        base_path: Directory that contains the NDVI vector files.
        area_number: Area identifier as it appears in the file name
            (``NDVI_JanMay_<year>_Area_<area_number>_polygons``).

    Returns:
        A list of matching paths in ascending order, or an empty list when
        nothing matches.
    """
    pattern = os.path.join(base_path, f'NDVI_JanMay_*_Area_{area_number}_polygons')
    # glob.glob makes no ordering guarantee. Only the year part of the name
    # varies between matches, so sorting the paths yields a stable,
    # chronological order for callers that treat the first file as the base.
    return sorted(glob.glob(pattern))

def extract_year(filename):
    """Extract the year from an NDVI file name or path.

    Only the final path component is inspected, with any extension removed,
    so numeric tokens in directory names are never mistaken for the year.

    Args:
        filename: File name or full path, e.g.
            ``.../NDVI_JanMay_2013_Area_1_polygons``.

    Returns:
        The first underscore-separated token of the file name that consists
        solely of digits, as a string (e.g. ``'2013'``).

    Raises:
        ValueError: If the file name contains no all-digit token.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    for token in stem.split('_'):
        if token.isdigit():
            return token
    # Raise an explicit error rather than letting a bare StopIteration escape
    # from next(), which is hard to diagnose for callers.
    raise ValueError(f"No year found in file name: {filename!r}")

def process_area(base_path, area_number):
    """Combine the per-year NDVI files of one area into a single GeoDataFrame.

    Each file's ``y`` column is renamed to ``y<year>`` (year taken from the
    file name). The earliest year supplies the base table with all of its
    columns; every later year contributes only its ``y<year>`` column, joined
    on ``pixel_id`` and ``geometry`` with an outer merge so that rows present
    in only some years are kept.

    Args:
        base_path: Directory that contains the NDVI vector files.
        area_number: Area identifier as it appears in the file names.

    Returns:
        The combined table, or ``None`` (after printing a message) when no
        file exists for the area.
    """
    files = get_area_files(base_path, area_number)
    if not files:
        print(f"No files found for Area {area_number}")
        return None

    # glob order is arbitrary; ordering by year makes the base file and the
    # resulting y<year> column order chronological and reproducible.
    files_by_year = sorted((extract_year(path), path) for path in files)

    combined_gdf = None
    for year, path in files_by_year:
        value_column = f'y{year}'
        # NOTE(review): assumes every input stores its NDVI value in a column
        # named 'y' — confirm against the exported files.
        year_gdf = gpd.read_file(path).rename(columns={'y': value_column})

        if combined_gdf is None:
            # First (earliest) file: keep every column as the base structure.
            combined_gdf = year_gdf
            continue

        # Later files: bring in only the year's value column, keyed on the
        # pixel identifier and its geometry.
        combined_gdf = combined_gdf.merge(
            year_gdf[['pixel_id', value_column, 'geometry']],
            on=['pixel_id', 'geometry'],
            how='outer',
        )

    return combined_gdf

## Process All Areas

In [None]:
# Set the base path
base_path = '/content/drive/MyDrive/earthengine/conversion/vector'

# Get unique area numbers from existing files.
# Only the file name is parsed, so underscores in the directory part of the
# path cannot be mistaken for fields of the file name.
all_files = glob.glob(os.path.join(base_path, 'NDVI_JanMay_*_Area_*_polygons'))
area_numbers = set()
for file in all_files:
    parts = os.path.basename(file).split('_')
    area_idx = parts.index('Area')
    if area_idx + 1 < len(parts):
        area_numbers.add(parts[area_idx + 1])

# Process each area.
# Area identifiers are strings; sorting by (length, text) visits them as
# 1, 2, ..., 10 instead of the lexicographic 1, 10, 2.
for area_num in sorted(area_numbers, key=lambda num: (len(num), num)):
    print(f"Processing Area {area_num}...")
    result_gdf = process_area(base_path, area_num)

    if result_gdf is not None:
        # Save to geopackage, next to the input files
        output_path = os.path.join(base_path, f'NDVI_Combined_Area_{area_num}.gpkg')
        result_gdf.to_file(output_path, driver='GPKG')
        print(f"Saved combined data for Area {area_num} to {output_path}")

print("\nProcessing complete!")

## Verification
Let's verify the structure of one of the output files.

In [None]:
# Read and display the structure of the first processed file.
# Sorting makes "first" deterministic, because glob returns matches in arbitrary order.
combined_files = sorted(glob.glob(os.path.join(base_path, 'NDVI_Combined_Area_*.gpkg')))
if not combined_files:
    # Fail with a clear message instead of an IndexError on an empty list.
    raise FileNotFoundError(
        f"No NDVI_Combined_Area_*.gpkg files found in {base_path}; "
        "run the processing cell first."
    )
sample_file = combined_files[0]
sample_gdf = gpd.read_file(sample_file)
print("\nOutput file structure:")
print(sample_gdf.columns)
print("\nFirst few rows:")
# Kept as the cell's last top-level expression so the notebook renders the table.
sample_gdf.head()