In [1]:
import os
import glob
import geopandas as gpd
import pandas as pd
from tqdm.notebook import tqdm

# Define your paths here
input_folder = "/Users/santi/DataspellProjects/Lithium-Jakob/Data/NDVI_Seasonal"  # Change this to your folder with the 26 GeoJSON files
output_file = "/Users/santi/DataspellProjects/Lithium-Jakob/Data/lithium_ndvi_seasonal.geojson"  # Change this to your desired output path
pattern = "NDVI_Quarterly_batch_*_of_26.geojson"

# Get a list of all GeoJSON batch files
batch_files = glob.glob(os.path.join(input_folder, pattern))

if not batch_files:
    print(f"No files matching '{pattern}' found in '{input_folder}'")
else:
    print(f"Found {len(batch_files)} GeoJSON batch files")

    # Display the files we found
    for file in batch_files:
        print(f" - {os.path.basename(file)}")

    # Initialize an empty GeoDataFrame to store all features
    all_features = None

    # Process each batch file
    for i, file_path in enumerate(tqdm(batch_files, desc="Concatenating files")):
        try:
            # Read the GeoJSON file
            batch_gdf = gpd.read_file(file_path)

            # Print some info about this batch
            print(f"\nBatch {i+1}/{len(batch_files)}: {os.path.basename(file_path)}")
            print(f" - Features: {len(batch_gdf)}")
            print(f" - Columns: {list(batch_gdf.columns)}")

            # If this is the first batch, initialize all_features
            if all_features is None:
                all_features = batch_gdf
            else:
                # Append to the existing GeoDataFrame
                all_features = gpd.GeoDataFrame(
                    pd.concat([all_features, batch_gdf], ignore_index=True),
                    crs=all_features.crs
                )

        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")

    if all_features is not None and not all_features.empty:
        # Show info about the consolidated data
        print("\nConsolidated Data Info:")
        print(f" - Total Features: {len(all_features)}")
        print(f" - CRS: {all_features.crs}")
        print(f" - Columns: {list(all_features.columns)}")

        # Save the consolidated GeoDataFrame to a GeoJSON file
        print(f"\nSaving consolidated data to {output_file}")
        all_features.to_file(output_file, driver="GeoJSON")
        print("Concatenation complete!")

        # Optionally preview some data
        print("\nPreview of consolidated data:")
        display(all_features.head())
    else:
        print("No data to save")

Found 26 GeoJSON batch files
 - NDVI_Quarterly_batch_16_of_26.geojson
 - NDVI_Quarterly_batch_12_of_26.geojson
 - NDVI_Quarterly_batch_15_of_26.geojson
 - NDVI_Quarterly_batch_11_of_26.geojson
 - NDVI_Quarterly_batch_8_of_26.geojson
 - NDVI_Quarterly_batch_6_of_26.geojson
 - NDVI_Quarterly_batch_2_of_26.geojson
 - NDVI_Quarterly_batch_22_of_26.geojson
 - NDVI_Quarterly_batch_26_of_26.geojson
 - NDVI_Quarterly_batch_21_of_26.geojson
 - NDVI_Quarterly_batch_18_of_26.geojson
 - NDVI_Quarterly_batch_25_of_26.geojson
 - NDVI_Quarterly_batch_5_of_26.geojson
 - NDVI_Quarterly_batch_1_of_26.geojson
 - NDVI_Quarterly_batch_24_of_26.geojson
 - NDVI_Quarterly_batch_19_of_26.geojson
 - NDVI_Quarterly_batch_20_of_26.geojson
 - NDVI_Quarterly_batch_4_of_26.geojson
 - NDVI_Quarterly_batch_3_of_26.geojson
 - NDVI_Quarterly_batch_7_of_26.geojson
 - NDVI_Quarterly_batch_23_of_26.geojson
 - NDVI_Quarterly_batch_10_of_26.geojson
 - NDVI_Quarterly_batch_14_of_26.geojson
 - NDVI_Quarterly_batch_9_of_26.geoj

Concatenating files:   0%|          | 0/26 [00:00<?, ?it/s]


Batch 1/26: NDVI_Quarterly_batch_16_of_26.geojson
 - Features: 25
 - Columns: ['id', 'Q1_Summer_2000', 'Q1_Summer_2001', 'Q1_Summer_2002', 'Q1_Summer_2003', 'Q1_Summer_2004', 'Q1_Summer_2005', 'Q1_Summer_2006', 'Q1_Summer_2007', 'Q1_Summer_2008', 'Q1_Summer_2009', 'Q1_Summer_2010', 'Q1_Summer_2011', 'Q1_Summer_2012', 'Q1_Summer_2013', 'Q1_Summer_2014', 'Q1_Summer_2015', 'Q1_Summer_2016', 'Q1_Summer_2017', 'Q1_Summer_2018', 'Q1_Summer_2019', 'Q1_Summer_2020', 'Q1_Summer_2021', 'Q1_Summer_2022', 'Q2_Fall_2000', 'Q2_Fall_2001', 'Q2_Fall_2002', 'Q2_Fall_2003', 'Q2_Fall_2004', 'Q2_Fall_2005', 'Q2_Fall_2006', 'Q2_Fall_2007', 'Q2_Fall_2008', 'Q2_Fall_2009', 'Q2_Fall_2010', 'Q2_Fall_2011', 'Q2_Fall_2012', 'Q2_Fall_2013', 'Q2_Fall_2014', 'Q2_Fall_2015', 'Q2_Fall_2016', 'Q2_Fall_2017', 'Q2_Fall_2018', 'Q2_Fall_2019', 'Q2_Fall_2020', 'Q2_Fall_2021', 'Q2_Fall_2022', 'Q3_Winter_2000', 'Q3_Winter_2001', 'Q3_Winter_2002', 'Q3_Winter_2003', 'Q3_Winter_2004', 'Q3_Winter_2005', 'Q3_Winter_2006', 'Q3_Wi

Unnamed: 0,id,Q1_Summer_2000,Q1_Summer_2001,Q1_Summer_2002,Q1_Summer_2003,Q1_Summer_2004,Q1_Summer_2005,Q1_Summer_2006,Q1_Summer_2007,Q1_Summer_2008,...,Q4_Spring_2015,Q4_Spring_2016,Q4_Spring_2017,Q4_Spring_2018,Q4_Spring_2019,Q4_Spring_2020,Q4_Spring_2021,Q4_Spring_2022,h3_address,geometry
0,0,0.149021,0.171657,0.158073,0.12639,0.149216,0.139421,0.159824,0.15006,0.162447,...,0.089104,0.091958,0.089424,0.103817,0.099791,0.092578,0.098584,0.09941,86b373637ffffff,"POLYGON ((-65.64122 -22.29340, -65.66920 -22.3..."
1,1,0.097082,0.109471,0.097445,0.079684,0.111682,0.107227,0.113961,0.107576,0.111917,...,0.085396,0.083215,0.081184,0.086708,0.084875,0.086398,0.084174,0.08486,86b355577ffffff,"POLYGON ((-65.82428 -22.98651, -65.85239 -23.0..."
2,2,0.168326,0.189706,0.197025,0.170596,0.185124,0.180152,0.184862,0.168688,0.183193,...,0.14186,0.139842,0.139932,0.154079,0.147188,0.14322,0.146748,0.143831,86b35588fffffff,"POLYGON ((-65.45068 -22.76412, -65.47864 -22.7..."
3,3,0.143292,0.16479,0.169485,0.155964,0.163626,0.159558,0.164549,0.158746,0.163522,...,0.124659,0.12539,0.125137,0.137202,0.131082,0.132215,0.1297,0.131582,86b3558b7ffffff,"POLYGON ((-65.52514 -22.67709, -65.55311 -22.6..."
4,4,0.119441,0.142319,0.121587,0.103072,0.123384,0.130406,0.129415,0.115871,0.131485,...,0.09148,0.090169,0.088645,0.093256,0.08949,0.093881,0.089409,0.0897,86b35554fffffff,"POLYGON ((-65.74960 -23.07349, -65.77769 -23.0..."


In [3]:
all_features.columns

Index(['id', 'Q1_Summer_2000', 'Q1_Summer_2001', 'Q1_Summer_2002',
       'Q1_Summer_2003', 'Q1_Summer_2004', 'Q1_Summer_2005', 'Q1_Summer_2006',
       'Q1_Summer_2007', 'Q1_Summer_2008', 'Q1_Summer_2009', 'Q1_Summer_2010',
       'Q1_Summer_2011', 'Q1_Summer_2012', 'Q1_Summer_2013', 'Q1_Summer_2014',
       'Q1_Summer_2015', 'Q1_Summer_2016', 'Q1_Summer_2017', 'Q1_Summer_2018',
       'Q1_Summer_2019', 'Q1_Summer_2020', 'Q1_Summer_2021', 'Q1_Summer_2022',
       'Q2_Fall_2000', 'Q2_Fall_2001', 'Q2_Fall_2002', 'Q2_Fall_2003',
       'Q2_Fall_2004', 'Q2_Fall_2005', 'Q2_Fall_2006', 'Q2_Fall_2007',
       'Q2_Fall_2008', 'Q2_Fall_2009', 'Q2_Fall_2010', 'Q2_Fall_2011',
       'Q2_Fall_2012', 'Q2_Fall_2013', 'Q2_Fall_2014', 'Q2_Fall_2015',
       'Q2_Fall_2016', 'Q2_Fall_2017', 'Q2_Fall_2018', 'Q2_Fall_2019',
       'Q2_Fall_2020', 'Q2_Fall_2021', 'Q2_Fall_2022', 'Q3_Winter_2000',
       'Q3_Winter_2001', 'Q3_Winter_2002', 'Q3_Winter_2003', 'Q3_Winter_2004',
       'Q3_Winter_2005', 'Q3_Wi