# In [None]:  (appears to be a Jupyter notebook cell marker left over from export)
import pandas as pd
import geopandas as gpd
import os

def calculate_percentiles(file_name, column_name):
    """Load a CSV and add a percentile-rank column for one of its columns.

    The new column is named after the sub-index prefix of the file's base
    name, i.e. the text before the first underscore (``CDI`` for
    ``CDI_bg_all.csv``).

    Args:
        file_name: Path of the CSV file to read.
        column_name: Column whose values are ranked. Entries that cannot be
            parsed as numbers are coerced to NaN.

    Returns:
        The loaded DataFrame, with ``column_name`` converted to numeric and
        an extra column holding fractional percentile ranks in (0, 1].
        Rows whose value is missing have NaN in the rank column.
    """
    data = pd.read_csv(file_name)

    # Ensure the column is numeric; unparseable entries become NaN.
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

    # Derive the sub-index from the base name so that a directory component
    # (which may itself contain underscores) never leaks into the column name.
    subindex = os.path.basename(file_name).split("_")[0]

    # rank(pct=True) returns fractions in (0, 1], not 0-100. NaN inputs stay
    # NaN because na_option='keep' is the default, so no extra masking is
    # needed afterwards.
    data[subindex] = data[column_name].rank(pct=True)

    return data

def _align_scores_to_ids(scores, target_ids):
    """Return ``scores`` keyed with the same kind of GEOID as ``target_ids``.

    ``scores`` is indexed by GEOID strings. When the target column holds
    integers, a copy with an int64 index is returned so that ``Series.map``
    can match keys; otherwise ``scores`` is returned unchanged.
    """
    if pd.api.types.is_integer_dtype(target_ids):
        aligned = scores.copy()
        aligned.index = aligned.index.astype('int64')
        return aligned
    return scores


def process_file(file_name, column_name):
    """Compute percentile scores for a file and merge them into per-year outputs.

    The ``GEOID`` column of the input is expected to hold ``<geoid>+<year>``
    strings. For every year present, the scores are written into the column
    named after the sub-index in ``block_groups_<year>_<subindex>.csv`` and
    ``block_groups_<year>_<subindex>.geojson``. Both paths are relative, and
    a file that does not exist is skipped.

    Args:
        file_name: Path of the input CSV; its base-name prefix before the
            first underscore is the sub-index (e.g. ``CDI``).
        column_name: Column of the input CSV to turn into percentile scores.

    Returns:
        None. Existing output files are rewritten in place.
    """
    processed_data = calculate_percentiles(file_name, column_name)

    # Sub-index used for the output column and file names. It is derived once
    # from the base name so a directory in the path cannot corrupt the names.
    subindex = os.path.basename(file_name).split("_")[0]
    # The score column is named after the file-name prefix; accept either the
    # directory-stripped or the raw prefix spelling.
    raw_prefix = file_name.split("_")[0]
    score_column = subindex if subindex in processed_data.columns else raw_prefix

    # Split the combined key into GEOID and Year.
    processed_data[['GEOID', 'Year']] = processed_data['GEOID'].str.split('+', expand=True)
    processed_data['Year'] = processed_data['Year'].astype(int)

    for year in processed_data['Year'].unique():
        year_data = processed_data[processed_data['Year'] == year]

        # Build the GEOID -> score lookup once, without mutating the slice.
        # Keys stay as strings here; each target below gets a lookup whose
        # key dtype matches its own GEOID column, so updating the CSV can no
        # longer change what the GeoJSON update sees.
        scores = year_data.set_index('GEOID')[score_column]

        csv_filename = f'block_groups_{year}_{subindex}.csv'
        geojson_filename = f'block_groups_{year}_{subindex}.geojson'

        # Update CSV
        if os.path.exists(csv_filename):
            csv_data = pd.read_csv(csv_filename)
            csv_lookup = _align_scores_to_ids(scores, csv_data['GEOID'])
            csv_data[subindex] = csv_data['GEOID'].map(csv_lookup)
            csv_data.to_csv(csv_filename, index=False)

        # Update GeoJSON
        if os.path.exists(geojson_filename):
            geojson_data = gpd.read_file(geojson_filename)
            geojson_lookup = _align_scores_to_ids(scores, geojson_data['GEOID'])
            geojson_data[subindex] = geojson_data['GEOID'].map(geojson_lookup)
            geojson_data.to_file(geojson_filename, driver="GeoJSON")

def main():
    """Process every known sub-index file, reporting those that are absent.

    Each entry pairs an input CSV with the column to score. Files that do
    not exist are reported on stdout and skipped.
    """
    jobs = (
        ("CDI_bg_all.csv", "Commercial Density"),
        ("IDI_bg_all.csv", "Intersection Density"),
        ("LDI_bg_all.csv", "Entropy"),
        ("PDI_bg_all.csv", "Population Density"),
    )

    for source_csv, score_column in jobs:
        if not os.path.exists(source_csv):
            print(f"File not found: {source_csv}")
            continue
        process_file(source_csv, score_column)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
