In [3]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the 2013 and 2022 population-density GeoJSON files
with open('atlanta_blockgroup_pdi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_pdi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Tag every feature with its source year so the pooled list can be split again later
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Pool both years so the percentile rank is computed over one shared population
combined_features = features_2013 + features_2022

# One row per feature, in the same order as combined_features
data = [
    {
        'year': feature['properties']['year'],
        'POPDENSITY': feature['properties'].get('POPDENSITY', 0),  # Default to 0 if POPDENSITY is missing
        'properties': feature['properties'],
        'geometry': feature['geometry']
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# .get() only covers an absent key; a key that is present but null still ends up as NaN here
if df['POPDENSITY'].isnull().any():
    print("Warning: Missing POPDENSITY values found.")
    # Assign the result back instead of calling fillna(inplace=True) on df['POPDENSITY']:
    # the chained inplace form acts on a temporary Series under pandas Copy-on-Write
    # and would leave the NaN values in df.
    df['POPDENSITY'] = df['POPDENSITY'].fillna(0)

# Percentile rank of POPDENSITY across both years: average rank divided by the row count
df['PDI'] = rankdata(df['POPDENSITY'], method='average') / len(df['POPDENSITY'])

# Write the index back onto the features; row order matches combined_features,
# and tolist() yields plain Python floats for JSON serialization
for feature, pdi_value in zip(combined_features, df['PDI'].tolist()):
    feature['properties']['PDI'] = pdi_value

# Split the pooled features back into their original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files (the inputs are replaced in place)
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_pdi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_pdi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    """Flatten a GeoJSON feature collection into a CSV with one row per feature.

    Each row contains the feature's properties plus two extra columns:
    ``geometry_type`` and ``coordinates`` (the coordinate array serialized
    as a JSON string).

    Args:
        geojson_data: Parsed GeoJSON dict holding a ``'features'`` list.
        csv_path: Destination path of the CSV file.

    A feature whose ``geometry`` or ``properties`` is null, or whose geometry
    has no ``coordinates`` member, is still exported; the affected cells are
    left empty instead of raising.
    """
    rows = []
    for feature in geojson_data['features']:
        # Shallow copy so the extra columns never leak into the feature itself
        row = dict(feature.get('properties') or {})
        geometry = feature.get('geometry') or {}
        row['geometry_type'] = geometry.get('type')
        # Serialize coordinates as a string so the nested list fits in one CSV cell
        row['coordinates'] = (
            json.dumps(geometry['coordinates']) if 'coordinates' in geometry else None
        )
        rows.append(row)
    pd.DataFrame(rows).to_csv(csv_path, index=False)

# Export each updated collection to a CSV that shares the GeoJSON's base name
for collection, csv_name in (
    (geojson_2013, 'atlanta_blockgroup_pdi_2013.csv'),
    (geojson_2022, 'atlanta_blockgroup_pdi_2022.csv'),
):
    geojson_to_csv(collection, csv_name)




In [4]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the 2013 and 2022 commercial-density GeoJSON files
with open('atlanta_blockgroup_cdi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_cdi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Tag every feature with its source year so the pooled list can be split again later
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Pool both years so the percentile rank is computed over one shared population
combined_features = features_2013 + features_2022

# One row per feature, in the same order as combined_features
data = [
    {
        'year': feature['properties']['year'],
        'commercial_density': feature['properties'].get('commercial_density', 0),  # Default to 0 if missing
        'properties': feature['properties'],
        'geometry': feature['geometry']
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# .get() only covers an absent key; a key that is present but null still ends up as NaN here
if df['commercial_density'].isnull().any():
    print("Warning: Missing commercial_density values found.")
    # Assign the result back instead of calling fillna(inplace=True) on the column:
    # the chained inplace form acts on a temporary Series under pandas Copy-on-Write
    # and would leave the NaN values in df.
    df['commercial_density'] = df['commercial_density'].fillna(0)

# Percentile rank of commercial_density across both years: average rank divided by the row count
df['CDI'] = rankdata(df['commercial_density'], method='average') / len(df['commercial_density'])

# Write the index back onto the features; row order matches combined_features,
# and tolist() yields plain Python floats for JSON serialization
for feature, cdi_value in zip(combined_features, df['CDI'].tolist()):
    feature['properties']['CDI'] = cdi_value

# Split the pooled features back into their original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files (the inputs are replaced in place)
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_cdi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_cdi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    """Flatten a GeoJSON feature collection into a CSV with one row per feature.

    Each row contains the feature's properties plus two extra columns:
    ``geometry_type`` and ``coordinates`` (the coordinate array serialized
    as a JSON string).

    Args:
        geojson_data: Parsed GeoJSON dict holding a ``'features'`` list.
        csv_path: Destination path of the CSV file.

    A feature whose ``geometry`` or ``properties`` is null, or whose geometry
    has no ``coordinates`` member, is still exported; the affected cells are
    left empty instead of raising.
    """
    rows = []
    for feature in geojson_data['features']:
        # Shallow copy so the extra columns never leak into the feature itself
        row = dict(feature.get('properties') or {})
        geometry = feature.get('geometry') or {}
        row['geometry_type'] = geometry.get('type')
        # Serialize coordinates as a string so the nested list fits in one CSV cell
        row['coordinates'] = (
            json.dumps(geometry['coordinates']) if 'coordinates' in geometry else None
        )
        rows.append(row)
    pd.DataFrame(rows).to_csv(csv_path, index=False)

# Export each updated collection to a CSV that shares the GeoJSON's base name
for collection, csv_name in (
    (geojson_2013, 'atlanta_blockgroup_cdi_2013.csv'),
    (geojson_2022, 'atlanta_blockgroup_cdi_2022.csv'),
):
    geojson_to_csv(collection, csv_name)


In [5]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the 2013 and 2022 IDI GeoJSON files
with open('atlanta_blockgroup_idi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_idi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Tag every feature with its source year so the pooled list can be split again later
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Pool both years so the percentile rank is computed over one shared population
combined_features = features_2013 + features_2022

# One row per feature, in the same order as combined_features
data = [
    {
        'year': feature['properties']['year'],
        'IDI': feature['properties'].get('IDI', 0),  # Default to 0 if IDI is missing
        'properties': feature['properties'],
        'geometry': feature['geometry']
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# .get() only covers an absent key; a key that is present but null still ends up as NaN here
if df['IDI'].isnull().any():
    print("Warning: Missing IDI values found.")
    # Assign the result back instead of calling fillna(inplace=True) on df['IDI']:
    # the chained inplace form acts on a temporary Series under pandas Copy-on-Write
    # and would leave the NaN values in df.
    df['IDI'] = df['IDI'].fillna(0)

# Percentile rank of IDI across both years: average rank divided by the row count.
# NOTE: the rank replaces the raw IDI value, both in df and (below) in the files.
df['IDI'] = rankdata(df['IDI'], method='average') / len(df['IDI'])

# Write the index back onto the features; row order matches combined_features,
# and tolist() yields plain Python floats for JSON serialization
for feature, idi_value in zip(combined_features, df['IDI'].tolist()):
    feature['properties']['IDI'] = idi_value

# Split the pooled features back into their original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files (the inputs are replaced in place)
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_idi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_idi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    """Flatten a GeoJSON feature collection into a CSV with one row per feature.

    Each row contains the feature's properties plus two extra columns:
    ``geometry_type`` and ``coordinates`` (the coordinate array serialized
    as a JSON string).

    Args:
        geojson_data: Parsed GeoJSON dict holding a ``'features'`` list.
        csv_path: Destination path of the CSV file.

    A feature whose ``geometry`` or ``properties`` is null, or whose geometry
    has no ``coordinates`` member, is still exported; the affected cells are
    left empty instead of raising.
    """
    rows = []
    for feature in geojson_data['features']:
        # Shallow copy so the extra columns never leak into the feature itself
        row = dict(feature.get('properties') or {})
        geometry = feature.get('geometry') or {}
        row['geometry_type'] = geometry.get('type')
        # Serialize coordinates as a string so the nested list fits in one CSV cell
        row['coordinates'] = (
            json.dumps(geometry['coordinates']) if 'coordinates' in geometry else None
        )
        rows.append(row)
    pd.DataFrame(rows).to_csv(csv_path, index=False)

# Export each updated collection to a CSV that shares the GeoJSON's base name
for collection, csv_name in (
    (geojson_2013, 'atlanta_blockgroup_idi_2013.csv'),
    (geojson_2022, 'atlanta_blockgroup_idi_2022.csv'),
):
    geojson_to_csv(collection, csv_name)


In [9]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the 2013 and 2022 LDI (entropy) GeoJSON files
with open('atlanta_blockgroup_ldi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_ldi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Tag every feature with its source year so the pooled list can be split again later
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Pool both years so the percentile rank is computed over one shared population
combined_features = features_2013 + features_2022

# One row per feature, in the same order as combined_features
data = [
    {
        'year': feature['properties']['year'],
        'Entropy': feature['properties'].get('Entropy', 0),  # Default to 0 if entropy is missing
        'properties': feature['properties'],
        'geometry': feature['geometry']
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# .get() only covers an absent key; a key that is present but null still ends up as NaN here
if df['Entropy'].isnull().any():
    print("Warning: Missing entropy values found.")
    # Assign the result back instead of calling fillna(inplace=True) on df['Entropy']:
    # the chained inplace form acts on a temporary Series under pandas Copy-on-Write
    # and would leave the NaN values in df.
    df['Entropy'] = df['Entropy'].fillna(0)

# Percentile rank of entropy across both years: average rank divided by the row count
df['LDI'] = rankdata(df['Entropy'], method='average') / len(df['Entropy'])

# Write the index back onto the features; row order matches combined_features,
# and tolist() yields plain Python floats for JSON serialization
for feature, ldi_value in zip(combined_features, df['LDI'].tolist()):
    feature['properties']['LDI'] = ldi_value

# Split the pooled features back into their original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files (the inputs are replaced in place)
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_ldi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_ldi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    """Flatten a GeoJSON feature collection into a CSV with one row per feature.

    Each row contains the feature's properties plus two extra columns:
    ``geometry_type`` and ``coordinates`` (the coordinate array serialized
    as a JSON string).

    Args:
        geojson_data: Parsed GeoJSON dict holding a ``'features'`` list.
        csv_path: Destination path of the CSV file.

    A feature whose ``geometry`` or ``properties`` is null, or whose geometry
    has no ``coordinates`` member, is still exported; the affected cells are
    left empty instead of raising.
    """
    rows = []
    for feature in geojson_data['features']:
        # Shallow copy so the extra columns never leak into the feature itself
        row = dict(feature.get('properties') or {})
        geometry = feature.get('geometry') or {}
        row['geometry_type'] = geometry.get('type')
        # Serialize coordinates as a string so the nested list fits in one CSV cell
        row['coordinates'] = (
            json.dumps(geometry['coordinates']) if 'coordinates' in geometry else None
        )
        rows.append(row)
    pd.DataFrame(rows).to_csv(csv_path, index=False)

# Export each updated collection to a CSV that shares the GeoJSON's base name
for collection, csv_name in (
    (geojson_2013, 'atlanta_blockgroup_ldi_2013.csv'),
    (geojson_2022, 'atlanta_blockgroup_ldi_2022.csv'),
):
    geojson_to_csv(collection, csv_name)


In [25]:
import json
import pandas as pd

# Function to load and process each GeoJSON file
def load_geojson(filepath, metric_name):
    """Read a GeoJSON file into a DataFrame with one row per feature.

    Args:
        filepath: Path of the GeoJSON file to read.
        metric_name: Property to extract; also used as the column name.

    Returns:
        DataFrame with columns ``GEOID`` (None when the property is absent),
        ``geometry`` (the raw geometry dict), ``coordinates_str`` (the
        coordinates serialized as a JSON string) and ``metric_name``
        (0 when the property is absent).
    """
    with open(filepath, 'r') as handle:
        collection = json.load(handle)

    def to_record(feature):
        # A JSON string is hashable, so it can serve as a merge key
        coords_json = json.dumps(feature['geometry']['coordinates'])
        props = feature['properties']
        return {
            'GEOID': props.get('GEOID', None),
            'geometry': feature['geometry'],
            'coordinates_str': coords_json,
            metric_name: props.get(metric_name, 0),
        }

    return pd.DataFrame([to_record(feature) for feature in collection['features']])

# Function to load all metrics for a given year
def load_year_data(year):
    """Load the PDI, IDI, LDI and CDI frames for one year.

    Args:
        year: Year token interpolated into each file name.

    Returns:
        Tuple ``(pdi, idi, ldi, cdi)`` of DataFrames from ``load_geojson``.
    """
    sources = (
        ('PDI', f'atlanta_blockgroup_pdi_{year}.geojson'),
        ('IDI', f'atlanta_blockgroup_idi_{year}.geojson'),
        ('LDI', f'atlanta_blockgroup_ldi_{year}.geojson'),
        ('CDI', f'atlanta_blockgroup_cdi_{year}.geojson'),
    )
    pdi, idi, ldi, cdi = (load_geojson(path, metric) for metric, path in sources)
    return pdi, idi, ldi, cdi

# Read the four per-metric frames for each of the two study years
(
    (pdi_2013, idi_2013, ldi_2013, cdi_2013),
    (pdi_2022, idi_2022, ldi_2022, cdi_2022),
) = (load_year_data(year) for year in ('2013', '2022'))

# Function to merge data for a given year
def merge_data(pdi, idi, ldi, cdi):
    """Join the four metric frames of one year on their coordinate string.

    Starts from ``pdi`` (all of its columns are kept) and inner-joins the
    ``IDI``, ``LDI`` and ``CDI`` columns in that order, matching rows on
    ``coordinates_str``. Rows without a match in every frame are dropped.

    Returns:
        The merged DataFrame.
    """
    merged = pdi
    for frame, metric in ((idi, 'IDI'), (ldi, 'LDI'), (cdi, 'CDI')):
        merged = pd.merge(
            merged,
            frame[['coordinates_str', metric]],
            on='coordinates_str',
            how='inner',
        )
    return merged

# Join the four metric frames of each year into one frame per year
data_2013 = merge_data(pdi_2013, idi_2013, ldi_2013, cdi_2013)
data_2022 = merge_data(pdi_2022, idi_2022, ldi_2022, cdi_2022)

# PEI = (1 + PDI)(1 + IDI)(1 + LDI)(1 + CDI) / 16, followed by the year tag
for merged_frame, year_label in ((data_2013, 2013), (data_2022, 2022)):
    merged_frame['PEI'] = (
        (1 + merged_frame['PDI'])
        * (1 + merged_frame['IDI'])
        * (1 + merged_frame['LDI'])
        * (1 + merged_frame['CDI'])
    ) / 16
    merged_frame['year'] = year_label

# Keep only the columns that go into the final output files
data_2013_final, data_2022_final = (
    merged_frame[['GEOID', 'geometry', 'year', 'PEI']]
    for merged_frame in (data_2013, data_2022)
)

# Function to save data to GeoJSON
def save_geojson(data, output_file):
    """Write a frame of PEI rows to disk as a GeoJSON FeatureCollection.

    Args:
        data: DataFrame with ``GEOID``, ``year``, ``PEI`` and ``geometry``
            columns; each row becomes one feature.
        output_file: Destination path of the GeoJSON file.
    """
    collection = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {key: record[key] for key in ("GEOID", "year", "PEI")},
                "geometry": record['geometry'],
            }
            for _, record in data.iterrows()
        ],
    }
    with open(output_file, 'w') as handle:
        json.dump(collection, handle)

# Function to save data to CSV
def save_csv(data, output_file):
    """Write a frame of PEI rows to a CSV file without the index.

    Args:
        data: DataFrame with ``GEOID``, ``year``, ``PEI`` and ``geometry``
            columns.
        output_file: Destination path of the CSV file.

    The output columns are ``GEOID``, ``year``, ``PEI`` and ``coordinates``,
    where ``coordinates`` is the geometry's coordinate array as a JSON string.
    """
    records = [
        {
            "GEOID": record['GEOID'],
            "year": record['year'],
            "PEI": record['PEI'],
            # A nested coordinate list has to be stringified to fit one cell
            "coordinates": json.dumps(record['geometry']['coordinates']),
        }
        for _, record in data.iterrows()
    ]
    pd.DataFrame(records).to_csv(output_file, index=False)

# Persist the per-year PEI frames: the GeoJSON files first, then the CSV files
for final_frame, geojson_name in (
    (data_2013_final, 'atlanta_blockgroup_pei_2013.geojson'),
    (data_2022_final, 'atlanta_blockgroup_pei_2022.geojson'),
):
    save_geojson(final_frame, geojson_name)

for final_frame, csv_name in (
    (data_2013_final, 'atlanta_blockgroup_pei_2013.csv'),
    (data_2022_final, 'atlanta_blockgroup_pei_2022.csv'),
):
    save_csv(final_frame, csv_name)
