In [3]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the two GeoJSON files
with open('atlanta_blockgroup_pdi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_pdi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Extract features from both GeoJSON files and add a year attribute
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Combine features
combined_features = features_2013 + features_2022

# Create a DataFrame from the combined features
data = [
    {
        'year': feature['properties']['year'],
        'POPDENSITY': feature['properties'].get('POPDENSITY', 0),  # Default to 0 if POPDENSITY is missing
        'properties': feature['properties'],  # Store properties for reconstruction
        'geometry': feature['geometry']  # Store geometry for reconstruction
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# Check for missing values in POPDENSITY
if df['POPDENSITY'].isnull().any():
    print("Warning: Missing POPDENSITY values found.")
    df['POPDENSITY'].fillna(0, inplace=True)  # Replace NaN values with 0 for ranking

# Calculate percentile ranks of POPDENSITY across both years and scale between 0 and 1
df['PDI'] = rankdata(df['POPDENSITY'], method='average') / len(df['POPDENSITY'])

# Update the original features with the new PDI values
for i, feature in enumerate(combined_features):
    feature['properties']['PDI'] = df['PDI'].iloc[i]

# Split the combined GeoJSON back into the original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_pdi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_pdi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    # Flatten the properties and geometry into a CSV-friendly format
    rows = []
    for feature in geojson_data['features']:
        row = feature['properties'].copy()  # Start with properties
        row['geometry_type'] = feature['geometry']['type']
        row['coordinates'] = json.dumps(feature['geometry']['coordinates'])  # Serialize coordinates as string
        rows.append(row)
    # Convert to DataFrame and write to CSV
    df = pd.DataFrame(rows)
    df.to_csv(csv_path, index=False)

# Convert the updated GeoJSONs to CSV files with the original names
geojson_to_csv(geojson_2013, 'atlanta_blockgroup_pdi_2013.csv')
geojson_to_csv(geojson_2022, 'atlanta_blockgroup_pdi_2022.csv')




In [4]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the two GeoJSON files
with open('atlanta_blockgroup_cdi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_cdi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Extract features from both GeoJSON files and add a year attribute
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Combine features
combined_features = features_2013 + features_2022

# Create a DataFrame from the combined features
data = [
    {
        'year': feature['properties']['year'],
        'commercial_density': feature['properties'].get('commercial_density', 0),  # Default to 0 if missing
        'properties': feature['properties'],  # Store properties for reconstruction
        'geometry': feature['geometry']  # Store geometry for reconstruction
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# Check for missing values in commercial_density
if df['commercial_density'].isnull().any():
    print("Warning: Missing commercial_density values found.")
    df['commercial_density'].fillna(0, inplace=True)  # Replace NaN values with 0 for ranking

# Calculate percentile ranks of commercial_density across both years and scale between 0 and 1
df['CDI'] = rankdata(df['commercial_density'], method='average') / len(df['commercial_density'])

# Update the original features with the new CDI values
for i, feature in enumerate(combined_features):
    feature['properties']['CDI'] = df['CDI'].iloc[i]

# Split the combined GeoJSON back into the original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_cdi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_cdi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    # Flatten the properties and geometry into a CSV-friendly format
    rows = []
    for feature in geojson_data['features']:
        row = feature['properties'].copy()  # Start with properties
        row['geometry_type'] = feature['geometry']['type']
        row['coordinates'] = json.dumps(feature['geometry']['coordinates'])  # Serialize coordinates as string
        rows.append(row)
    # Convert to DataFrame and write to CSV
    df = pd.DataFrame(rows)
    df.to_csv(csv_path, index=False)

# Convert the updated GeoJSONs to CSV files with the original names
geojson_to_csv(geojson_2013, 'atlanta_blockgroup_cdi_2013.csv')
geojson_to_csv(geojson_2022, 'atlanta_blockgroup_cdi_2022.csv')


In [5]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the two GeoJSON files
with open('atlanta_blockgroup_idi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_idi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Extract features from both GeoJSON files and add a year attribute
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Combine features
combined_features = features_2013 + features_2022

# Create a DataFrame from the combined features
data = [
    {
        'year': feature['properties']['year'],
        'IDI': feature['properties'].get('IDI', 0),  # Default to 0 if IDI is missing
        'properties': feature['properties'],  # Store properties for reconstruction
        'geometry': feature['geometry']  # Store geometry for reconstruction
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# Check for missing values in IDI
if df['IDI'].isnull().any():
    print("Warning: Missing IDI values found.")
    df['IDI'].fillna(0, inplace=True)  # Replace NaN values with 0 for ranking

# Calculate percentile ranks of IDI across both years and scale between 0 and 1
df['IDI'] = rankdata(df['IDI'], method='average') / len(df['IDI'])

# Update the original features with the new IDI percentile rank values
for i, feature in enumerate(combined_features):
    feature['properties']['IDI'] = df['IDI'].iloc[i]

# Split the combined GeoJSON back into the original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_idi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_idi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    # Flatten the properties and geometry into a CSV-friendly format
    rows = []
    for feature in geojson_data['features']:
        row = feature['properties'].copy()  # Start with properties
        row['geometry_type'] = feature['geometry']['type']
        row['coordinates'] = json.dumps(feature['geometry']['coordinates'])  # Serialize coordinates as string
        rows.append(row)
    # Convert to DataFrame and write to CSV
    df = pd.DataFrame(rows)
    df.to_csv(csv_path, index=False)

# Convert the updated GeoJSONs to CSV files with the original names
geojson_to_csv(geojson_2013, 'atlanta_blockgroup_idi_2013.csv')
geojson_to_csv(geojson_2022, 'atlanta_blockgroup_idi_2022.csv')


In [9]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the two GeoJSON files
with open('atlanta_blockgroup_ldi_2013.geojson', 'r') as file:
    geojson_2013 = json.load(file)
with open('atlanta_blockgroup_ldi_2022.geojson', 'r') as file:
    geojson_2022 = json.load(file)

# Extract features from both GeoJSON files and add a year attribute
features_2013 = geojson_2013['features']
features_2022 = geojson_2022['features']

for feature in features_2013:
    feature['properties']['year'] = 2013
for feature in features_2022:
    feature['properties']['year'] = 2022

# Combine features
combined_features = features_2013 + features_2022

# Create a DataFrame from the combined features
data = [
    {
        'year': feature['properties']['year'],
        'Entropy': feature['properties'].get('Entropy', 0),  # Default to 0 if entropy is missing
        'properties': feature['properties'],  # Store properties for reconstruction
        'geometry': feature['geometry']  # Store geometry for reconstruction
    }
    for feature in combined_features
]
df = pd.DataFrame(data)

# Check for missing values in entropy
if df['Entropy'].isnull().any():
    print("Warning: Missing entropy values found.")
    df['Entropy'].fillna(0, inplace=True)  # Replace NaN values with 0 for ranking

# Calculate percentile ranks of entropy across both years and scale between 0 and 1
df['LDI'] = rankdata(df['Entropy'], method='average') / len(df['Entropy'])

# Update the original features with the new LDI percentile rank values
for i, feature in enumerate(combined_features):
    feature['properties']['LDI'] = df['LDI'].iloc[i]

# Split the combined GeoJSON back into the original years
features_2013_updated = [f for f in combined_features if f['properties']['year'] == 2013]
features_2022_updated = [f for f in combined_features if f['properties']['year'] == 2022]

# Overwrite the original GeoJSON files
geojson_2013['features'] = features_2013_updated
geojson_2022['features'] = features_2022_updated

with open('atlanta_blockgroup_ldi_2013.geojson', 'w') as file:
    json.dump(geojson_2013, file)

with open('atlanta_blockgroup_ldi_2022.geojson', 'w') as file:
    json.dump(geojson_2022, file)

# Convert each GeoJSON back to CSV
def geojson_to_csv(geojson_data, csv_path):
    # Flatten the properties and geometry into a CSV-friendly format
    rows = []
    for feature in geojson_data['features']:
        row = feature['properties'].copy()  # Start with properties
        row['geometry_type'] = feature['geometry']['type']
        row['coordinates'] = json.dumps(feature['geometry']['coordinates'])  # Serialize coordinates as string
        rows.append(row)
    # Convert to DataFrame and write to CSV
    df = pd.DataFrame(rows)
    df.to_csv(csv_path, index=False)

# Convert the updated GeoJSONs to CSV files with the original names
geojson_to_csv(geojson_2013, 'atlanta_blockgroup_ldi_2013.csv')
geojson_to_csv(geojson_2022, 'atlanta_blockgroup_ldi_2022.csv')


In [14]:
import json
import pandas as pd
from scipy.stats import rankdata

# Load the GeoJSON files for each metric
with open('atlanta_blockgroup_pdi_2013.geojson', 'r') as file:
    pdi_2013 = json.load(file)
with open('atlanta_blockgroup_pdi_2022.geojson', 'r') as file:
    pdi_2022 = json.load(file)

with open('atlanta_blockgroup_idi_2013.geojson', 'r') as file:
    idi_2013 = json.load(file)
with open('atlanta_blockgroup_idi_2022.geojson', 'r') as file:
    idi_2022 = json.load(file)

with open('atlanta_blockgroup_ldi_2013.geojson', 'r') as file:
    ldi_2013 = json.load(file)
with open('atlanta_blockgroup_ldi_2022.geojson', 'r') as file:
    ldi_2022 = json.load(file)

with open('atlanta_blockgroup_cdi_2013.geojson', 'r') as file:
    cdi_2013 = json.load(file)
with open('atlanta_blockgroup_cdi_2022.geojson', 'r') as file:
    cdi_2022 = json.load(file)

# Function to create a DataFrame from a GeoJSON file, with geometry as a key
def geojson_to_dataframe(geojson_data, metric_name):
    data = []
    for feature in geojson_data['features']:
        geometry_str = json.dumps(feature['geometry']['coordinates'])  # Standardized string of coordinates only
        data.append({
            'geometry_str': geometry_str,
            metric_name: feature['properties'].get(metric_name, 0),  # Default to 0 if missing
            'geometry': feature['geometry']  # Keep geometry as a dictionary
        })
    return pd.DataFrame(data)

# Convert each GeoJSON to DataFrames for easy merging
pdi_2013_df = geojson_to_dataframe(pdi_2013, 'PDI')
pdi_2022_df = geojson_to_dataframe(pdi_2022, 'PDI')

idi_2013_df = geojson_to_dataframe(idi_2013, 'IDI')
idi_2022_df = geojson_to_dataframe(idi_2022, 'IDI')

ldi_2013_df = geojson_to_dataframe(ldi_2013, 'LDI')
ldi_2022_df = geojson_to_dataframe(ldi_2022, 'LDI')

cdi_2013_df = geojson_to_dataframe(cdi_2013, 'CDI')
cdi_2022_df = geojson_to_dataframe(cdi_2022, 'CDI')

# Merge the DataFrames for each year on geometry_str to combine all metrics
data_2013 = pdi_2013_df.merge(idi_2013_df, on='geometry_str').merge(ldi_2013_df, on='geometry_str').merge(cdi_2013_df, on='geometry_str')
data_2022 = pdi_2022_df.merge(idi_2022_df, on='geometry_str').merge(ldi_2022_df, on='geometry_str').merge(cdi_2022_df, on='geometry_str')

# Explicitly select and retain only one 'geometry' column after merging
data_2013['geometry'] = data_2013['geometry_x']
data_2022['geometry'] = data_2022['geometry_x']

# Drop any extra geometry columns created during the merge
data_2013 = data_2013.drop(columns=[col for col in data_2013.columns if col.startswith('geometry_') and col != 'geometry'])
data_2022 = data_2022.drop(columns=[col for col in data_2022.columns if col.startswith('geometry_') and col != 'geometry'])


# Calculate PEI for each year
data_2013['PEI'] = ((1 + data_2013['PDI']) * (1 + data_2013['IDI']) * (1 + data_2013['LDI']) * (1 + data_2013['CDI'])) / 16
data_2022['PEI'] = ((1 + data_2022['PDI']) * (1 + data_2022['IDI']) * (1 + data_2022['LDI']) * (1 + data_2022['CDI'])) / 16

# Add the year to each DataFrame
data_2013['year'] = 2013
data_2022['year'] = 2022

# Select only the required columns
data_2013 = data_2013[['geometry', 'year', 'PEI']]
data_2022 = data_2022[['geometry', 'year', 'PEI']]

# Convert to GeoJSON format
def dataframe_to_geojson(data, output_file):
    features = []
    for _, row in data.iterrows():
        feature = {
            "type": "Feature",
            "properties": {
                "year": row['year'],
                "PEI": row['PEI']
            },
            "geometry": row['geometry']
        }
        features.append(feature)
    
    geojson = {
        "type": "FeatureCollection",
        "features": features
    }
    
    with open(output_file, 'w') as file:
        json.dump(geojson, file)

# Save as GeoJSON
dataframe_to_geojson(data_2013, 'atlanta_blockgroup_pei_2013.geojson')
dataframe_to_geojson(data_2022, 'atlanta_blockgroup_pei_2022.geojson')

# Convert to CSV
def dataframe_to_csv(data, output_file):
    # Flatten the geometry for CSV export
    rows = []
    for _, row in data.iterrows():
        row_data = {
            "year": row['year'],
            "PEI": row['PEI'],
            "geometry_type": row['geometry']['type'],
            "coordinates": json.dumps(row['geometry']['coordinates'])
        }
        rows.append(row_data)
    
    df = pd.DataFrame(rows)
    df.to_csv(output_file, index=False)

# Save as CSV
dataframe_to_csv(data_2013, 'atlanta_blockgroup_pei_2013.csv')
dataframe_to_csv(data_2022, 'atlanta_blockgroup_pei_2022.csv')



  data_2013 = pdi_2013_df.merge(idi_2013_df, on='geometry_str').merge(ldi_2013_df, on='geometry_str').merge(cdi_2013_df, on='geometry_str')
  data_2022 = pdi_2022_df.merge(idi_2022_df, on='geometry_str').merge(ldi_2022_df, on='geometry_str').merge(cdi_2022_df, on='geometry_str')


ValueError: Cannot set a DataFrame with multiple columns to the single column geometry

In [None]:
dont have 