In [6]:
import pandas as pd
import json

# Define the neighborhood_mapping class
class neighborhood_mapping:
    def __init__(self):
        self.load_mappings()
    
    def load_mappings(self):
        # load census tract to boston neighborhood mapping 
        self.tract_mapping = json.load(open("./data/tracts-neighbors.json")) # Ensure this file is in the relevant folder
        
        # load census block to boston neighborhood mapping 
        self.block_mapping = json.load(open("./data/blocks-neighbors.json")) # Not currently in use

    def tract_to_neighborhood(self, tract):
        # given a census tract return the boston neighborhood it is in 
        return self.tract_mapping.get(tract, None)
    
    def block_to_neighborhood(self, block):
        # given a census block return the boston neighborhood it is in 
        return self.block_mapping.get(block, None)

# Create the neighborhood mapping object
mappings = neighborhood_mapping()


# Function to format the census tract ID
def format_id(state, county, census_tract): 
    # The input is of the form, Ma Code, County Code, Census Tract
    full_id = f"{state}{county.zfill(3)}{census_tract.zfill(6)}"
    return full_id

In [17]:
demographics_by_tract = pd.read_csv("../data/processed/demographics_by_tract.csv")

# Convert relevant columns to numeric, forcing errors to NaN (which can be filled with 0 if needed)
numeric_columns = ["Total", "Hispanic or Latino", "Not Hispanic or Latino", "One Race Total", 
                   "White", "African American", "American Indian and Alaska Native", "Asian", 
                   "Native Hawaiian and Other Pacific Island", "Other"]

                   
demographics_by_tract[numeric_columns] = demographics_by_tract[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Now we need to add neighborhoods to the demographics_by_tract dataframe
demographics_by_tract['Neighborhood'] = demographics_by_tract['Census Tract'].apply(
    lambda tract: mappings.tract_to_neighborhood(format_id("25", "025", str(tract)))  # Assuming the county code is "025"
)

# Aggregate demographic data by Neighborhood
neighborhood_demographics = demographics_by_tract.groupby('Neighborhood').sum()

# Calculate percentage statistics for each neighborhood
neighborhood_demographics_percent = neighborhood_demographics.div(neighborhood_demographics['Total'], axis=0) * 100

# Combine total and percentage data
neighborhood_demographics_combined = pd.concat([neighborhood_demographics, neighborhood_demographics_percent.add_suffix(' Percent')], axis=1)

# Print the combined demographics data for verification
print(neighborhood_demographics_combined.head())

TypeError: unsupported operand type(s) for /: 'str' and 'int'