In [3]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the budget data from the CSV file
df_budget = pd.read_csv('data/fy24-capital-budget-plan-recommended.csv')
df_budget['Total_Project_Budget'] = df_budget['Total_Project_Budget'].str.replace(',', '')
df_budget['Total_Project_Budget'] = df_budget['Total_Project_Budget'].astype(float)

# Remove "Multiple Neighborhoods" and "Citywide" from the data
df_budget = df_budget[df_budget['Neighborhood'] != 'Multiple Neighborhoods']
df_budget = df_budget[df_budget['Neighborhood'] != 'Citywide']

# Group by neighborhood and sum the total project budget
budget_by_areas = df_budget.groupby('Neighborhood')['Total_Project_Budget'].sum()

# Sort the DataFrame by total project budget in descending order
budget_by_areas = budget_by_areas.sort_values(ascending=False)

# Select the top 10 areas
top_10_areas = budget_by_areas.head(10)

# Read the median income data from the CSV file
df_income = pd.read_csv('./data/median_income.csv')

# Merge the budget and median income data on the "Neighborhood" column
merged_data = pd.merge(top_10_areas, df_income, left_index=True, right_on='Neighborhood')

# Plotting
plt.figure(figsize=(12, 6))

# Scatter plot
plt.scatter(merged_data['Median_Income'], merged_data['Total_Project_Budget'])
plt.title('Budget Allocation vs. Median Income by Neighborhood')
plt.xlabel('Median Income')
plt.ylabel('Total Project Budget')

# Add labels for each point
for i in range(len(merged_data)):
    plt.annotate(merged_data['Neighborhood_y'].iloc[i], 
                 (merged_data['Median_Income'].iloc[i], merged_data['Total_Project_Budget'].iloc[i]),
                 textcoords="offset points",
                 xytext=(5,5),
                 ha='right')

plt.show()

KeyError: 'Median Income'

<Figure size 1200x600 with 0 Axes>