# New York -- City Sustainability

In [24]:
import pandas as pd
import seaborn as sns
import matplotlib as plt
import os

from census import Census
from us import states

import plotly.graph_objects as go

In [25]:
c = Census('fb97753783c42ae57fe1a640e38fe04e921e5d1a')

**Greenhouse Gas Data:**

In [26]:
ghg = pd.DataFrame()

for f in os.listdir('../data/2018_data_summary_spreadsheets'):
    temp = pd.read_excel('../data/2018_data_summary_spreadsheets/'+f, sheet_name=0)
    temp['Year'] = f.split('.')[0].split('_')[2]    
    ghg = pd.concat([temp, ghg], sort=False)
    
fips_map = pd.read_excel('../data/fips-codes.xls', sheet_name=0)

fips_map = fips_map[fips_map['Entity Description'] == 'city']

def str_func(x):
    return str(x).zfill(5)

fips_map['FIPS'] = fips_map['FIPS Entity Code'].apply(str_func)
fips_map['City'] = fips_map['GU Name']
fips_map['State'] = fips_map['State Abbreviation']

ghg_mapped = pd.merge(ghg, fips_map, on=['State', 'City'])
total_emissions = ghg_mapped.groupby(['FIPS','Year'])['Total reported direct emissions'].agg('sum').to_frame()

total_emissions.reset_index(inplace=True)

pivot_em = total_emissions.pivot(index='FIPS', columns='Year', values='Total reported direct emissions')

## Get's the 5 largest cities in New York

In [27]:
keys = ['NAME' ,'P002001','P002002','P002005','H001001','P013001','H003001','P027001','H005001','H005002','H005003','H005004','H005005','H005006','H005007']

renames_2000 = {
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'P002001': 'Total_Population_2000',
        'P002002':'Total_Urban_Population_2000',
        'P002005':'Total_Rural_Population_2000',
        'H001001': 'Total_Housing_2000',
        'P013001': 'Median_Age_2000',
        'H003001': 'Occupancy_Status_For_Housing_Units_2000',
        'P027001': 'Presence_of_Non-Relatives_2000',
        'H005001': 'Vacancy_Status_2000',
        'H005002': 'For_Rent_2000',
        'H005003': 'Rented_Not_Occupied_2000',
        'H005004': 'For_Sale_Only_2000',
        'H005005': 'Sold_Not_Occupied_2000',
        'H005006': 'For_Seasonal_Recreational_Or_Occasional_Use_2000',
        'H005007': 'For_Migrant_Workers_2000'
}

renames_2010 = {
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'P002001': 'Total_Population_2010',
        'P002002':'Total_Urban_Population_2010',
        'P002005':'Total_Rural_Population_2010',
        'H001001': 'Total_Housing_2010',
        'P013001': 'Median_Age_2010',
        'H003001': 'Occupancy_Status_For_Housing_Units_2010',
        'P027001': 'Presence_of_Non-Relatives_2010',
        'H005001': 'Vacancy_Status_2010',
        'H005002': 'For_Rent_2010',
        'H005003': 'Rented_Not_Occupied_2010',
        'H005004': 'For_Sale_Only_2010',
        'H005005': 'Sold_Not_Occupied_2010',
        'H005006': 'For_Seasonal_Recreational_Or_Occasional_Use_2010',
        'H005007': 'For_Migrant_Workers_2010'}

city_2010 = c.sf1.state_place(keys, states.NY.fips, '*', year=2010)
c_pop_2010 = pd.DataFrame.from_records(city_2010)
c_pop_2010_50000 = c_pop_2010.rename(columns=renames_2010)

In [28]:
c_pop_2010_50000.head()

Unnamed: 0,Total_Housing_2010,Occupancy_Status_For_Housing_Units_2010,Vacancy_Status_2010,For_Rent_2010,Rented_Not_Occupied_2010,For_Sale_Only_2010,Sold_Not_Occupied_2010,For_Seasonal_Recreational_Or_Occasional_Use_2010,For_Migrant_Workers_2010,City_Name,Total_Population_2010,Total_Urban_Population_2010,Total_Rural_Population_2010,Median_Age_2010,Presence_of_Non-Relatives_2010,FIPS,state
0,711.0,711.0,12.0,4.0,0.0,2.0,0.0,6.0,0.0,"Stewart Manor village, New York",1896.0,1896.0,0.0,44.8,699.0,71267,36
1,740.0,740.0,61.0,26.0,1.0,4.0,5.0,7.0,0.0,"Stillwater village, New York",1738.0,1736.0,2.0,38.4,679.0,71322,36
2,545.0,545.0,65.0,7.0,1.0,12.0,0.0,33.0,0.0,"Stone Ridge CDP, New York",1173.0,0.0,1173.0,46.5,480.0,71597,36
3,5042.0,5042.0,196.0,23.0,1.0,42.0,12.0,62.0,0.0,"Stony Brook CDP, New York",13740.0,13738.0,2.0,43.0,4846.0,71608,36
4,166.0,166.0,26.0,1.0,1.0,7.0,1.0,3.0,0.0,"Burdett village, New York",340.0,0.0,340.0,41.0,140.0,11132,36


In [29]:
city_2000 = c.sf1.state_place(keys, states.NY.fips, '*', year=2000)
c_pop_2000 = pd.DataFrame.from_records(city_2000)
c_pop_2000_50000 = c_pop_2000.rename(columns=renames_2000)

In [30]:
c_pop_2000_50000.drop(columns=['City_Name', 'state'], inplace=True)

In [31]:
c_pop_2000_50000.head()

Unnamed: 0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,Total_Urban_Population_2000,Total_Rural_Population_2000,Median_Age_2000,Presence_of_Non-Relatives_2000,FIPS
0,1158,1158,86,41,11,8,8,0,18,2597,0,0,35.9,2597,3001
1,823,823,107,31,14,10,20,0,32,1699,0,0,37.1,1699,20346
2,1018,1018,103,44,18,2,3,0,36,2166,0,0,41.2,2166,20731
3,329,329,31,6,9,3,4,0,9,791,0,0,38.3,791,21523
4,5564,5564,550,287,90,47,29,2,95,13617,0,0,31.8,13617,28640


In [32]:
c_pop_2000_50000.set_index('FIPS', inplace=True)
c_pop_2010_50000.set_index('FIPS', inplace=True)

In [33]:
ny_join = c_pop_2000_50000.join(c_pop_2010_50000, on='FIPS')
ny_join = ny_join.join(pivot_em, on='FIPS')

In [34]:
ny_join.head()

Unnamed: 0_level_0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,...,state,2010,2011,2012,2013,2014,2015,2016,2017,2018
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3001,1158,1158,86,41,11,8,8,0,18,2597,...,36,,,,,,,,,
20346,823,823,107,31,14,10,20,0,32,1699,...,36,,,,,,,,,
20731,1018,1018,103,44,18,2,3,0,36,2166,...,36,,,,,,,,,
21523,329,329,31,6,9,3,4,0,9,791,...,36,,,,,,,,,
28640,5564,5564,550,287,90,47,29,2,95,13617,...,36,,,,,,,,,


In [35]:
ny_join['Total_Population_2000'] = ny_join['Total_Population_2000'].astype('i8')

In [36]:
ny_join = ny_join.nlargest(5, 'Total_Population_2000')

In [37]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2010']),
    go.Bar(name='2000_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2010']),
    go.Bar(name='2000_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2010']),
])
fig.update_layout(barmode='group')
fig.show()

In [38]:
fig = go.Figure(data=[
    go.Bar(name='2000_age', x=ny_join['City_Name'], y=ny_join['Median_Age_2000']),
    go.Bar(name='2010_age', x=ny_join['City_Name'], y=ny_join['Median_Age_2010']),
])
fig.update_layout(barmode='group')
fig.show()

## American Community Servey

In [39]:
i = 0
acs_years = []
for x in range(2012, 2018):
    acs_test = c.acs5.state_place(('NAME',
                                   'B01003_001E',
                                   'B00002_001E',
                                   'B09018_007E',
                                   'B01002_001E'), states.NY.fips, '*', year=x)
    acs_years.append(pd.DataFrame.from_records(acs_test))
    acs_years[i] = acs_years[i].rename(columns={
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'B01003_001E': 'Total_Population_{}'.format(x),
        'B00002_001E': 'Total_Housing_{}'.format(x),
        'B09018_007E': 'Presence_of_Non-Relatives_{}'.format(x),
        'B01002_001E': 'Median_Age_{}'.format(x),
    })
    acs_years[i].set_index('FIPS', inplace=True)
    acs_years[i].drop(columns=['City_Name', 'state'], inplace=True)
    acs_years[i] = acs_years[i].nlargest(5, 'Total_Population_{}'.format(x))
    i = i + 1

In [40]:
for x in acs_years:
    ny_join = ny_join.join(x)

In [41]:
ny_join.head()

Unnamed: 0_level_0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,...,Total_Population_2015,Presence_of_Non-Relatives_2015,Total_Housing_2016,Median_Age_2016,Total_Population_2016,Presence_of_Non-Relatives_2016,Total_Housing_2017,Median_Age_2017,Total_Population_2017,Presence_of_Non-Relatives_2017
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
51000,3200912,3200912,179324,70542,15604,16033,28157,196,48792,8008278,...,8426743.0,66649.0,237307.0,35.9,8461961.0,67370.0,231338.0,36.2,8560072.0,67838.0
11000,145574,145574,22854,8642,2349,1629,254,0,9980,292648,...,259517.0,1433.0,9658.0,32.9,258989.0,1235.0,9431.0,32.8,259574.0,1128.0
63000,99789,99789,10790,5261,1406,775,218,0,3130,219773,...,210745.0,1365.0,8354.0,31.3,210291.0,1302.0,8140.0,31.7,209463.0,1321.0
84000,77589,77589,3238,1553,435,296,275,1,678,196086,...,199435.0,1407.0,5664.0,38.5,199725.0,1148.0,5425.0,38.8,200999.0,1088.0
73000,68192,68192,8710,4765,1221,500,180,1,2043,147306,...,144564.0,1314.0,6296.0,30.6,144350.0,1036.0,6091.0,30.6,144405.0,1002.0


In [42]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2010']),
    go.Bar(name='2012_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2012']),
    go.Bar(name='2013_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2013']),
    go.Bar(name='2014_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2014']),
    go.Bar(name='2015_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2015']),
    go.Bar(name='2016_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2016']),
    go.Bar(name='2017_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2017']),
    go.Bar(name='2000_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2010']),
    go.Bar(name='2012_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2012']),
    go.Bar(name='2013_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2013']),
    go.Bar(name='2014_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2014']),
    go.Bar(name='2015_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2015']),
    go.Bar(name='2016_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2016']),
    go.Bar(name='2017_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2017']),
    go.Bar(name='2000_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2010']),
    go.Bar(name='2012_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2012']),
    go.Bar(name='2013_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2013']),
    go.Bar(name='2014_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2014']),
    go.Bar(name='2015_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2015']),
    go.Bar(name='2016_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2016']),
    go.Bar(name='2017_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2017']),
])
fig.update_layout(barmode='group')
fig.show()

**Plot with emissions data**

In [43]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2010']),
    go.Bar(name='2012_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2012']),
    go.Bar(name='2013_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2013']),
    go.Bar(name='2014_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2014']),
    go.Bar(name='2015_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2015']),
    go.Bar(name='2016_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2016']),
    go.Bar(name='2017_pop', x=ny_join['City_Name'], y=ny_join['Total_Population_2017']),
    go.Bar(name='2000_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2010']),
    go.Bar(name='2012_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2012']),
    go.Bar(name='2013_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2013']),
    go.Bar(name='2014_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2014']),
    go.Bar(name='2015_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2015']),
    go.Bar(name='2016_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2016']),
    go.Bar(name='2017_housing', x=ny_join['City_Name'], y=ny_join['Total_Housing_2017']),
    go.Bar(name='2000_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2010']),
    go.Bar(name='2012_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2012']),
    go.Bar(name='2013_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2013']),
    go.Bar(name='2014_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2014']),
    go.Bar(name='2015_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2015']),
    go.Bar(name='2016_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2016']),
    go.Bar(name='2017_non-relatives', x=ny_join['City_Name'], y=ny_join['Presence_of_Non-Relatives_2017']),
    go.Bar(name='2010_direct_emissions', x=ny_join['City_Name'], y=ny_join['2010']),
    go.Bar(name='2011_direct_emissions', x=ny_join['City_Name'], y=ny_join['2011']),
    go.Bar(name='2012_direct_emissions', x=ny_join['City_Name'], y=ny_join['2012']),
    go.Bar(name='2013_direct_emissions', x=ny_join['City_Name'], y=ny_join['2013']),
    go.Bar(name='2014_direct_emissions', x=ny_join['City_Name'], y=ny_join['2014']),
    go.Bar(name='2015_direct_emissions', x=ny_join['City_Name'], y=ny_join['2015']),
    go.Bar(name='2016_direct_emissions', x=ny_join['City_Name'], y=ny_join['2016']),
    go.Bar(name='2017_direct_emissions', x=ny_join['City_Name'], y=ny_join['2017']),
    go.Bar(name='2018_direct_emissions', x=ny_join['City_Name'], y=ny_join['2018'])
])
fig.update_layout(barmode='group')
fig.show()