# California -- City Sustainability

In [47]:
import pandas as pd
import seaborn as sns
import matplotlib as plt
import os

from census import Census
from us import states

import plotly.graph_objects as go

In [48]:
c = Census('fb97753783c42ae57fe1a640e38fe04e921e5d1a')

**Greenhouse Gas Data:**

In [49]:
ghg = pd.DataFrame()

for f in os.listdir('../data/2018_data_summary_spreadsheets'):
    temp = pd.read_excel('../data/2018_data_summary_spreadsheets/'+f, sheet_name=0)
    temp['Year'] = f.split('.')[0].split('_')[2]    
    ghg = pd.concat([temp, ghg], sort=False)
    
fips_map = pd.read_excel('../data/fips-codes.xls', sheet_name=0)

fips_map = fips_map[fips_map['Entity Description'] == 'city']

def str_func(x):
    return str(x).zfill(5)

fips_map['FIPS'] = fips_map['FIPS Entity Code'].apply(str_func)
fips_map['City'] = fips_map['GU Name']
fips_map['State'] = fips_map['State Abbreviation']

ghg_mapped = pd.merge(ghg, fips_map, on=['State', 'City'])
total_emissions = ghg_mapped.groupby(['FIPS','Year'])['Total reported direct emissions'].agg('sum').to_frame()

total_emissions.reset_index(inplace=True)

pivot_em = total_emissions.pivot(index='FIPS', columns='Year', values='Total reported direct emissions')

## Get's the 5 largest cities in California

In [50]:
city_2010 = c.sf1.state_place(('NAME', 'H001001', 
                               'P013001', 'P002002', 'P002005', 
                               'P013001', 'H003001', 'P027001', 
                               'H005001', 'H005002', 'H005003', 
                               'H005004', 'H005005', 'H005006', 
                               'H005007', 'P002001'), 
                              states.CA.fips, '*', year=2010)
c_pop_2010 = pd.DataFrame.from_records(city_2010)
c_pop_2010_50000 = c_pop_2010.rename(columns={
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'P002001': 'Total_Population_2010',
        'P002002':'Total_Urban_Population_2010',
        'P002005':'Total_Rural_Population_2010',
        'H001001': 'Total_Housing_2010',
        'P013001': 'Median_Age_2010',
        'H003001': 'Occupancy_Status_For_Housing_Units_2010',
        'P027001': 'Presence_of_Non-Relatives_2010',
        'H005001': 'Vacancy_Status_2010',
        'H005002': 'For_Rent_2010',
        'H005003': 'Rented_Not_Occupied_2010',
        'H005004': 'For_Sale_Only_2010',
        'H005005': 'Sold_Not_Occupied_2010',
        'H005006': 'For_Seasonal_Recreational_Or_Occasional_Use_2010',
        'H005007': 'For_Migrant_Workers_2010'})

In [51]:
c_pop_2010_50000.head()

Unnamed: 0,Total_Housing_2010,Occupancy_Status_For_Housing_Units_2010,Vacancy_Status_2010,For_Rent_2010,Rented_Not_Occupied_2010,For_Sale_Only_2010,Sold_Not_Occupied_2010,For_Seasonal_Recreational_Or_Occasional_Use_2010,For_Migrant_Workers_2010,City_Name,Total_Population_2010,Total_Urban_Population_2010,Total_Rural_Population_2010,Median_Age_2010,Presence_of_Non-Relatives_2010,FIPS,state
0,4062.0,4062.0,1300.0,88.0,3.0,71.0,10.0,1058.0,0.0,"Cambria CDP, California",6032.0,5865.0,167.0,57.1,2762.0,10074,6
1,480.0,480.0,89.0,9.0,0.0,14.0,2.0,58.0,0.0,"Camanche North Shore CDP, California",979.0,0.0,979.0,44.1,391.0,10042,6
2,344.0,344.0,35.0,4.0,0.0,10.0,1.0,5.0,0.0,"Camanche Village CDP, California",847.0,0.0,847.0,38.7,309.0,10044,6
3,25702.0,25702.0,1198.0,406.0,29.0,249.0,41.0,117.0,1.0,"Camarillo city, California",65201.0,64818.0,383.0,40.8,24504.0,10046,6
4,1179.0,1179.0,38.0,10.0,1.0,7.0,3.0,2.0,0.0,"Cambrian Park CDP, California",3282.0,3282.0,0.0,42.4,1141.0,10088,6


In [52]:
city_2000 = c.sf1.state_place(('NAME', 'H001001', 
                               'P013001', 'P002002', 'P002005', 
                               'P013001', 'H003001', 'P027001', 
                               'H005001', 'H005002', 'H005003', 
                               'H005004', 'H005005', 'H005006', 
                               'H005007', 'P002001'), states.CA.fips, '*', year=2000)
c_pop_2000 = pd.DataFrame.from_records(city_2000)
c_pop_2000_50000 = c_pop_2000.rename(columns={
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'P002001': 'Total_Population_2000',
        'P002002':'Total_Urban_Population_2000',
        'P002005':'Total_Rural_Population_2000',
        'H001001': 'Total_Housing_2000',
        'P013001': 'Median_Age_2000',
        'H003001': 'Occupancy_Status_For_Housing_Units_2000',
        'P027001': 'Presence_of_Non-Relatives_2000',
        'H005001': 'Vacancy_Status_2000',
        'H005002': 'For_Rent_2000',
        'H005003': 'Rented_Not_Occupied_2000',
        'H005004': 'For_Sale_Only_2000',
        'H005005': 'Sold_Not_Occupied_2000',
        'H005006': 'For_Seasonal_Recreational_Or_Occasional_Use_2000',
        'H005007': 'For_Migrant_Workers_2000'})

In [53]:
c_pop_2000_50000.drop(columns=['City_Name', 'state'], inplace=True)

In [54]:
c_pop_2000_50000.head()

Unnamed: 0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,Total_Urban_Population_2000,Total_Rural_Population_2000,Median_Age_2000,Presence_of_Non-Relatives_2000,FIPS
0,915,915,26,12,4,1,1,0,8,2797,0,0,37.3,2797,2812
1,3145,3145,135,61,29,3,11,0,31,12956,0,0,23.3,12956,2924
2,88262,88262,4821,2187,1018,309,268,3,1036,247057,0,0,30.1,247057,3526
3,2147,2147,561,15,69,18,417,0,42,4232,0,0,43.8,4232,4734
4,1188,1188,348,18,46,6,219,0,59,1823,0,0,49.8,1823,7274


In [55]:
c_pop_2000_50000.set_index('FIPS', inplace=True)
c_pop_2010_50000.set_index('FIPS', inplace=True)

In [56]:
ca_join = c_pop_2000_50000.join(c_pop_2010_50000, on='FIPS')
ca_join = ca_join.join(pivot_em, on='FIPS')

In [57]:
ca_join.head()

Unnamed: 0_level_0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,...,state,2010,2011,2012,2013,2014,2015,2016,2017,2018
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2812,915,915,26,12,4,1,1,0,8,2797,...,6,,,,,,,,,
2924,3145,3145,135,61,29,3,11,0,31,12956,...,6,,,,,,,,,
3526,88262,88262,4821,2187,1018,309,268,3,1036,247057,...,6,980629.082,906498.922,1155619.396,1207249.466,1848460.71,2086014.066,94153.496,69323.64,18864.88
4734,2147,2147,561,15,69,18,417,0,42,4232,...,6,,,,,,,,,
7274,1188,1188,348,18,46,6,219,0,59,1823,...,6,,,,,,,,,


In [58]:
ca_join['Total_Population_2000'] = ca_join['Total_Population_2000'].astype('i8')

In [59]:
ca_join = ca_join.nlargest(5, 'Total_Population_2000')

In [60]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2010']),
    go.Bar(name='2000_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2010']),
    go.Bar(name='2000_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2010']),
])
fig.update_layout(barmode='group')
fig.show()

In [61]:
fig = go.Figure(data=[
    go.Bar(name='2000_age', x=ca_join['City_Name'], y=ca_join['Median_Age_2000']),
    go.Bar(name='2010_age', x=ca_join['City_Name'], y=ca_join['Median_Age_2010']),
])
fig.update_layout(barmode='group')
fig.show()

## American Community Servey

In [62]:
i = 0
acs_years = []
for x in range(2012, 2018):
    acs_test = c.acs5.state_place(('NAME',
                                   'B01003_001E',
                                   'B00002_001E',
                                   'B09018_007E',
                                   'B01002_001E'), states.CA.fips, '*', year=x)
    acs_years.append(pd.DataFrame.from_records(acs_test))
    acs_years[i] = acs_years[i].rename(columns={
        'NAME' : 'City_Name',
        'place': 'FIPS',
        'B01003_001E': 'Total_Population_{}'.format(x),
        'B00002_001E': 'Total_Housing_{}'.format(x),
        'B09018_007E': 'Presence_of_Non-Relatives_{}'.format(x),
        'B01002_001E': 'Median_Age_{}'.format(x),
    })
    acs_years[i].set_index('FIPS', inplace=True)
    acs_years[i].drop(columns=['City_Name', 'state'], inplace=True)
    acs_years[i] = acs_years[i].nlargest(5, 'Total_Population_{}'.format(x))
    i = i + 1

In [63]:
for x in acs_years:
    ca_join = ca_join.join(x)

In [64]:
ca_join.head()

Unnamed: 0_level_0,Total_Housing_2000,Occupancy_Status_For_Housing_Units_2000,Vacancy_Status_2000,For_Rent_2000,Rented_Not_Occupied_2000,For_Sale_Only_2000,Sold_Not_Occupied_2000,For_Seasonal_Recreational_Or_Occasional_Use_2000,For_Migrant_Workers_2000,Total_Population_2000,...,Total_Population_2015,Presence_of_Non-Relatives_2015,Total_Housing_2016,Median_Age_2016,Total_Population_2016,Presence_of_Non-Relatives_2016,Total_Housing_2017,Median_Age_2017,Total_Population_2017,Presence_of_Non-Relatives_2017
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
44000,1337706,1337706,62294,28529,9036,5161,4876,29,14663,3694820,...,3900794.0,40218.0,117861.0,35.0,3918872.0,39675.0,115400.0,35.2,3949776.0,40048.0
66000,469689,469689,18998,7511,1806,1787,5093,7,2794,1223400,...,1359791.0,8180.0,36884.0,34.2,1374812.0,8124.0,36377.0,34.3,1390966.0,8207.0
68000,281841,281841,5243,1929,714,578,818,50,1154,894943,...,1000860.0,10638.0,26125.0,36.1,1009363.0,10400.0,25882.0,36.4,1023031.0,10586.0
67000,346527,346527,16827,5594,910,1419,3762,79,5063,776733,...,840763.0,5793.0,26388.0,38.4,850282.0,5346.0,26142.0,38.3,864263.0,5258.0
43000,171632,171632,8544,4195,1484,683,761,2,1419,461522,...,,,,,,,,,,


In [65]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2010']),
    go.Bar(name='2012_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2012']),
    go.Bar(name='2013_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2013']),
    go.Bar(name='2014_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2014']),
    go.Bar(name='2015_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2015']),
    go.Bar(name='2016_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2016']),
    go.Bar(name='2017_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2017']),
    go.Bar(name='2000_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2010']),
    go.Bar(name='2012_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2012']),
    go.Bar(name='2013_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2013']),
    go.Bar(name='2014_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2014']),
    go.Bar(name='2015_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2015']),
    go.Bar(name='2016_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2016']),
    go.Bar(name='2017_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2017']),
    go.Bar(name='2000_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2010']),
    go.Bar(name='2012_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2012']),
    go.Bar(name='2013_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2013']),
    go.Bar(name='2014_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2014']),
    go.Bar(name='2015_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2015']),
    go.Bar(name='2016_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2016']),
    go.Bar(name='2017_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2017']),
])
fig.update_layout(barmode='group')
fig.show()

**Plot with emissions data**

In [66]:
fig = go.Figure(data=[
    go.Bar(name='2000_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2000']),
    go.Bar(name='2010_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2010']),
    go.Bar(name='2012_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2012']),
    go.Bar(name='2013_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2013']),
    go.Bar(name='2014_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2014']),
    go.Bar(name='2015_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2015']),
    go.Bar(name='2016_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2016']),
    go.Bar(name='2017_pop', x=ca_join['City_Name'], y=ca_join['Total_Population_2017']),
    go.Bar(name='2000_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2000']),
    go.Bar(name='2010_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2010']),
    go.Bar(name='2012_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2012']),
    go.Bar(name='2013_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2013']),
    go.Bar(name='2014_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2014']),
    go.Bar(name='2015_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2015']),
    go.Bar(name='2016_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2016']),
    go.Bar(name='2017_housing', x=ca_join['City_Name'], y=ca_join['Total_Housing_2017']),
    go.Bar(name='2000_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2000']),
    go.Bar(name='2010_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2010']),
    go.Bar(name='2012_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2012']),
    go.Bar(name='2013_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2013']),
    go.Bar(name='2014_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2014']),
    go.Bar(name='2015_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2015']),
    go.Bar(name='2016_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2016']),
    go.Bar(name='2017_non-relatives', x=ca_join['City_Name'], y=ca_join['Presence_of_Non-Relatives_2017']),
    go.Bar(name='2010_direct_emissions', x=ca_join['City_Name'], y=ca_join['2010']),
    go.Bar(name='2011_direct_emissions', x=ca_join['City_Name'], y=ca_join['2011']),
    go.Bar(name='2012_direct_emissions', x=ca_join['City_Name'], y=ca_join['2012']),
    go.Bar(name='2013_direct_emissions', x=ca_join['City_Name'], y=ca_join['2013']),
    go.Bar(name='2014_direct_emissions', x=ca_join['City_Name'], y=ca_join['2014']),
    go.Bar(name='2015_direct_emissions', x=ca_join['City_Name'], y=ca_join['2015']),
    go.Bar(name='2016_direct_emissions', x=ca_join['City_Name'], y=ca_join['2016']),
    go.Bar(name='2017_direct_emissions', x=ca_join['City_Name'], y=ca_join['2017']),
    go.Bar(name='2018_direct_emissions', x=ca_join['City_Name'], y=ca_join['2018'])
])
fig.update_layout(barmode='group')
fig.show()