In [10]:
# import dependencies
import pandas as pd
import numpy as np
import gmaps

# import key
from config.config import google_api_key

# configure gmaps
gmaps.configure(api_key=google_api_key)

In [11]:
# load data
hpi2_df = pd.read_csv('../data/HPI2_MasterFile_2021-04-22.csv')
ca_census_tract_df = pd.read_csv('../resource/2020_gaz_tracts_06.txt', sep='\t', lineterminator='\n')

# merge geographical information into dataframe
hpi2_df = hpi2_df.merge(ca_census_tract_df, left_on='CensusTract', right_on='GEOID')
hpi2_df

Unnamed: 0,CensusTract,pop2010,pct2010gq,City,ZIP,County_FIPS,County_me,UrbanType,hpi2score,hpi2_pctile_pos,...,other_pct,version,USPS,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG\r
0,6001400100,2937,0.1,Oakland,94704.0,6001,Alameda,urban_area,1.182028,99.114590,...,0.851209,4/22/2021,CA,6001400100,6945857,0,2.682,0.000,37.867656,-122.231881
1,6001400200,1974,3.5,Oakland,94618.0,6001,Alameda,urban_area,1.325874,99.704863,...,0.151976,4/22/2021,CA,6001400200,586560,0,0.226,0.000,37.848138,-122.249592
2,6001400300,4865,0.8,Oakland,94618.0,6001,Alameda,urban_area,0.925666,95.932247,...,0.411100,4/22/2021,CA,6001400300,1110840,0,0.429,0.000,37.840584,-122.254478
3,6001400400,3703,0.9,Oakland,94609.0,6001,Alameda,urban_area,1.117603,98.537149,...,0.567108,4/22/2021,CA,6001400400,716138,0,0.277,0.000,37.848284,-122.257445
4,6001400500,3517,3.6,Oakland,94609.0,6001,Alameda,urban_area,0.562799,83.318363,...,0.653966,4/22/2021,CA,6001400500,591424,0,0.228,0.000,37.848545,-122.264735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6875,6115040500,4111,0.8,Olivehurst,95961.0,6115,Yuba,urban_area,-0.857437,5.671757,...,0.072975,4/22/2021,CA,6115040500,2438067,0,0.941,0.000,39.100516,-121.553801
6876,6115040600,6130,0.1,Olivehurst,95961.0,6115,Yuba,urban_area,-0.673751,12.293084,...,0.179445,4/22/2021,CA,6115040600,4164409,0,1.608,0.000,39.084246,-121.546824
6877,6115040800,4233,0.1,,95692.0,6115,Yuba,urban_cluster,-0.037308,47.427178,...,0.188991,4/22/2021,CA,6115040800,109394813,128316,42.238,0.050,39.041053,-121.433980
6878,6115040901,2783,0.4,,95901.0,6115,Yuba,rural,-0.425301,24.188374,...,0.000000,4/22/2021,CA,6115040901,263666616,4473094,101.802,1.727,39.185210,-121.358537


In [12]:
# remove null value rows
hpi2_df = hpi2_df.loc[hpi2_df['hpi2score'].isnull().values == False]

# rename pa to Napa county
hpi2_df.loc[hpi2_df['County_me'] == 'pa', 'County_me'] = 'Napa'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [14]:
# set latitude and longitude for heatmap
coordinates = hpi2_df[['INTPTLAT', 'INTPTLONG\r']]
# set the weights to the percentile of people insured
# subtracted from 100 to flip the coloring scheme where worst peforming areas in red
insured_pctile = 100-hpi2_df['insured_pctile']


In [15]:
# map heatmap
fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(coordinates, weights = insured_pctile, dissipating=True, max_intensity=100)
fig.add_layer(heat_layer)
fig


Figure(layout=FigureLayout(height='420px'))

In [16]:
insured_df = hpi2_df.groupby('County_me').agg({
                                               'insured': [
                                                   np.mean,
                                                   np.median,
                                                   'min',
                                                   'max'
                                               ]
                                            })
insured_df.rename(columns={
    'insured': 'Insured'
}, inplace = True)
insured_df.rename_axis('County Name', inplace=True)

insured_df.sort_values(('Insured', 'median'))



Unnamed: 0_level_0,Insured,Insured,Insured,Insured
Unnamed: 0_level_1,mean,median,min,max
County Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Imperial,74.073684,71.7,58.4,91.6
Tulare,73.038889,71.85,52.7,92.0
Glenn,71.575,72.75,63.2,77.6
Colusa,74.075,74.0,71.5,76.8
Merced,74.776316,74.35,60.8,96.5
Los Angeles,73.908075,74.5,24.2,99.7
Riverside,75.419895,75.55,49.9,96.7
Lake,74.777778,75.6,68.2,80.4
Monterey,73.755696,75.8,41.6,98.4
Fresno,75.465031,76.1,48.4,95.5


In [17]:
county_coordinates_df = pd.read_csv('../resource/CenPop2010_Mean_CO06.txt', )
county_coordinates_df['FIPS'] = 6000 + county_coordinates_df['COUNTYFP']
county_coordinates_df.head()

Unnamed: 0,STATEFP,COUNTYFP,COUNAME,STNAME,POPULATION,LATITUDE,LONGITUDE,FIPS
0,6,1,Alameda,California,1510271,37.705201,-122.100393,6001
1,6,3,Alpine,California,1175,38.728298,-119.847603,6003
2,6,5,Amador,California,38091,38.394208,-120.775729,6005
3,6,7,Butte,California,220000,39.669037,-121.707635,6007
4,6,9,Calaveras,California,45578,38.163627,-120.633718,6009


In [18]:
# Combine data insured dataset together with lat and lng of county center
insured_df = insured_df.merge(county_coordinates_df, left_index=True, right_on='COUNAME')



In [20]:
# reverse percentage to properly color map
(100-insured_df[('Insured', 'median')])

0     12.10
2     12.50
3     17.10
4     18.50
5     26.00
6     11.35
7     18.60
8      9.10
9     23.90
10    27.25
11    22.15
12    28.30
13    16.15
14    21.80
15    23.45
16    24.40
17    15.20
18    25.50
19    22.50
20     8.45
21    15.40
22    21.80
23    25.65
24    22.90
25    14.15
26    24.20
27    13.50
28    16.60
29    16.00
30    11.75
31    20.20
32    24.45
33    16.40
34    17.40
36    17.60
37     9.75
38    21.30
39    16.90
40     8.40
41    16.35
42    11.10
43    13.10
44    19.50
45    17.10
46    18.00
47    15.10
48    15.90
49    22.15
50    22.10
51    17.85
52    19.90
53    28.15
54    17.10
55    16.00
56    10.20
57    22.80
Name: (Insured, median), dtype: float64

In [23]:
coordinates = insured_df[['LATITUDE', 'LONGITUDE']]
insured_median = insured_df[('Insured', 'median')]

In [24]:
fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(coordinates, weights=insured_median, dissipating=False, point_radius='10', max_intensity=100)
fig.add_layer(heat_layer)
fig

TraitError: The 'point_radius' trait of a WeightedHeatmap instance expected a float, not the str '10'.