In [10]:
import numpy as np
import pandas as pd
import geopandas as gpd
from lets_plot import *

LetsPlot.setup_html()

## Prepare the dataset

In [11]:
from sklearn.datasets import fetch_california_housing

california_housing_bunch = fetch_california_housing()
data = pd.DataFrame(california_housing_bunch.data, columns=california_housing_bunch.feature_names)

# Add $-value field to the dataframe.
# dataset.target: numpy array of shape (20640,)
# Each value corresponds to the average house value in units of 100,000.
data['Value($)'] = california_housing_bunch.target * 100000
data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Value($)
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,452600.0
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,358500.0
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,352100.0
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,341300.0
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,342200.0


In [12]:
# Draw a random sample from the data set.
# data = data.sample(n=1000)

### Use `geopandas` to read a shape file to GeoDataFrame

In [13]:
CA = gpd.read_file("./ca-state-boundary/CA_State_TIGER2016.shp")
CA.head()

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,4,9,6,1779778,6,CA,California,0,G4000,A,403501101370,20466718403,37.1551773,-119.5434183,"MULTIPOLYGON (((-13317677.375 3930590.808, -13..."


Keeping in mind that our target is the housing value, fill the choropleth over the state contours using `geom_map()`function

### Make a plot out of polygon and points

The color of the points will reflect the house age and
the size of the points will reflect the value of the house.

In [18]:
# The plot base 
p = ggplot(data=data) + scale_color_gradient(name='House Age', low='red', high='green')

# The points layer
points = geom_point(aes(x='Longitude',
                        y='Latitude',
                        size='Value($)',
                        color='HouseAge'), 
#                     data=data,
                    alpha=0.8)

# The map
p + geom_polygon(map=CA, fill='#F8F4F0', color='#B71234')\
  + points\
  + theme(axis_title='blank', axis_text='blank', axis_ticks='blank', axis_line='blank', axis_tooltip='blank')\
  + ggsize(600, 500)

### Make a plot similar to the one above but interactive

In [19]:
p + geom_livemap()\
  + geom_polygon(map=CA, fill='white', color='#B71234', alpha=0.5)\
  + points