In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import contextily as ctx
# from shapely.geometry import Point, Polygon
# from pyproj import CRS
# from sklearn.linear_model import LinearRegression
# import seaborn as sns
# import statsmodels.api as sm
import folium
from folium.plugins import MarkerCluster, FastMarkerCluster, HeatMap#, heat_map_withtime, HeatMapWithTime
from folium.plugins.heat_map_withtime import HeatMapWithTime
from sklearn.neighbors import KernelDensity
import branca.colormap as cm

In [2]:
# Load the housing data; the cells below read its 'lat', 'long', 'price' and 'date' columns.
df = pd.read_csv('data/updated_housing.csv')

# Base Map

### Setting Tiles
Tiles
   This is a list of tiles that can be used. The list is not exhaustive.
   
   **Example**: folium.Map(tiles = dark_map), default is 'OpenStreetMap'
   

In [203]:
# map tile types
# Each constant is a tile-set name string meant to be passed as folium.Map(tiles=...).
# NOTE(review): which of these names are recognised depends on the installed folium
# version — confirm each one renders before relying on it.
dark_map = 'cartodbdark_matter'
stamentoner = "stamentoner"
litestamentoner = 'stamentonerlite'
light_map = 'cartodbpositron'
terrain = 'StamenTerrain'
ESRI_DeLorme = 'ESRIDeLorme'
ESRI_WorldImagery ='ESRIWorldImagery'
ESRI_NatGeoWorldMap = 'ESRINatGeoWorldMap'

### Setting Starting location and scale
`location` is the starting center of the map; in the example below it is centered on the mean latitude and longitude of the data.


In [4]:
# Base map centred on the mean latitude and longitude of the data.
folium_map = folium.Map(location=[df['lat'].mean(), df['long'].mean()])

# Plugin Arguments
Every plugin requires a data argument. This must be a list of [lat, long] entries, optionally followed by a weight.

For heatmaps this can be an array or a list.

[[x, y, z], [x1, y1, z1], [xn, yn, zn]] or [(x, y, z), (x1, y1, z1), (xn, yn, zn)]
### Example:

In [5]:
# Materialise the rows as a list of (lat, long, weight) tuples: a bare zip() is a
# one-shot iterator, so it would be empty after the first plugin consumed it.
data_values = list(zip(df['lat'], df['long'], df['price']))

# Cluster map

The cluster map lets you cluster data so that you can see the locations with higher counts.

This can be done using MarkerCluster or FastMarkerCluster.

MarkerCluster allows more customization options for the style of the marker; however, with large datasets like this one it is much slower than FastMarkerCluster.

Below is a basic example for using FastMarkerCluster to create a cluster map.

In [6]:
# Create a folium map, this is centered at the mean latitude and longitude
folium_map = folium.Map(location=[df['lat'].mean(), df['long'].mean()])

# Create a list of tuples containing the latitude and longitude for each data point
# (list() so the data can be reused; a bare zip() is exhausted after one pass)
data_values = list(zip(df['lat'], df['long']))

# Add the marker cluster layer to the map
folium.plugins.FastMarkerCluster(data_values).add_to(folium_map)

# Display the map
display(folium_map)

# Level Up
- Adds weight to icon through callback function
- Adds additional arguments through kwargs

In [362]:
def folium_cluster(df, column, metrics=False, **kwargs):
    '''
    Display a folium cluster map whose markers are coloured against a threshold.

    df : dataframe - must have columns 'lat' and 'long'
    column : name 'column'
                Value attached to every marker and compared with the threshold
    metrics : default = False (no threshold, every marker is green)
            Cluster threshold metric
            ('mean', 'median', 'mode', or a specific int/float value).
            Markers whose value is above the threshold are red, the rest are blue.
            Any other value is treated like False.

    **kwargs are for folium.Map,
    This is a list of the most common arguments/default values:
    width='100%', height='100%', tiles='OpenStreetMap', min_zoom=0, max_zoom=18, zoom_start=10

    The map is displayed; nothing is returned.
    '''
    # Resolve the threshold in Python; None means "no threshold requested".
    threshold = None
    if isinstance(metrics, str):
        if metrics == 'mean':
            threshold = df[column].mean()
            print(f'Mean:{threshold}')
        elif metrics == 'median':
            threshold = df[column].median()
            print(f'Median:{threshold}')
        elif metrics == 'mode':
            # mode() returns a Series (several values when there are ties); use the first
            modes = df[column].mode()
            if len(modes):
                threshold = modes.iloc[0]
            print(f'Mode:{threshold}')
    elif (isinstance(metrics, (int, float, np.integer, np.floating))
          and not isinstance(metrics, bool)):
        # bool is excluded so the default False keeps meaning "no threshold"
        threshold = metrics

    # NaN/inf cannot be written as a JavaScript number literal, so treat them as "no threshold"
    if threshold is not None and not np.isfinite(threshold):
        threshold = None
    js_threshold = 'null' if threshold is None else repr(float(threshold))

    # Create a folium map, this is centered at the mean latitude and longitude
    folium_map = folium.Map(location=[df['lat'].mean(), df['long'].mean()], **kwargs)

    # Create a list of (lat, long, value) tuples, one per data point
    data_values = list(zip(df['lat'], df['long'], df[column]))

    # callback functions need to be in javascript to be run, this is adapted from:
    # https://github.com/python-visualization/folium/blob/main/examples/MarkerCluster.ipynb
    # The threshold is compared with === null so that a threshold of 0 is still honoured.
    callbacks = f"""\
    function (row) {{
        var icon, marker, value, threshold;
        value = row[2]; // third element of every row is the value of the chosen column
        threshold = {js_threshold};
        if (threshold === null) {{
            icon = L.AwesomeMarkers.icon({{icon: "map-marker", markerColor: "green"}});
        }} else if (value > threshold) {{
            icon = L.AwesomeMarkers.icon({{icon: "map-marker", markerColor: "red"}});
        }} else {{
            icon = L.AwesomeMarkers.icon({{icon: "map-marker", markerColor: "blue"}});
        }}
        marker = L.marker(new L.LatLng(row[0], row[1]));
        marker.setIcon(icon);
        return marker;
    }};
    """

    # Add the marker cluster layer to the map
    folium.plugins.FastMarkerCluster(data_values, callback=callbacks).add_to(folium_map)

    # Display the map
    display(folium_map)
    # folium_map.save('cluster_map.html')

In [364]:
# Colour each marker by whether its 'price' is above the column mean.
folium_cluster(df,'price','mean')

Mean:1.0000000000000002


# Heat Maps 
1. Shows density of points
2. Weights can be added to show density with respect to the weight 
    Example: 
    - Density of houses weighted to price, to show locations with higher density of more expensive homes
3. Heat maps can be rigged to show individual points (try -1, 0-.9, x<1, and '#' for blur), I advise against this, as this is essentially a less accurate choropleth map.


### Weights
1. How the weight functions depends on the folium version.
The weight is input as z, the third column: [x, y, z] or [lat, long, weight]
   - Version **.11** : weight needs to be normalized on a scale of 0-1
   - Version **.12 and newer** : weight just needs to be added as z


### Basic Heat Map


In [None]:
# Create a map centered at the mean latitude and longitude of the houses
# (no **kwargs here: this cell runs at top level, so there is nothing to forward)
folium_map = folium.Map(location=[df['lat'].mean(), df['long'].mean()])

# Create data list of (lat, long) pairs
values = list(zip(df['lat'], df['long']))

HeatMap(data=values, radius=10, min_opacity=.2, blur=12).add_to(folium_map)

# Display the map
display(folium_map)


### Level Up

In [422]:
def _folium_version_tuple(version):
    '''Return the leading numeric components of a version string as a tuple of ints.'''
    numbers = []
    for piece in str(version).split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


def folium_Heatmap(df,column=None, max_value=None, tilelayer=False, **kwargs):
    '''
    Display a folium heat map of the points in df.

    df : dataframe - must have columns 'lat' and 'long' (it is not modified)
    column : name 'column'
                Used for heatmap weight, if not set then no weight is applied
    max_value : only used if folium version is < .12 (passed to HeatMap as max_val),
                the default is .03 and will need to be adjusted

    tilelayer : default False. Setting to True builds a plain folium.Map and gives
                    **kwargs to folium.TileLayer instead of folium.Map
                    - This lets user adjust opacity of basemap

    **kwargs are given to folium.Map (or folium.TileLayer) AND to HeatMap.
    Because both receive the same arguments, max_zoom cannot be set separately
    for the base map and the heat layer.

    The map is displayed; nothing is returned.
    '''
    center = [df['lat'].mean(), df['long'].mean()]

    # Create a map centered at the mean latitude and longitude of the houses
    if tilelayer:
        folium_map = folium.Map(location=center)
        folium.TileLayer(**kwargs).add_to(folium_map)
    else:
        folium_map = folium.Map(location=center, **kwargs)

    heat_kwargs = dict(kwargs)

    if column is not None:
        # Work on a local Series so the caller's dataframe keeps its original values
        weights = df[column]

        # Version control: compare numerically ('0.9.0' < '0.12.0' is False as strings)
        if _folium_version_tuple(folium.__version__) < (0, 12):
            # scaling the column so weight is 0-1
            weight_min, weight_max = weights.min(), weights.max()
            spread = weight_max - weight_min
            if spread == 0:
                # constant column: avoid 0/0 and give every point the same full weight
                weights = pd.Series(1.0, index=weights.index)
            else:
                weights = (weights - weight_min) / spread

            # Adding max_val argument
            heat_kwargs['max_val'] = .03 if max_value is None else max_value

        # Create a list of (lat, long, weight) tuples, one per house
        values = list(zip(df['lat'], df['long'], weights))
    else:
        # Create a list of (lat, long) tuples, one per house
        values = list(zip(df['lat'], df['long']))

    HeatMap(data=values, **heat_kwargs).add_to(folium_map)

    # Display the map
    # folium_map.save('Heat_map_test.html')
    display(folium_map)
      

In [430]:
# Heat map weighted by 'price'; radius, min_opacity and blur are forwarded through **kwargs.
folium_Heatmap(df, 'price', radius=15, min_opacity=.3, blur=20)

# Heat Map With Time

There are 2 main inputs for HeatMapWithTime, data and index
   - Data is the data array
   - Index is the timestamp or time index
    
Data must be grouped by the time index such that len(data) and len(timeindex) are equal

### Data Structure

The outermost brackets indicate that this is a list []
The two inner sets of brackets represent nested lists. 

1. The first nested list represents a specific month, year or datetime period (aka timeindex structure)
2. The second nested list contains the data points for that month. This list or array are the points that get plotted for the correlating time index. 

Example:
A dataset has 12 months, this data would need 12 sets of nested brackets, and each nested list contains the data points for each month.
1. nested list
2. time index [] 3 years means 3 sets [[[x1,y1,z1]], [[x2,y2,z2]], [[x3,y3,z3]]] where x1 is year 1, x2 is year 2 ...
3. data points [x1,y1,z1],[x1,y1,z1],[x1,y1,z1],[x1,y1,z1]...

Heatmapwithtime parts:
1. Create new date time column based on input, this would normally be '%Y-%m' or '%Y-%m-%d'
2. Scale the weight column,      **This might be redundant and not needed*
3. Data list - I use np.vstack such that I can use tolist() to create a nested list without using for loops
4. Time Index - creating timeindex list 
5. zip together both lists, back to dataframe, groupby month-year
6. Convert back to lists
7. Create map

**Note this code can be simplified specifically steps 3-6* 

**Additional:**
- use_local_extrema = True means it rescales the weight for each timestamp, False =  all timestamps have the same weight scale
- **Heatmap style use:** min_opacity=0, max_opacity=.6,radius=12
- **Point style use:** min_opacity=.3, max_opacity=9,radius=3

## Level Up

In [433]:
def _scale_time_weights(values, args):
    '''Divide a weight Series by a requested metric, or min-max scale it to 0-1.'''
    def _first_mode(series):
        # mode() returns a Series (several values when there are ties); use the first
        modes = series.mode()
        return modes.iloc[0] if len(modes) else None

    metric_functions = {
        'mean': lambda series: series.mean(),
        'median': lambda series: series.median(),
        'mode': _first_mode,
    }
    requested = next((arg for arg in args if arg in metric_functions), None)
    metric = metric_functions[requested](values) if requested is not None else None

    # A missing or zero metric cannot be used as a divisor -> fall back to min-max
    if metric is not None and metric != 0:
        return values / metric

    weight_min, weight_max = values.min(), values.max()
    spread = weight_max - weight_min
    if spread == 0:
        # constant column: avoid 0/0 and give every point the same full weight
        return pd.Series(1.0, index=values.index)
    return (values - weight_min) / spread


def heatmapwithtime(df,column, date_column,*args,**kwargs):
    '''
    Display an animated folium heat map with one frame per time period.

    df : DataFrame - must have columns 'lat' and 'long' (it is not modified)
    column :  weight_column
    date_column : date column w/ year,month, day, parsed with the format '%m/%d/%Y'
    *args : 'y', 'm', and/or 'd', order matters (they build the period label),
            and optionally a metric 'mean', 'median' or 'mode'.
            With a metric the weight is divided by it, otherwise it is min-max
            scaled to 0-1. Any other value is ignored.
    **kwargs : parameters for folium.Map and HeatMapWithTime - blur only works with newer folium versions
            Every kwarg is given to folium.Map; those that are also arguments of
            HeatMapWithTime are given to the heat layer as well.
            Defaults that can be overridden: zoom_start=10, auto_play=True,
            min_speed=.5, speed_step=0.1

    The map is displayed; nothing is returned.
    '''
    import inspect

    # Period label per row, built from local Series so the caller's frame is untouched
    dates = pd.to_datetime(df[date_column], format='%m/%d/%Y')
    date_parts = {'y': '%Y', 'm': '%m', 'd': '%d'}
    date_format = '-'.join(date_parts[arg] for arg in args if arg in date_parts)
    periods = dates.dt.strftime(date_format)

    # scaling the column weight
    weights = _scale_time_weights(df[column], args)

    # One [lat, long, weight] list per row
    points = np.vstack([df['lat'], df['long'], weights]).T.tolist()

    # Group the points by period label; groupby sorts the labels
    frame = pd.DataFrame({'point': points, 'period': periods.to_numpy()})
    by_period = frame.groupby('period')['point'].agg(list)

    # creating input data for heatmap with time
    df_data = by_period.tolist()
    df_time = by_period.index.tolist()

    # Creating Folium base map; caller kwargs win over the zoom_start default
    map_kwargs = {'zoom_start': 10, **kwargs}
    folium_map = folium.Map(location=[df['lat'].mean(), df['long'].mean()], **map_kwargs)

    # Keep only the kwargs HeatMapWithTime accepts (args[0] is self)
    layer_args = inspect.getfullargspec(HeatMapWithTime).args[1:]
    param = {key: value for key, value in kwargs.items() if key in layer_args}

    # Heat map with time; caller kwargs win over the playback defaults
    layer_kwargs = {'auto_play': True, 'min_speed': .5, 'speed_step': 0.1, **param}
    HeatMapWithTime(df_data, index=df_time, **layer_kwargs).add_to(folium_map)

    # folium_map.save('Heat_map_test.html')
    # Display Map
    display(folium_map)

In [432]:
# Heatmap style: one animation frame per year-month ('y', 'm'), drawn on the dark basemap.
heatmapwithtime(df,'price','date','y','m', min_opacity=0, max_opacity=.6,radius=12, tiles=dark_map)

In [434]:
# Point style, with the weight divided by the column mean ('mean') instead of min-max scaled.
# NOTE(review): max_opacity=9 looks like it may be meant as .9 — confirm.
heatmapwithtime(df,'price','date','y','m', 'mean', min_opacity=.3, max_opacity=9,radius=3, tiles=dark_map)

# Choropleth Map

JSON files are required for Choropleth maps. These can be made several ways. I made this one in the gis notebook, where each point has a box built around it.

**Refer to gis notebook for choropleth maps**

In [None]:
# map colors
# Names of matplotlib colormaps, kept as constants so one can be passed as `cmap=` below.
#https://matplotlib.org/stable/tutorials/colors/colormaps.html
YOR = 'YlOrRd'
YGB = 'YlGnBu'
cool = 'cool'
copper = 'copper' # Good for showing only high values on dark_map
winter ='winter' # blue to green
autumn = 'autumn' # Red to orange - would be better flipped
autunm_fliped = 'autumn_r' # yellow to red          ************ best one ********
bone = 'bone' # works on light backgrounds probably better flipped
hsv = 'hsv' # cyclic map - would look really cool mapped with the coef to show max impact of both positive and neg


# plot the choropleth map
# NOTE(review): `gdf_polygons`, `gdf_j` and `ctx` are not defined in the cells shown in this
# notebook (the contextily import at the top is commented out) — presumably they come from
# the gis notebook; confirm before running this cell.
# NOTE(review): `dark_map` is a folium tile-name string — confirm that `ctx.add_basemap`
# accepts it as `source`.
fig, ax = plt.subplots(figsize=(10, 10))
gdf_polygons.plot(column='price', cmap=copper, linewidth=1, ax=ax, vmin=500000, vmax=3000000 ,marker='s')
# Restrict the view to a fixed longitude/latitude window
ax.set_xlim([-122.5, -122])
ax.set_ylim([47.4, 47.78])
ctx.add_basemap(ax, crs=gdf_j.crs,source= dark_map , alpha=1)
# ax.axis('off')
plt.show()