In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import contextily as ctx
# from shapely.geometry import Point, Polygon
# from pyproj import CRS
from sklearn.linear_model import LinearRegression
import seaborn as sns
import statsmodels.api as sm
import folium
from folium.plugins import MarkerCluster, FastMarkerCluster, HeatMap#, heat_map_withtime, HeatMapWithTime
from folium.plugins.heat_map_withtime import HeatMapWithTime
from sklearn.neighbors import KernelDensity

In [6]:
# Load the housing records used throughout the notebook (path is relative to the notebook).
df = pd.read_csv(filepath_or_buffer='data/updated_housing.csv')

# Base Map

### Setting Tiles
Tiles
   This is a list of tile sets that can be used. The list is not exhaustive.
   
   **Example**: `folium.Map(tiles=dark_map)`; the default is `'OpenStreetMap'`.
   

In [311]:
# Names of tile sets that can be passed as `tiles=` to folium.Map.

# CartoDB base maps
dark_map = "cartodbdark_matter"
light_map = "cartodbpositron"

# Stamen base maps
stamentoner = "stamentoner"
litestamentoner = "stamentonerlite"
terrain = "StamenTerrain"

# ESRI base maps
ESRI_DeLorme = "ESRIDeLorme"
ESRI_WorldImagery = "ESRIWorldImagery"
ESRI_NatGeoWorldMap = "ESRINatGeoWorldMap"

### Setting Starting location and scale
`location` is the point the map is initially centered on. In the example below it is set to the mean latitude and longitude of the data.


In [352]:
# Centre the starting view on the average latitude/longitude of the dataset.
map_houses = folium.Map(location=[df[axis].mean() for axis in ('lat', 'long')])

# Plugin Arguments
Every plugin requires a data argument. This must be a list of (lat, long, weight) entries. For HeatMapWithTime the expected format is different.

### Example:

In [364]:
# One (lat, long, price) tuple per row of the dataframe.
locations = list(zip(*(df[name] for name in ('lat', 'long', 'price'))))

# Cluster map

The cluster map lets you cluster data points so that you can see which areas have higher counts.

This can be done using MarkerCluster or FastMarkerCluster.

MarkerCluster allows more customization of the marker style; however, with large datasets like this one it is much slower than FastMarkerCluster.

Below is a basic example for using FastMarkerCluster to create a cluster map.

In [372]:
# Start from a fresh map whose view is centred on the average coordinates
map_houses = folium.Map(location=[df[axis].mean() for axis in ('lat', 'long')])

# One (lat, long, price) tuple per row of the dataframe
locations = list(zip(*(df[name] for name in ('lat', 'long', 'price'))))

# Build the fast cluster layer, then attach it to the map
marker_cluster = folium.plugins.FastMarkerCluster(locations)
marker_cluster.add_to(map_houses)

# Render the map in the notebook
display(map_houses)

# Level Up


In [380]:
def folium_cluster(df, column, metrics=False, **kwargs):
    '''
    Build, display and save a folium cluster map coloured by a threshold.

    Every row of ``df`` becomes one marker in a FastMarkerCluster layer.
    When a threshold is requested, markers whose ``column`` value is above it
    are red and all others are blue. Without a threshold every marker is green.

    df : dataframe - must have columns 'lat' and 'long' plus ``column``
    column : name of the column compared against the threshold
    metrics : default = False (no threshold, all icons are green)
            Cluster threshold metric:
              False / None               -> no threshold
              'mean', 'median', 'mode'   -> that statistic of df[column]
                                            (printed; for 'mode' the first
                                            mode is used, truncated to int)
              int / float value          -> used directly as the threshold

    **kwargs are for folium.Map and override the defaults set by this
    function (location, min/max lat/lon, max_bounds).
    This is a list of the most common arguments/default values:
    width='100%', height='100%', tiles='OpenStreetMap', min_zoom=0, max_zoom=18, zoom_start=10

    Raises ValueError if ``metrics`` is not one of the supported values, or if
    'mode' is requested for a column that has no mode.

    Side effects: prints the computed statistic, displays the map and writes
    it to 'cluster_map.html' in the current working directory.
    '''
    # Resolve the threshold. None means "no threshold requested".
    if metrics is None or metrics is False:
        threshold = None
    elif isinstance(metrics, str):
        if metrics == 'mean':
            threshold = df[column].mean()
            print(f'Mean:{threshold}')
        elif metrics == 'median':
            threshold = df[column].median()
            print(f'Median:{threshold}')
        elif metrics == 'mode':
            # mode() returns a Series that may hold several values; take the first.
            modes = df[column].mode()
            if modes.empty:
                raise ValueError(f"Column {column!r} has no mode")
            threshold = int(modes.iloc[0])
            print(f'Mode:{threshold}')
        else:
            raise ValueError(f"Unsupported value for 'metrics': {metrics!r}")
    elif (isinstance(metrics, (int, float, np.integer, np.floating))
          and not isinstance(metrics, bool)):
        # bool is excluded explicitly because True is an int in Python.
        threshold = metrics
    else:
        raise ValueError(f"Unsupported value for 'metrics': {metrics!r}")

    # Map centred on the mean coordinates. The bounds are passed along but,
    # as far as folium documents it, only matter when max_bounds=True.
    # Caller kwargs win over these defaults instead of raising a duplicate
    # keyword TypeError.
    map_options = {
        'location': [df['lat'].mean(), df['long'].mean()],
        'min_lat': df['lat'].min(),
        'min_lon': df['long'].min(),
        'max_lat': df['lat'].max(),
        'max_lon': df['long'].max(),
        'max_bounds': False,
    }
    map_options.update(kwargs)
    map_houses = folium.Map(**map_options)

    # One (lat, long, value) tuple per row; the JS callback reads row[2].
    locations = list(zip(df['lat'], df['long'], df[column]))

    # The colour decision is made here in Python rather than by comparing the
    # threshold to `false` in JavaScript, where `0 == false` is true and a
    # threshold of 0 would be mistaken for "no threshold".
    icon_template = 'L.AwesomeMarkers.icon({{icon: "map-marker", markerColor: "{colour}"}})'
    if threshold is None:
        choose_icon = 'icon = ' + icon_template.format(colour='green') + ';'
    else:
        choose_icon = (
            f'icon = (value > {threshold}) ? '
            + icon_template.format(colour='red') + ' : '
            + icon_template.format(colour='blue') + ';'
        )

    # callback functions need to be in javascript to be run, this is adapted from:
    # https://github.com/python-visualization/folium/blob/main/examples/MarkerCluster.ipynb
    callbacks = f"""\
    function (row) {{
        var icon, marker, value;
        value = row[2]; // third element of each (lat, long, value) tuple
        {choose_icon}
        marker = L.marker(new L.LatLng(row[0], row[1]));
        marker.setIcon(icon);
        return marker;
    }};
    """

    # Add the marker cluster layer to the map
    folium.plugins.FastMarkerCluster(locations, callback=callbacks).add_to(map_houses)

    # Display the map and keep a standalone HTML copy
    display(map_houses)
    map_houses.save('cluster_map.html')

In [381]:
# Colour the markers by whether each price is above the median price.
folium_cluster(df, column='price', metrics='median')

Median:867000.0


In [202]:
# NOTE(review): this redefines `folium_cluster`, shadowing the earlier version
# that accepts **kwargs and saves the map to HTML. Consider keeping only one.
def folium_cluster(df, column, metrics='mean'):
    '''
    Function creates and displays a folium cluster map.

    Every row of ``df`` becomes one marker in a FastMarkerCluster layer.
    Markers whose ``column`` value is above the threshold are red, all others
    are blue. When no threshold is requested every marker is green.

    df : dataframe - must have columns 'lat' and 'long' plus ``column``
    column : name 'column'
                Used for the threshold comparison
    metrics : default = 'mean'
            Cluster threshold metric
            (mean, median, mode, or specific int/float value;
             False / None disables the threshold)

    Raises ValueError if ``metrics`` is not one of the supported values, or if
    'mode' is requested for a column that has no mode.
    '''
    # Code to determine metrics. None means "no threshold requested".
    if metrics is None or metrics is False:
        metric = None
    elif isinstance(metrics, str):
        if metrics == 'mean':
            metric = df[column].mean()
            print(f'Mean:{metric}')
        elif metrics == 'median':
            metric = df[column].median()
            print(f'Median:{metric}')
        elif metrics == 'mode':
            # mode() returns a Series that may hold several values; take the first.
            modes = df[column].mode()
            if modes.empty:
                raise ValueError(f"Column {column!r} has no mode")
            metric = int(modes.iloc[0])
            print(f'Mode:{metric}')
        else:
            raise ValueError(f"Unsupported value for 'metrics': {metrics}")
    elif isinstance(metrics, (int, float)) and not isinstance(metrics, bool):
        # bool is excluded explicitly because True is an int in Python.
        metric = metrics
    else:
        raise ValueError(f"Unsupported value for 'metrics': {metrics}")

    # Build the map inside the function instead of relying on a `map_houses`
    # variable that happens to exist in the notebook's global namespace.
    map_houses = folium.Map(location=[df['lat'].mean(), df['long'].mean()])

    # Create a list of tuples containing the latitude, longitude, and value for each data point
    locations = list(zip(df['lat'], df['long'], df[column]))

    # The icon colour rule is chosen in Python and rendered into the
    # JavaScript callback that runs once per row in the browser.
    icon_template = 'L.AwesomeMarkers.icon({{icon: "map-marker", markerColor: "{colour}"}})'
    if metric is None:
        choose_icon = 'icon = ' + icon_template.format(colour='green') + ';'
    else:
        choose_icon = (
            f'icon = (value > {metric}) ? '
            + icon_template.format(colour='red') + ' : '
            + icon_template.format(colour='blue') + ';'
        )
    callbacks = f"""\
    function (row) {{
        var icon, marker, value;
        value = row[2]; // third element of each (lat, long, value) tuple
        {choose_icon}
        marker = L.marker(new L.LatLng(row[0], row[1]));
        marker.setIcon(icon);
        return marker;
    }};
    """

    # Add the marker cluster layer to the map
    folium.plugins.FastMarkerCluster(locations, callback=callbacks).add_to(map_houses)

    # Display the map
    display(map_houses)

In [201]:
# Exploratory check: show the class of a default folium map object.
type(folium.Map())

folium.folium.Map

In [197]:
# Colour the markers by whether each price is above the median price.
folium_cluster(df, column='price', metrics='median')

Median:867000.0


# Level up

This requires geopandas, contextily, and additional dependencies.


