# Visualizing Spatial Data
* A demo for a method to plot clustered data on a map.
* Uses the `folium` module, which can be installed with:
```
pip install folium
```

In [1]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex

In [2]:
# Four sample points, one per row: latitude, longitude, cluster id.
# The second cluster id is deliberately a large number (72) so that the two
# clusters are drawn with clearly different marker styles on the map.
coords = [
    [54.3327, 10.1803, 1],   # Sokratesplatz 1
    [54.3298, 10.1810, 72],  # Schwentine Mensa
    [54.3332, 10.1763, 1],   # Ostuferhafen 15
    [54.3440, 10.1753, 72],  # FH Beach
]
df = pd.DataFrame(
    data=coords,
    columns=["latitude", "longitude", "cluster"],
)

## Plot onto an actual map

In [3]:
def _assign_style_slots(cluster_ids, max_styles):
    '''
    Give every cluster id its own style slot in range(max_styles).

    cluster_ids - sorted list of distinct integer cluster ids (noise excluded)
    max_styles - number of distinct (color, sides, filling) combinations

    If all ids already lie in range(max_styles), each id is used as its own slot.
    Otherwise the ids are numbered consecutively in sorted order, so that
    large, negative or sparse ids cannot end up sharing one slot.
    '''
    if all(0 <= cluster_id < max_styles for cluster_id in cluster_ids):
        return {cluster_id: cluster_id for cluster_id in cluster_ids}
    return {cluster_id: slot for slot, cluster_id in enumerate(cluster_ids)}


def create_map(
    df,
    cluster_column,
    plot_noise:bool=False,
    center_location=(40.76,-73.98),
    zoom=13,
    marker_radius=5,
    cmap=plt.get_cmap('tab20b')
):

    '''
    Plot points on a map, styled according to their membership in a clustering.
    Each point is represented as a polygon with either 3 or 7 sides and its color and its filling (same color or white).
    The combination (color, sides, filling) identifies a cluster.
    Thus the number of clusters can be at most 4 times the number of colors.
    A ValueError is raised if the number of clusters is too large.

    df - the dataframe with columns longitude and latitude
    cluster_column - the name of the column in df that holds the cluster assignments
    plot_noise - boolean variable. If false, points with cluster id -1 are ignored. Otherwise, they are plotted in black.
    center_location - latitude and longitude of the center of the map to be displayed
    zoom - initial zoom level of the map
    marker_radius - radius of the polygon markers
    cmap - a color map. The number of colors in the map determines the maximum number of clusters that can be plotted.

    Returns the folium map with one marker per plotted point.
    '''

    # cmap.N is the number of entries of the color map; unlike cmap.colors
    # it is available on every matplotlib color map, not only on listed ones.
    num_colors = cmap.N
    max_styles = 4 * num_colors  # 2 polygon shapes x 2 fillings per color

    clusters = df[cluster_column]
    # ignore noise when counting clusters
    cluster_ids = sorted({int(c) for c in clusters[clusters != -1].unique()})
    num_clusters = len(cluster_ids)

    if num_clusters > max_styles:
        raise ValueError(
            "There are not enough colors and marker types to draw all clusters in different styles. "
            f"Available styles: {max_styles} (4 x {num_colors} colors). "
            f"Clusters to draw: {num_clusters}"
        )

    style_slots = _assign_style_slots(cluster_ids, max_styles)

    folium_map = folium.Map(location=center_location, zoom_start=zoom)
    # Iterate over the columns directly: iterrows() would upcast the integer
    # cluster ids to float whenever the coordinates are floats.
    for latitude, longitude, raw_cluster in zip(df['latitude'], df['longitude'], clusters):
        cluster = int(raw_cluster)
        if cluster == -1:
            # paint noise in black or not at all
            if not plot_noise:
                continue
            cluster_colour = '#000000'
            number_of_sides = 3
            filled = True
        else:
            slot = style_slots[cluster]
            cluster_colour = rgb2hex(cmap(slot % num_colors))
            # The slot's block of num_colors selects shape and filling:
            # block 0: filled triangle, block 1: filled heptagon,
            # block 2: white triangle,  block 3: white heptagon.
            style_block = slot // num_colors
            number_of_sides = 3 if style_block in (0, 2) else 7
            filled = style_block < 2

        folium.RegularPolygonMarker(
            number_of_sides=number_of_sides,
            location=[latitude, longitude],
            radius=marker_radius,
            popup=str(cluster),  # pass the popup as text instead of a numpy number
            color=cluster_colour,
            fill=True,
            fill_color=cluster_colour if filled else '#FFFFFF',
            opacity=.5,
            fill_opacity=.5
        ).add_to(folium_map)
    return folium_map

In [4]:
# Draw the sample points; the map is centred between them and zoomed in
# far enough that the enlarged markers are easy to tell apart.
create_map(
    df,
    cluster_column='cluster',
    center_location=[54.3400, 10.180],
    zoom=14,
    marker_radius=15,
)