# Imports

In [1]:
import pandas as pd
import folium
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime
import datetime
import calendar
import numpy as np

# Loads

In [2]:
# Read the combined weather / traffic-count table; the path is relative to
# the directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer='Material/nyc_cop_lon2.csv')

# Cleanup

In [3]:
# Preview the loaded table; the notebook renders a truncated view
# (first and last rows only).
df

Unnamed: 0.1,Unnamed: 0,index,time,temp,dwpt,rhum,city,wdir,wspd,pres,longitude,latitude,hour,type,count,ID,Segment ID
0,0,0,2010-01-01 00:00:00,-5.8,-7.0,91.0,Copenhagen,360.0,22.3,1002.6,12.564408,55.686473,0.0,bicycle,13.0,,
1,1,1,2010-01-01 00:00:00,-5.8,-7.0,91.0,Copenhagen,360.0,22.3,1002.6,12.565354,55.641459,0.0,vehicle,27.0,,
2,2,2,2010-01-01 00:00:00,-5.8,-7.0,91.0,Copenhagen,360.0,22.3,1002.6,12.482855,55.710998,0.0,vehicle,82.0,,
3,3,3,2010-01-01 00:00:00,-5.8,-7.0,91.0,Copenhagen,360.0,22.3,1002.6,12.482855,55.710998,0.0,vehicle,163.0,,
4,4,4,2010-01-01 00:00:00,-5.8,-7.0,91.0,Copenhagen,360.0,22.3,1002.6,12.595038,55.670248,0.0,vehicle,242.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3021861,3021861,178941,2014-12-31 23:00:00,-1.7,-13.9,39.0,New-York,250.0,13.0,1024.6,-73.985890,40.699810,23.0,bicycle,6.0,,
3021862,3021862,178942,2014-12-31 23:00:00,-1.7,-13.9,39.0,New-York,250.0,13.0,1024.6,0.000000,0.000000,23.0,bicycle,,,
3021863,3021863,178943,2014-12-31 23:00:00,-1.7,-13.9,39.0,New-York,250.0,13.0,1024.6,-73.968386,40.711282,23.0,bicycle,2.0,,
3021864,3021864,178944,2014-12-31 23:00:00,-1.7,-13.9,39.0,New-York,250.0,13.0,1024.6,-73.994750,40.715600,23.0,bicycle,6.0,,


In [3]:
# Pick the subset to visualise: missing values are replaced by 0 first, then
# exactly one city / traffic-type condition is applied.
df = df.fillna(0).loc[
    lambda frame: (frame['city'] == 'New-York') & (frame['type'] != 'vehicle')
]  # New-York non-vehicle (bicycle) data
# Alternative conditions (swap one in for the condition above):
#   (frame['city'] == 'Copenhagen') & (frame['type'] != 'vehicle')  -> Copenhagen bicycle data
#   (frame['city'] == 'New-York') & (frame['type'] == 'vehicle')    -> New-York vehicle data
#                                  (use 'Copenhagen' here for Copenhagen vehicle data)

# Processing

#### Helper functions for dividing a column into range classes

In [5]:
# Divide column in ranges

def _section_index(minim, sec_len, number, section_count=None):
    """Return the zero-based index of the section that contains ``number``.

    Sections are consecutive intervals of length ``sec_len`` starting at
    ``minim``. When ``section_count`` is given, the index is capped at
    ``section_count - 1``.
    """
    # A zero-length section means every value equals ``minim`` (constant
    # column); dividing would give NaN / raise, so everything is section 0.
    if sec_len == 0:
        return 0
    section = int((number - minim) / sec_len)
    # The column maximum sits exactly on the upper edge of the last section,
    # so its raw index equals ``section_count``. Fold it back into the last
    # section instead of opening an extra one.
    if section_count is not None:
        section = min(section, section_count - 1)
    return section


def _section_length(col, section_count):
    """Return ``(minimum, section length)`` for splitting ``col`` into ``section_count`` sections."""
    if section_count < 1:
        raise ValueError("section_count must be at least 1, got {!r}".format(section_count))
    mn = col.min()
    return mn, (col.max() - mn) / section_count


def calc_section(minim, sec_len, number, name, section_count=None):
    """Build the label of the section that contains ``number``.

    Parameters
    ----------
    minim : lower bound of the first section.
    sec_len : length of one section.
    number : value to classify.
    name : text placed at the start of the label.
    section_count : optional total number of sections (positive integer).
        When given, values on the upper edge of the last section are
        labelled as part of that last section.

    Returns
    -------
    str such as ``"Temperature from 20.0 to 30.0"`` (bounds use one decimal).
    """
    section = _section_index(minim, sec_len, number, section_count)
    left = minim + (section * sec_len)
    right = minim + ((section + 1) * sec_len)
    return "{} from {:.1f} to {:.1f}".format(name, left, right)


def divide_into_range(col, section_count, name):
    """Replace every value of ``col`` by the label of its range.

    The span between the column minimum and maximum is split into
    ``section_count`` equal sections (positive integer), so at most
    ``section_count`` distinct labels are produced.

    Raises
    ------
    ValueError if ``section_count`` is smaller than 1.
    """
    mn, section_len = _section_length(col, section_count)
    return col.apply(lambda x: calc_section(mn, section_len, x, name, section_count))


def get_left(minim, sec_len, number, section_count=None):
    """Return the lower bound of the section that contains ``number``.

    ``section_count`` has the same meaning as in ``calc_section``.
    """
    return minim + (_section_index(minim, sec_len, number, section_count) * sec_len)


def get_temp_sorting_value(col, section_count):
    """Return, for every value of ``col``, the lower bound of its range.

    Uses the same sections as ``divide_into_range``, so the result can be
    used as a numeric sort key for the range labels.

    Raises
    ------
    ValueError if ``section_count`` is smaller than 1.
    """
    mn, section_len = _section_length(col, section_count)
    return col.apply(lambda x: get_left(mn, section_len, x, section_count))

#### Make a heatmap plot function (used for temperature and humidity)

In [28]:
def make_heatmap(parameter, nr_of_ranges, parameter_name, data=None,
                 location=(40.7228, -73.9860), zoom_start=13):
    """Build an animated folium heatmap with one frame per range of ``parameter``.

    The values of ``parameter`` are split into ``nr_of_ranges`` ranges. For
    every range and every (latitude, longitude) pair the mean ``count`` is
    computed, scaled by the overall maximum, and used as the heat weight.
    Frames are ordered by the lower bound of their range.

    Parameters
    ----------
    parameter : name of the numeric column to split into ranges (e.g. ``'temp'``).
    nr_of_ranges : number of ranges to split ``parameter`` into.
    parameter_name : text used at the start of every frame label.
    data : optional DataFrame with the columns ``parameter``, ``'count'``,
        ``'latitude'`` and ``'longitude'``. Defaults to the notebook-level ``df``.
    location : ``(latitude, longitude)`` of the map centre. The default is
        New-York; the original notebook used ``(55.6761, 12.5683)`` for Copenhagen.
    zoom_start : initial zoom level (13 for New-York; 12 was used for Copenhagen).

    Returns
    -------
    folium.Map with a ``HeatMapWithTime`` layer added.
    """
    source = df if data is None else data

    # Copy only the needed columns so the caller's frame is never modified.
    frame = source[[parameter, 'count', 'latitude', 'longitude']].copy()

    # Replace the parameter by its range label and keep the lower bound of
    # the range as a numeric key for ordering the frames later.
    values = frame[parameter].astype(float)
    frame['sorting_value'] = get_temp_sorting_value(values, nr_of_ranges)
    frame[parameter] = divide_into_range(values, nr_of_ranges, parameter_name)

    # One row per (range, location): mean count and the range's sort key.
    points = (
        frame.groupby([parameter, 'latitude', 'longitude'], as_index=False)
             .agg({'count': 'mean', 'sorting_value': 'min'})
    )

    # Make counts relative to the largest mean count. Skip the scaling when
    # there is nothing to scale by (no rows, or all counts are 0) so the
    # weights do not turn into NaN.
    max_value = points['count'].max()
    if pd.notna(max_value) and max_value != 0:
        points['count'] = points['count'] / max_value

    # Frame labels ordered by the lower bound of their range.
    order = points.groupby(parameter)['sorting_value'].min().sort_values()
    dates = order.index.tolist()

    # One list of numeric [latitude, longitude, weight] points per frame.
    frames = {
        label: group[['latitude', 'longitude', 'count']].values.tolist()
        for label, group in points.groupby(parameter)
    }
    heat_data = [frames[label] for label in dates]

    # NOTE(review): recent folium releases no longer ship the Stamen tile
    # sets; confirm "Stamen Toner" is available in the installed version.
    m = folium.Map(location=list(location),
                   tiles="Stamen Toner",
                   zoom_start=zoom_start)

    # Add points and frame labels to the map with a colour gradient
    hm = HeatMapWithTime(heat_data,
                         index=dates,
                         gradient={0.55: 'blue', 0.9: 'lime', 0.1: 'red'},
                         radius=50,
                         min_opacity=0.15,
                         max_opacity=1,
                         min_speed=1,
                         max_speed=10)
    hm.add_to(m)
    return m

#### Save results in html

In [29]:
# Build the temperature heatmap, write it to an HTML file and show it inline
mapp = make_heatmap(parameter='temp', nr_of_ranges=20, parameter_name='Temperature')
mapp.save("New_York_map_temperature_bicycles.html")
mapp

In [30]:
# Build the humidity heatmap, write it to an HTML file and show it inline
mapp = make_heatmap(parameter='rhum', nr_of_ranges=20, parameter_name='Humidity')
mapp.save("New_York_map_humidity_bicycles.html")
mapp