# Leaflet cluster map of talk locations

Run this from the _talks/ directory, which contains .md files of all your talks. This scrapes the location YAML field from each .md file, geolocates it with geopy/Nominatim, and uses the getorg library to output data, HTML, and Javascript for a standalone cluster map.

In [6]:
import glob
import getorg
from geopy import Nominatim
import folium

In [7]:
g = glob.glob("*.md")

In [8]:
data_talks = []
for file in g:
    talk_dict = {}
    with open(file, 'r') as f:
        lines = f.read()
        
        if lines.find('location: "') > 1:
            loc_start = lines.find('location: "') + 11
            lines_trim = lines[loc_start:]
            loc_end = lines_trim.find('"')
            location = lines_trim[:loc_end]

            title_start = lines.find('title: "') + 8 #look for title
            title_trim = lines[title_start:]
            title_end = title_trim.find('"')
            title = title_trim[:title_end]
            
            venue_start = lines.find('venue: "') + 8 #look for venue
            venue_trim = lines[venue_start:]
            venue_end = venue_trim.find('"')
            venue_name = venue_trim[:venue_end]

            period_start = lines.find('date: ') + 6 #look for period
            period_trim = lines[period_start:]
            period_end = period_trim.find('\n')
            period = period_trim[:period_end]

            type_start = lines.find('type: "') + 7 #look for type
            type_trim = lines[type_start:]
            type_end = type_trim.find('\n')
            type = type_trim[:type_end -1]

            #get the name of the file
            link = "https://riccardogonzo.com/talks/" + file[:-3]

            talk_dict['title'] = title
            talk_dict['location'] = location
            talk_dict['venue'] = venue_name
            talk_dict['date'] = period
            talk_dict['type'] = type
            talk_dict['link'] = link
            
            data_talks.append(talk_dict)

In [9]:
geocoder = Nominatim(user_agent="gn_website")

from geopy.exc import GeocoderUnavailable, GeocoderTimedOut

for talk in data_talks:
    if talk['location'] == "Online":
        talk['lat'] = None
        talk['lon'] = None
        continue
    print(talk['location'])
    try:
        location = geocoder.geocode(talk['location'], timeout=10)
        if location:
            talk['lat'] = location.latitude
            talk['lon'] = location.longitude
        else:
            talk['lat'] = None
            talk['lon'] = None
    except (GeocoderUnavailable, GeocoderTimedOut) as e:
        print(f"Geocoding failed for {talk['location']}: {e}")
        talk['lat'] = None
        talk['lon'] = None

In [10]:
data_talks[8]

IndexError: list index out of range

In [None]:
data_talks[10]

In [None]:
def convert_date(date):
    date = date.split('-')
    month = date[1]
    day = date[2]
    year = date[0]
    if month == '01':
        month = 'January'
    elif month == '02':
        month = 'February'
    elif month == '03':
        month = 'March'
    elif month == '04':
        month = 'April'
    elif month == '05':
        month = 'May'
    elif month == '06':
        month = 'June'
    elif month == '07':
        month = 'July'
    elif month == '08':
        month = 'August'
    elif month == '09':
        month = 'September'
    elif month == '10':
        month = 'October'
    elif month == '11':
        month = 'November'
    elif month == '12':
        month = 'December'
    
    if day.endswith('1') and day != '11':
        day += 'st'
    elif day.endswith('2') and day != '12':
        day += 'nd'
    elif day.endswith('3') and day != '13':
        day += 'rd'
    else:
        day += 'th'

    # if days starts with 0, remove it
    if day.startswith('0'):
        day = day[1:]
    
    return month + ' ' + day + ', ' + year

In [None]:
for talk in data_talks:
    talk['venue'] = talk['venue'].replace('CCS', 'Conference on Complex Systems')

In [None]:
# remove Online talks
data_talks = [talk for talk in data_talks if talk['location'] != 'Online']

In [None]:
# merge the talks that took place in the same location
talks_merged = []

unique_locations = list(set([talk['location'] for talk in data_talks]))

for location in unique_locations:
    location_talks = [talk for talk in data_talks if talk['location'] == location]
    if len(location_talks) == 1:
        talk = location_talks[0]
        text = '<p style="font-size:1.3em; font-family: Avenir"><b>' + talk['venue'] + "</b>, "
        text += talk["location"]+"<br /><i>" + talk['title']
        text += "</i><br />" + convert_date(talk['date']) + " (" + talk['type'].lower() + ")</p>"
        talks_merged.append({'text': text, 'lat': talk['lat'], 'lon': talk['lon'], 'type': talk['type'], 'type': location_talks[0]['type'], 'num': None})
    else:
        text = '<p style="font-size:1.3em; font-family: Avenir">'

        types = []
        # sort location talks by date
        location_talks = sorted(location_talks, key=lambda x: x['date'], reverse=True)
        for talk in location_talks:
            text += '<b>' + talk['venue'] + "</b>, "
            text += talk["location"]+"<br /><i>" + talk['title']
            text += "</i><br />" + convert_date(talk['date']) + " (" + talk['type'].lower() + ")<br /><br />"
            
            if talk['type'] not in types:
                types.append(talk['type'])

        if "Invited talk" in types:
            type = "Invited talk"
        elif "Contributed talk" in types:
            type = "Contributed talk"
        elif "Contributed poster" in types:
            type = "Contributed poster"
    
        text += "</p>"
        talks_merged.append({'text': text, 'lat': location_talks[0]['lat'], 'lon': location_talks[0]['lon'], 'type': type, 'num': len(location_talks)})

In [None]:
talks_merged

In [None]:
n = folium.Map(location=[40,40], tiles="cartodb positron", zoom_start=3, z_index = 0)

folium.map.CustomPane("Invited talk", z_index=1000).add_to(n)
folium.map.CustomPane("Contributed talk", z_index=999).add_to(n)
folium.map.CustomPane("Contributed poster", z_index=998).add_to(n)

folium.map.CustomPane("arrow", z_index=450).add_to(n)
# sort data talks in such a way that posters are at the beginning, contributed talks in the middle, and invited talks at the end

talks_merged = sorted(talks_merged, key=lambda x: x['type'])

for idx, talk in enumerate(talks_merged):
    # iframe = folium.IFrame('<p style="font-family: Avenir"><b>' + talk['venue'] + "</b>, "+talk["location"]+"<br /><i>" + talk['title'] + "</i><br />" + convert_date(talk['date']) + " (" +
    #                        talk['type'].lower() + ")</p>", width=500, height=150)
    #html = '<p style="font-size:1.3em; font-family: Avenir"><b>' + talk['venue'] + "</b>, "+ talk["location"]+"<br /><i>" + talk['title'] + "</i><br />" + convert_date(talk['date']) + " (" + talk['type'].lower() + ")</p>"
    
    popup = folium.Popup(talk['text'], min_width=200, max_width=300, max_height=150, min_height=100)

    if talk['type'] == 'Invited talk':
        color = 'darkred'

    elif talk['type'] == 'Contributed talk':
        color = 'darkblue'

    elif talk['type'] == 'Contributed poster':
        color = 'darkblue'

    folium.Marker(location = [talk['lat'], talk['lon']], popup=popup, marker_size = 100,
                  icon=folium.Icon(color=color, prefix = "fa")).add_to(n)

n

In [None]:
# get the minimum and maximum latitude and longitude
min_lat = min([talk['lat'] for talk in data_talks if talk['lat'] is not None])
max_lat = max([talk['lat'] for talk in data_talks if talk['lat'] is not None])

min_lon = min([talk['lon'] for talk in data_talks if talk['lon'] is not None])
max_lon = max([talk['lon'] for talk in data_talks if talk['lon'] is not None])

n.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])

In [None]:
# save the map in html

n.save('../talkmap/talks_map.html')