# Playing with places

In [None]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap

In [None]:
# Load the spreadsheet export. keep_default_na=False stops pandas converting
# empty cells to NaN, so blank values arrive as empty strings.
df = pd.read_csv(
    'data/Mapping Places Spreadsheet - Sheet1.csv',
    keep_default_na=False,
)
df.head()

## Clean up the data

In [None]:
# Clean up the lats and lons.
# Only the coordinate columns are touched: running these patterns over the whole
# DataFrame would also strip trailing E/W/N/S capitals from other text columns
# (for example a place name ending in "NSW").
# NOTE(review): the hemisphere letter is discarded without changing the sign, so
# this assumes S/W values already carry a minus sign -- confirm against the data.
coord_columns = ['Latitude', 'Longitude']
# Drop a degree sign followed by hemisphere letter(s), e.g. "33.86° S" -> "33.86"
df[coord_columns] = df[coord_columns].replace({r'°\s*[EWNS]+': ''}, regex=True)
# Drop bare trailing hemisphere letter(s), e.g. "151.2 E" -> "151.2"
df[coord_columns] = df[coord_columns].replace({r'\s*[EWNS]+$': ''}, regex=True)
# Anything that still isn't a number (including empty cells) becomes NaN
for coord_column in coord_columns:
    df[coord_column] = pd.to_numeric(df[coord_column], errors='coerce')
df.head()

## Reorganise the data so we have one row per image

In [None]:
# This cell builds a DataFrame with a row for each image, preserving all the parent item metadata

def tidy_split(df, column, sep='|', keep=False):
    """
    Expand a delimited column so that each split value gets its own row.

    Rows where `column` is missing (NaN/None) are dropped first. Every
    remaining row is repeated once per piece of its split value, with all
    other columns (and the original index label) carried along unchanged.

    Params
    ------
    df : pandas.DataFrame
        dataframe containing the column to split and expand
    column : str
        name of the column to split and expand
    sep : str
        delimiter passed to ``str.split`` for each value
    keep : bool
        when True, a value that splits into more than one piece also
        contributes its unsplit form as its own row, ahead of the pieces

    Returns
    -------
    pandas.DataFrame
        A new dataframe with the same columns as `df`.
    """
    present = df.dropna(subset=[column])

    # Collect (row position, value) pairs in output order.
    expanded = []
    for position, raw in enumerate(present[column].astype(str)):
        pieces = raw.split(sep)
        if keep and len(pieces) > 1:
            expanded.append((position, raw))
        expanded.extend((position, piece) for piece in pieces)

    positions = [position for position, _ in expanded]
    result = present.iloc[positions, :].copy()
    result[column] = [piece for _, piece in expanded]
    return result

# One row per file number, with the parent row's metadata repeated
images_df = tidy_split(df, 'File Number', sep='|')

# Remove rows with no images
has_file_number = images_df['File Number'] != ''
images_df = images_df.loc[has_file_number]
images_df.head()

## Summarise the data

In [None]:
# How many images have been geo-located?
# Keep only the rows where both coordinates are present, then count them.
images_df = images_df.dropna(subset=['Latitude', 'Longitude'])
len(images_df)

In [None]:
# Top 50 places! (value_counts sorts by frequency, most common first)
place_counts = images_df['Place Name'].value_counts()
place_counts.head(50)

In [None]:
# Map with one marker per geo-located image, using the same starting view as
# the other map in this notebook.
m = folium.Map(
    location=[-30, 135],
    zoom_start=4
)
# We'll cluster the markers for better readability
marker_cluster = MarkerCluster().add_to(m)

for index, img in images_df.iterrows():
    # Swap apostrophes for the HTML entity &rsquo; -- presumably to keep raw
    # apostrophes out of the generated popup markup.
    place_name = img['Place Name'].replace("'", '&rsquo;')
    # Create the content of the marker popup: the place name in bold, then a
    # thumbnail that links to the digital.sl.nsw.gov.au page for this file number.
    html = '<b>{0}</b><br><a target="_blank" href="http://digital.sl.nsw.gov.au/delivery/DeliveryManagerServlet?dps_pid={1}&embedded=true&toolbar=false"><img width="200" src="https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/500/{1}-500.jpg"></a>'.format(place_name, img['File Number'])
    # Add the marker to the map
    folium.Marker([img['Latitude'], img['Longitude']], popup=html).add_to(marker_cluster)

m

In [None]:
# Heatmap of places, weighted by the number of images recorded for each place.
places_df = df.loc[df['Latitude'].notnull() & df['Longitude'].notnull()]
locations = []
# Loop through the places
for index, row in places_df.iterrows():
    # Get the images for this place. Empty pieces are skipped because
    # ''.split('|') returns [''], which would otherwise count a place with no
    # images once. str() matches the astype(str) handling in tidy_split.
    images = [file_number for file_number in str(row['File Number']).split('|') if file_number]
    # Add the coordinates of the place to the list of locations as many times as there are images
    locations += ([[row['Latitude'], row['Longitude']]] * len(images))


# Create another map
m2 = folium.Map(
    location=[-30, 135],
    zoom_start=4
)

# Add the heatmap data!
HeatMap(locations).add_to(m2)
m2