# Map Data Prototyping

## Setup

In [339]:
import io
import json

In [197]:
import pandas
import geocoder
import folium
import folium.plugins
from tqdm.auto import tqdm
import requests
import boto3

In [21]:
# Register tqdm's `progress_apply` on pandas objects; later cells rely on it
# to show a progress bar while applying functions.
tqdm.pandas()

  from pandas import Panel


## Reading Data

In [341]:
# Load the notebook configuration (it supplies the S3 bucket/object names used
# below). The context manager makes sure the file handle is closed again
# instead of being left open for the lifetime of the kernel.
with open("../creds/config.json") as config_file:
    config = json.load(config_file)

In [342]:
# Where the dataset lives in S3, as named in the config file.
bucket_name = config["data_bucket_name"]
object_name = config["data_object_name"]

# Human-readable S3 URL for the same object.
s3_url = f"s3://{bucket_name}/{object_name}"

In [343]:
# Create an S3 client and request the dataset object named in the config.
s3 = boto3.client("s3")
s3_obj = s3.get_object(Bucket=bucket_name, Key=object_name)

In [344]:
# Pull the whole S3 object body into memory, then parse it as parquet.
parquet_bytes = s3_obj['Body'].read()
data_df = pandas.read_parquet(io.BytesIO(parquet_bytes))

In [345]:
# List the available columns to see what the dataset contains.
data_df.columns

Index(['Name', 'Gender', 'DOB', 'Age on Arrival', 'Arrival Date', 'Ship',
       'Ship No. ', 'Ship dock point', 'Master 1 Name', 'Master 1 occupation',
       'Master 1 Location', 'Profession 1', 'Master's Name 2',
       'Master 2 Location', 'Profession 2', 'Start of 2nd Apprenticeship',
       'Reason for 2nd Apprenticeship', 'Orphan', 'Any living relatives',
       '1840 Commission of Enquiry', '1840 Commission Master's Testimony',
       '1840 Commissioner's comments', '1840 Commissioner Name',
       'Records relating to Master', 'GPS Coordinates', 'Source', 'Other'],
      dtype='object')

## Geocoding

Doing this on a per-location basis, as this is more efficient than working on the data more directly.

In [323]:
# Count records per first-master location to see how many distinct place
# names need geocoding.
data_df["Master 1 Location"].value_counts()

                             220
Cape Town                     88
Stellenbosch                  33
Grahamstown                   32
Groot Drankenstein, Paarl     19
                            ... 
Wynberg, Cape Town             1
Truyntjiesrivier, Caledon      1
Plumstead                      1
Goedvertrouw, Caledon          1
Zoetendaalsvlei, Caledon       1
Name: Master 1 Location, Length: 97, dtype: int64

In [324]:
# Distinct place names only, so each one is looked up at most once.
locations = data_df["Master 1 Location"].unique()

Using two free-to-access geocoding services, namely OSM and ArcGIS (isn't ESRI nice?).

In [325]:
# Cache of raw place name -> successful geocoder result. Keyed by the exact
# value from the "Master 1 Location" column so it can be joined back onto the
# records afterwards.
location_lookup = {}

with requests.Session() as osm_session, requests.Session() as ag_session:
    # Providers are tried in this order; the first usable answer wins.
    # Each provider keeps its own HTTP session.
    providers = (
        (geocoder.arcgis, ag_session),
        (geocoder.osm, osm_session),
    )

    for loc in tqdm(locations, desc="Place Lookup"):
        # Skip anything that is not a non-blank string. This covers None,
        # NaN (a float, which would break the concatenation below) and
        # empty/whitespace-only place names.
        if not isinstance(loc, str) or not loc.strip():
            continue

        # Bias the lookup towards South Africa by appending the country code.
        loc_string = loc + ", ZA"

        for gc, session in providers:
            new_loc = gc(loc_string, session=session, timeout=10)
            if new_loc.ok:
                location_lookup[loc] = new_loc
                # Stop at the first success so the fallback provider is not
                # consulted needlessly.
                break
            print(f"lookup for '{loc_string}' failed...")

HBox(children=(FloatProgress(value=0.0, description='Place Lookup', max=98.0, style=ProgressStyle(description_…




In [326]:
def _geocode_for(place):
    """Return the cached geocoder result for `place`, or None if there is none."""
    return location_lookup.get(place)


# Attach the geocoder result (or None) to every record.
data_df["Master1Location"] = data_df["Master 1 Location"].progress_apply(_geocode_for)

HBox(children=(FloatProgress(value=0.0, max=665.0), HTML(value='')))




Check to see how we're doing:

In [327]:
# Fraction of records that carry a geocoded first-master location
# (the mean of a boolean mask is the share of True values).
data_df["Master1Location"].notna().mean()

0.5789473684210527

## Map Time

### Individual Markers

Going to use marker clusters for now...

In [184]:
# markers = data_df[
#     data_df.Master1Location.notna() & (data_df.Gender == "M")
# ].progress_apply(
#     lambda row: (
#         folium.Marker(
#             location=[row.Master1Location.y, 
#                       row.Master1Location.x],
#             popup=f"{row.Name} ({row['Master 1 Location']})",
#             icon=folium.Icon(icon='male', prefix='fa', color='blue')
            
#         )
#     ),
#     axis=1
# )
# print(markers.shape)

# markers = pandas.concat([markers, data_df[
#     data_df.Master1Location.notna() & (data_df.Gender == "F")
# ].progress_apply(
#     lambda row: (
#         folium.Marker(
#             location=[row.Master1Location.y, 
#                       row.Master1Location.x],
#             popup=f"{row.Name} ({row['Master 1 Location']})",
#             icon=folium.Icon(icon='female', prefix='fa', color='red'),
#         )
#     ),
#     axis=1
# )])
# print(markers.shape)

HBox(children=(FloatProgress(value=0.0, max=309.0), HTML(value='')))


(309,)


HBox(children=(FloatProgress(value=0.0, max=65.0), HTML(value='')))


(374,)


In [193]:
# for marker in markers:
#     marker.add_to(m)

### Map Setup

In [364]:
# Base map. The initial view is centred on the given [lat, lon]
# (roughly the middle of South Africa) at zoom level 6.
m = folium.Map(
    location=[-30.5367, 24.5198],
    # tiles='CartoDB positron',
    # Custom tile URL: Esri's "World Physical Map" service on ArcGIS Online.
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Physical_Map/MapServer/tile/{z}/{y}/{x}',
    # The attribution must credit the actual tile provider (Esri); the
    # previous text credited Stamen/OpenStreetMap, which do not supply these tiles.
    attr='Tiles &copy; Esri &mdash; Source: US National Park Service',
    zoom_start=6
)

### Marker Clusters

In [329]:
# Re-check the columns now that 'Master1Location' has been added.
data_df.columns

Index(['Name', 'Gender', 'DOB', 'Age on Arrival', 'Arrival Date', 'Ship',
       'Ship No. ', 'Ship dock point', 'Master 1 Name', 'Master 1 occupation',
       'Master 1 Location', 'Profession 1', 'Master's Name 2',
       'Master 2 Location', 'Profession 2', 'Start of 2nd Apprenticeship',
       'Reason for 2nd Apprenticeship', 'Orphan', 'Any living relatives',
       '1840 Commission of Enquiry', '1840 Commission Master's Testimony',
       '1840 Commissioner's comments', '1840 Commissioner Name',
       'Records relating to Master', 'GPS Coordinates', 'Source', 'Other',
       'Master1Location'],
      dtype='object')

In [330]:
# Icon name (used with the 'fa' prefix when markers are built) for each
# lower-cased, stripped job title. Grouped by icon so it is easy to see which
# professions share a glyph. Keys are kept exactly as originally spelled,
# including "sheperd".
_jobs_by_icon = {
    "leaf": (
        "farmworker", "farmer", "sheperd", "farm labourer", "groomsman",
        "gardener", "farm servant", "stableboy",
    ),
    "home": (
        "domestic", "servant", "domestic and gardener", "house servant",
    ),
    "shopping-basket": (
        "baker's apprentice", "store servant", "blacksmith's apprentice",
        "bakers apprentice",
    ),
}

# Flatten the grouping into the job -> icon mapping used for lookups.
job_icon_lookup = {
    job: icon
    for icon, jobs in _jobs_by_icon.items()
    for job in jobs
}

In [331]:
def _icon_for_job(job):
    """Return the marker icon name for a job title.

    The title is lower-cased and stripped before being looked up in
    `job_icon_lookup`. Anything that is not a string (None, NaN, ...) or that
    has no entry in the lookup falls back to the generic "male" icon.
    """
    # NaN is a truthy float, so a plain truthiness test would let it through
    # and `.lower()` would raise; check the type instead.
    if not isinstance(job, str):
        return "male"
    return job_icon_lookup.get(job.lower().strip(), "male")


# Choose an icon for every record from its first recorded profession.
data_df["Position1Icon"] = data_df["Profession 1"].progress_apply(_icon_for_job)

HBox(children=(FloatProgress(value=0.0, max=665.0), HTML(value='')))




In [332]:
# Marker colour for each lower-cased gender code.
colour_lookup = dict(m="blue", f="red")

In [333]:
# Columns shown in each marker's popup, in display order.
popup_columns = (
    'Name', 'Gender', 'DOB', 'Arrival Date', 'Ship',
    'Master 1 Name', 'Master 1 occupation', 'Master 1 Location',
    'Profession 1', 'Orphan', 'Any living relatives'
)


def _has_value(value):
    """Return True unless the cell is missing (None/NaN/NaT) or an empty string."""
    # `!= None` alone lets NaN/NaT through, which would render as "nan"/"NaT".
    return bool(pandas.notna(value)) and value != ""


def _popup_for_row(row):
    """Build the HTML popup listing the non-empty `popup_columns` of one record."""
    # NOTE(review): values are interpolated into raw HTML (parse_html=False)
    # without escaping -- confirm the data never contains markup characters.
    lines = [
        f"<strong>{col.title()}</strong>: {row[col]}"
        for col in popup_columns
        if _has_value(row[col])
    ]
    return folium.map.Popup(
        html="<br>".join(lines),
        parse_html=False, max_width=200
    )


def _icon_for_row(row):
    """Build the marker icon: glyph from the profession, colour from the gender."""
    gender = row.Gender
    # Non-string genders (None, NaN) fall through to the default colour
    # instead of raising on `.lower()`.
    gender_key = gender.lower().strip() if isinstance(gender, str) else None
    return folium.Icon(
        icon=row.Position1Icon,
        prefix='fa',
        color=colour_lookup.get(gender_key, "beige")
    )


# Each filter selects a group of records to plot; for now there is a single
# group: every record that was geocoded successfully.
filters = [
    data_df.Master1Location.notna()
]

# Parallel lists, one entry per marker. Note that `locations` is rebound here:
# from this point on it holds coordinate pairs, not place names.
locations = []
popups = []
icons = []

for df_filter in filters:
    # Apply the boolean mask once and reuse the result for all three passes.
    subset = data_df[df_filter]

    # presumably .y is latitude and .x is longitude on the geocoder result,
    # matching the [lat, lon] order folium expects -- TODO confirm.
    locations += subset.progress_apply(
        lambda row: [row.Master1Location.y, row.Master1Location.x],
        axis=1
    ).values.tolist()

    popups += subset.progress_apply(_popup_for_row, axis=1).values.tolist()

    icons += subset.progress_apply(_icon_for_row, axis=1).values.tolist()

HBox(children=(FloatProgress(value=0.0, max=385.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=385.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=385.0), HTML(value='')))




In [365]:
# Bundle every marker into one clustering layer and attach it to the map.
marker_cluster = folium.plugins.MarkerCluster(
    locations=locations,
    popups=popups,
    icons=icons,
)
marker_cluster.add_to(m)

<folium.plugins.marker_cluster.MarkerCluster at 0x7f4e48b432d0>

### Output

In [366]:
# Display the map inline as the cell's output.
m

In [338]:
# Export the map to an HTML file under ../dist.
m.save('../dist/map_test.html')