In [22]:
import pandas as pd
import numpy as np
import folium
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [16]:
# --- Static inputs ---------------------------------------------------------
# File name of the borough boundaries; handed to folium as `geo_data` when
# the recommendation map is drawn.
ldn_geojson = 'london_boroughs_proper.geojson'

# Pre-computed tables stored as pickles next to the notebook.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project, never pickles from an untrusted source.
df_venues_raw = pd.read_pickle('app_ldn_venues_raw.pkl')    # one row per venue
df_rent = pd.read_pickle('app_ldn_rent.pkl')                # rent quartiles per borough / category
df_groups_norm = pd.read_pickle('app_ldn_groups_norm.pkl')  # 'Borough' + one column per venue group

In [18]:
# Notebook-wide constants used by the recommendation / plotting helpers below.

# Independent copy of the venue table, so later edits to `df_venues_raw` do
# not leak into the plots.
ALL_VENUES = df_venues_raw.copy()
# Borough boundaries passed to folium.Choropleth as `geo_data`.
LONDON_GEOJSON = ldn_geojson
# [latitude, longitude] used as the centre of the folium map.
LONDON = [51.5074, -0.1278]

# Accommodation types present in the rent data, in order of first appearance.
ALLOWED_ACM_TYPES = df_rent['Category'].unique().tolist()
# Venue group names: every column of `df_groups_norm` except the 'Borough'
# key. Excluding by name (rather than slicing off the first column) keeps this
# correct even if the column order of the pickle changes.
ALLOWED_GROUPS = [col for col in df_groups_norm.columns if col != 'Borough']

In [13]:
def filter_rent_data(df=None, categories=None, rent_range=None):
    """
    Returns boroughs that satisfy the conditions for `categories` and `rent_range`

    A row of `df` matches when its 'Category' is one of `categories`, its
    ['Lower quartile', 'Upper quartile'] interval overlaps `rent_range`, and
    its 'Median' is not null.

    Inputs:
        df - pandas DataFrame containing rent data with the columns 'Borough',
             'Category', 'Lower quartile', 'Median' and 'Upper quartile'.
             When omitted, the notebook-level `df_rent` is looked up at call
             time (so re-loading `df_rent` is picked up without redefining
             this function).
        categories - an iterable or a string specifying appropriate
             accommodation types. None disables the category filter.
        rent_range - a list or a tuple with r_min and r_max rent ranges.
             None disables the rent filter. If r_min > r_max the range
             collapses to the single value r_min.

    Output:
        boroughs - a list of unique boroughs that match the condition, in
             order of first appearance in `df`

    """
    if df is None:
        df = df_rent

    # Accommodation type filter
    if categories is None:
        cat_cond = pd.Series(True, index=df.index)
    else:
        cats = [categories] if isinstance(categories, str) else list(categories)
        cat_cond = df['Category'].isin(cats)

    # Rent filter: the borough's inter-quartile interval must overlap the
    # requested [rent_lower, rent_higher] interval.
    if rent_range is None:
        rent_cond = pd.Series(True, index=df.index)
    else:
        rent_lower = rent_range[0]
        rent_higher = rent_range[1]

        # If invalid data provided (min above max), fall back to a single point
        if rent_lower > rent_higher:
            rent_higher = rent_lower

        rent_cond = (df['Lower quartile'] <= rent_higher) & (df['Upper quartile'] >= rent_lower)

    # Rows without a median carry no usable rent information
    not_null = df['Median'].notnull()

    return df.loc[cat_cond & rent_cond & not_null, 'Borough'].unique().tolist()

def create_preferences(ranking=None):
    """
    Converts `ranking` list to normalized pandas DataFrame

    Inputs:
        ranking - iterable of venue group names ordered from most to least
            important. With N ranked groups the first gets weight N, the
            second N - 1, ... and the last 1; groups that are not ranked get
            weight 0. A name that is not one of the predefined groups is
            added as an extra row. None is treated as an empty ranking.

    Returns:
        df - DataFrame indexed by group name with a single 'Preference'
            column whose values sum to 1 (all zeros when nothing is ranked)
    """
    ranking = [] if ranking is None else list(ranking)

    pref_dict = {
        'Eating out': 0,
        'Entertainment': 0,
        'Green spaces': 0,
        'Groceries': 0,
        'Health and Sports': 0,
        'Nightlife': 0,
        'Other': 0,
        'Public Transport': 0,
        'Shopping': 0,
    }

    n_ranked = len(ranking)
    for position, group in enumerate(ranking):
        pref_dict[group] = n_ranked - position

    df = pd.DataFrame.from_dict(pref_dict, orient='index', columns=['Preference'])

    total = df['Preference'].sum()
    if total > 0:
        df['Preference'] = df['Preference'] / total
    else:
        # Nothing ranked: keep explicit zero weights rather than 0/0 = NaN
        df['Preference'] = df['Preference'].astype(float)

    return df

def recommend_boroughs(W=None, p=None, n_brghs=5):
    """
    Calculates matrix product of DataFrames W and p

    Each borough's match value is the sum over venue groups of
    (group density in the borough) * (user preference for the group).
    Preferences are aligned to the columns of `W` by group name; a group
    present on only one side contributes nothing to the sum.

    Inputs:
        W - pandas DataFrame containing venue group density for each borough:
            a 'Borough' column plus one numeric column per venue group.
            When omitted, the notebook-level `df_groups_norm` is looked up at
            call time. `W` itself is not modified.
        p - pandas DataFrame with user group preferences: indexed by group
            name with a 'Preference' column (see `create_preferences`)
        n_brghs - int, number of boroughs to recommend

    Returns:
        rec_boroughs - `n_brghs` number of boroughs by highest match value
        df_rec - resulting recommendation matrix as pandas DataFrame, indexed
            by 'Borough' with a single 'Match' column, best match first

    Raises:
        TypeError - if `p` is not provided

    """
    if W is None:
        W = df_groups_norm
    if p is None:
        raise TypeError("`p` is required: pass the DataFrame returned by create_preferences()")

    densities = W.set_index(['Borough'])
    match = (p['Preference'] * densities).sum(axis=1)

    # mergesort is stable: boroughs with equal match keep their order from `W`,
    # so the recommendation is deterministic.
    df_rec = match.to_frame(name='Match').sort_values(by='Match', ascending=False, kind='mergesort')
    rec_boroughs = df_rec.head(n_brghs).index.tolist()

    return rec_boroughs, df_rec


# Fill colour of a venue marker, keyed by venue group. 'Nightlife' (the label
# used by `create_preferences`) shares the colour of 'Going out' so that
# either label can be plotted.
_GROUP_COLORS = {
    'Eating out': '#e41a1c',
    'Entertainment': '#377eb8',
    'Going out': '#ffff33',
    'Nightlife': '#ffff33',
    'Green spaces': '#984ea3',
    'Groceries': '#ff7f00',
    'Health and Sports': '#4daf4a',
    'Other': '#999999',
    'Public Transport': '#a65628',
    'Shopping': '#f781bf',
}
# Colour for a group that has no entry in `_GROUP_COLORS` (same grey as 'Other').
_FALLBACK_GROUP_COLOR = '#999999'


def _add_borough_markers(map_rec, df_boroughs, rent_range):
    "Adds one black marker per borough, labelled with its name and the requested rent range"
    rent_pref = f"{rent_range[0]} - {rent_range[1]}"

    for borough, lat, lon in zip(df_boroughs['Borough'],
                                 df_boroughs['BoroughLat'],
                                 df_boroughs['BoroughLon']):
        # Label with borough name and rent range; \xA3 is the pound sign
        lbl_str = f"{borough} ({rent_pref}\xA3)"

        folium.CircleMarker(
            [lat, lon],
            radius=6,
            popup=folium.Popup(lbl_str, parse_html=True),
            color='black',
            weight=1,
            fill=True,
            fill_color='black',
            fill_opacity=1.0,
        ).add_to(map_rec)


def _add_venue_markers(map_rec, df_venues):
    "Adds one marker per venue, coloured by the venue's group"
    for lat, lon, venue, group in zip(df_venues['Venue Latitude'],
                                      df_venues['Venue Longitude'],
                                      df_venues['Venue'],
                                      df_venues['Group']):
        # parse_html=True escapes the text, so a venue name containing markup
        # or quotes cannot break (or inject into) the generated map page.
        label = folium.Popup(f"{venue} ({group})", parse_html=True)

        folium.CircleMarker(
            [lat, lon],
            radius=4,
            popup=label,
            color='black',
            weight=1,
            fill=True,
            fill_color=_GROUP_COLORS.get(group, _FALLBACK_GROUP_COLOR),
            fill_opacity=0.9,
        ).add_to(map_rec)


def recommend_and_plot(df_groups, df_rent, rent_range=None, acm_type=None, ranking=None, n=5):
    """
    Recommends top n boroughs and plots the results on the map

    Boroughs are first filtered by accommodation type and rent range
    (`filter_rent_data`), then scored against the user's group ranking
    (`create_preferences` + `recommend_boroughs`). The recommended boroughs
    are printed, and their venues belonging to the ranked groups are drawn
    on a folium map together with the borough boundaries.

    Uses the notebook-level constants ALL_VENUES, LONDON and LONDON_GEOJSON.

    Inputs:
        df_groups - pandas DataFrame with a 'Borough' column and one density
            column per venue group
        df_rent - pandas DataFrame containing rent data
        rent_range - a list or a tuple with r_min and r_max rent ranges
        acm_type - an iterable or a string specifying accommodation types
        ranking - list of venue group names, most important first
        n - int, number of boroughs to recommend

    Returns:
        map_rec - folium.Map with borough outlines, borough markers and
            venue markers

    """
    available_boroughs = filter_rent_data(df=df_rent, categories=acm_type, rent_range=rent_range)
    p = create_preferences(ranking=ranking)
    df_grp_filtered = df_groups.loc[df_groups['Borough'].isin(available_boroughs)]
    rec_boroughs, df_rec = recommend_boroughs(W=df_grp_filtered, p=p, n_brghs=n)

    # Choropleth needs 'Borough' as a regular column
    df_rec = df_rec.reset_index()

    # Venues of the ranked groups inside the recommended boroughs
    in_rec_borough = ALL_VENUES['Borough'].isin(rec_boroughs)
    in_ranked_group = ALL_VENUES['Group'].isin(ranking)
    df_matched = ALL_VENUES.loc[in_rec_borough & in_ranked_group]

    print('='*80)
    print('= Recommended boroughs based on your preferences')
    print('='*80)

    for i, b in enumerate(rec_boroughs):
        print(f"#{i+1}: {b}")
    print('='*80)
    print('= Plotting venues...')

    # Plotting
    map_rec = folium.Map(location=LONDON, tiles='cartodbpositron', zoom_start=10)

    # Borough boundaries. The match values are bound as choropleth data, but
    # fill_opacity=0 keeps the fill transparent, so only the outlines (and the
    # hover highlight) are visible.
    folium.Choropleth(
        geo_data=LONDON_GEOJSON,
        fill_color='BuPu',
        data=df_rec,
        columns=['Borough', 'Match'],
        key_on='feature.properties.name',
        weight=1,
        line_color='black',
        fill_opacity=0,
        line_opacity=0.5,
        highlight=True,
    ).add_to(map_rec)

    df_boroughs = df_matched[['Borough', 'BoroughLat', 'BoroughLon']].drop_duplicates()

    _add_borough_markers(map_rec, df_boroughs, rent_range)
    _add_venue_markers(map_rec, df_matched)

    return map_rec

In [21]:
# Interactive stuff starts here!