## Synthesizing data with time-sensitive location data

In [3]:
import sys; sys.path.insert(0,'..')
from geotaste import *


In [4]:
def unique_vals(l, sort_by_count=True, sort_by_value=False):
    """Return the distinct, non-empty values of ``l`` as a list.

    Falsy entries (``None``, ``''``, ``0``, ...) and NaN floats are dropped
    before de-duplication.

    Args:
        l: Iterable of hashable values.
        sort_by_count: If true (and ``sort_by_value`` is false), order values
            from most to least frequent; ties keep first-seen order.
        sort_by_value: If true, order values with ``sorted``. Takes
            precedence over ``sort_by_count``.

    Returns:
        A list of unique values. When neither flag is set the values are
        returned in first-seen order.
    """
    # NaN is truthy, so the plain truthiness test lets it through; it would
    # then appear as an option and break ``sorted`` when mixed with strings.
    # ``x == x`` is False only for NaN.
    kept = [x for x in l if x and x == x]
    if sort_by_value:
        return sorted(set(kept))
    if sort_by_count:
        # Counter keeps first-seen order and most_common() sorts stably, so
        # equally frequent values come out in a reproducible order.
        return [value for value, _ in Counter(kept).most_common()]
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(kept))
    
def get_dropdown(l, name='', desc='', sort_by_count=True, sort_by_value=False):
    """Build a ``Dropdown`` whose options are ``'*'`` plus the unique values of ``l``.

    Args:
        l: Iterable of values to offer as options.
        name: Stored on the widget as its ``name`` attribute.
        desc: Passed to the widget as its ``description``.
        sort_by_count: Forwarded to ``unique_vals``.
        sort_by_value: Forwarded to ``unique_vals``.

    Returns:
        The configured ``Dropdown`` widget.
    """
    distinct = unique_vals(
        l,
        sort_by_count=sort_by_count,
        sort_by_value=sort_by_value,
    )
    # '*' always comes first in the option list.
    dropdown = Dropdown(options=['*', *distinct], description=desc)
    dropdown.name = name
    return dropdown

In [7]:
# get_authors_df().iloc[0]

In [23]:
def get_author_choices():
    """Create the filter widgets for the authors table.

    Returns:
        A dict mapping each widget's ``name`` attribute to the widget, in
        this order: name, gender, nationality, language, birth year.
    """
    authors = get_authors_df()

    # Slider bounds come from the birth years that parse as numbers;
    # anything unparseable is coerced to NaN first.
    birth_years = pd.to_numeric(authors.author_birth_year, errors='coerce')
    earliest, latest = int(birth_years.min()), int(birth_years.max())

    year_slider = widgets.IntRangeSlider(
        value=[earliest, latest],
        min=earliest,
        max=latest,
        step=1,
        description='Birth year',
    )
    year_slider.name = 'author_birth_year'

    widgets_in_order = (
        # Name and gender are sorted alphabetically.
        get_dropdown(authors.author_name, 'author_name', 'Name', sort_by_value=True),
        get_dropdown(authors.author_gender, 'author_gender', 'Gender', sort_by_value=True),
        # Nationality and language use get_dropdown's default ordering.
        get_dropdown(authors.author_nationality, 'author_nationality', 'Nationality'),
        get_dropdown(authors.author_language, 'author_language', 'Language'),
        year_slider,
    )

    by_name = {}
    for widget in widgets_in_order:
        by_name[widget.name] = widget
    return by_name

In [24]:
# Build the author filter widgets, keyed by each widget's `name` attribute.
choices = get_author_choices()

In [25]:
# Render every filter widget in the cell output (the dict key `y` is unused).
for y,x in choices.items(): display(x)

Dropdown(description='Name', options=('*', 'A. A. Brill', 'A. A. Milne', 'A. Brewster', 'A. C. Benson', 'A. C.…

Dropdown(description='Gender', options=('*', 'Female', 'Male'), value='*')

Dropdown(description='Nationality', options=('*', 'GB - United Kingdom', 'US - United States', 'FR - France', …

Dropdown(description='Language', options=('*', 'eng - English', 'fre - French', 'ger - German', 'rus - Russian…

IntRangeSlider(value=(-551, 1942), description='Birth year', max=1942, min=-551)

In [57]:
def parse_author_choices(choices):
    """Filter the authors table by the current widget selections.

    Args:
        choices: Dict of filter widgets, passed through to ``parse_choices``.

    Returns:
        Whatever ``parse_choices`` returns for the authors DataFrame.
    """
    authors = get_authors_df()
    return parse_choices(choices, authors)

def parse_choices(choices, df):
    """Filter ``df`` according to the current values of the given widgets.

    Each widget's ``name`` attribute is used as the column to filter on.

    * ``Dropdown``: keep rows whose column equals the selected value, unless
      the selection is the wildcard ``'*'``.
    * ``IntRangeSlider``: coerce the column to numeric into a helper column
      ``<name>_q`` and keep rows where it lies within the selected range
      (bounds inclusive). Rows that fail to parse become NaN and are dropped.

    Args:
        choices: Dict of widgets; only the values are used.
        df: DataFrame to filter. It is never modified in place.

    Returns:
        The filtered DataFrame. It includes a ``<name>_q`` helper column for
        every range slider that was applied.
    """
    for choice in choices.values():
        if isinstance(choice, Dropdown) and choice.value != '*':
            df = df[df[choice.name] == choice.value]

        if isinstance(choice, widgets.IntRangeSlider):
            minv, maxv = choice.value
            numeric_col = f'{choice.name}_q'
            # assign() returns a new frame, so neither the caller's DataFrame
            # nor a filtered slice of it is written to (which would trigger
            # SettingWithCopyWarning).
            df = df.assign(
                **{numeric_col: pd.to_numeric(df[choice.name], errors='coerce')}
            )
            # between() is inclusive on both ends and False for NaN.
            df = df[df[numeric_col].between(minv, maxv)]
    return df

In [58]:
# Apply the current widget selections to the authors table.
choice = parse_author_choices(choices)