## Synthesizing data with time-sensitive location data

In [1]:
import sys; sys.path.insert(0,'..')
from geotaste import *


### Authors

In [2]:
# choices = get_author_choices()
# show_author_choices(choices)

In [3]:
# parse_author_choices(choices)

### Books

In [4]:
# Load the books table with with_author_data=False (presumably this leaves out
# the author_* columns -- confirm in geotaste) and show one random row.
# No random seed is set, so the sampled row differs on every run.
df = get_books_df(with_author_data=False)
df.sample(n=1).iloc[0]

uri                  https://shakespeareandco.princeton.edu/books/a...
title                                                  Collected Poems
author                                                               Æ
editor                                                                
translator                                                            
introduction                                                          
illustrator                                                           
photographer                                                          
year                                                            1920.0
format                                                            Book
uncertain                                                        False
ebook_url                 https://archive.org/details/dli.ernet.504076
volumes_issues                                                        
notes                                                                 
event_

In [5]:
# Reload the books table with the default arguments; this rebinds `df`,
# replacing the frame loaded with with_author_data=False in the earlier cell.
df=get_books_df()

In [6]:
# Frequency of each value in the `introduction` column; in the output below
# the most common value is the empty string (books with no introduction credit).
df.introduction.value_counts()

                               5995
Ellis, Havelock                   4
Yeats, William Butler             3
Wells, H. G.                      3
Swinburne, Algernon Charles       2
                               ... 
Phelps, William Lyon              1
Hyde, H. Montgomery               1
Niles, Abbe                       1
Street, G. S.                     1
Le Gallienne, Eva                 1
Name: introduction, Length: 63, dtype: int64

In [19]:
def _parse_circulation_years(cyearstr):
    return [int(cyr) for cyr in str(cyearstr).split(';') if cyr and str(cyr).isdigit()]

def get_book_choices():
    """Build the filter controls for the books table.

    Loads the books frame via ``get_books_df(with_author_data=False)`` and
    creates one control per filterable column: selects for title, format,
    uncertain, editor, translator and introduction, followed by integer range
    sliders for circulation years, publication year, event count and borrow
    count.

    Returns:
        dict: Each control keyed by its ``.name`` attribute, in creation order.
    """
    books = get_books_df(with_author_data=False)

    # Flatten every book's parsed circulation years into one list so the
    # slider is built from all years seen across the table.
    circ_years = []
    for parsed in books.circulation_years.apply(_parse_circulation_years):
        circ_years.extend(parsed)

    selects = [
        # Only the title list is sorted by value (i.e. alphabetically).
        get_select(books.title, 'title', 'Title', sort_by_value=True),
        get_select(books.format, 'format', 'Format', sort_by_value=False),
        get_select(books.uncertain, 'uncertain', 'Uncertain?', sort_by_value=False),
        get_select(books.editor, 'editor', 'Editor', sort_by_value=False),
        get_select(books.translator, 'translator', 'Translator', sort_by_value=False),
        get_select(books.introduction, 'introduction', 'Intro by', sort_by_value=False),
    ]

    sliders = [
        get_int_slider(circ_years, 'circulation_years', 'Circ. years'),
        get_int_slider(books.year, 'year', "Pub year"),
        get_int_slider(books.event_count, 'event_count', "Event count"),
        get_int_slider(books.borrow_count, 'borrow_count', "Borrow count"),
    ]

    by_name = {}
    for control in selects + sliders:
        by_name[control.name] = control
    return by_name

In [20]:
# Build the book filter controls and render them; the resulting `choices`
# dict is what parse_book_choices() reads in a later cell.
choices = get_book_choices()
show_choices(choices)

### Book filter widgets

SelectMultiple(description='Title', options=("'Twixt Land and Sea", '12:30 from Heaven', '14a', '1914 and Othe…

SelectMultiple(description='Format', options=('Book', '(none)', 'Periodical', 'Photograph', 'Article', 'Phonog…

SelectMultiple(description='Uncertain?', options=(False, True), rows=2, value=())

SelectMultiple(description='Editor', options=('(none)', "O'Brien, Edward J.", 'Morley, John', 'Ellis, Havelock…

SelectMultiple(description='Translator', options=('(none)', 'Garnett, Constance', 'Paul, Cedar;Paul, Eden', 'L…

SelectMultiple(description='Intro by', options=('(none)', 'Ellis, Havelock', 'Wells, H. G.', 'Yeats, William B…

IntRangeSlider(value=(1919, 1962), description='Circ. years', max=1962, min=1919)

IntRangeSlider(value=(1485, 1962), description='Pub year', max=1962, min=1485)

IntRangeSlider(value=(0, 149), description='Event count', max=149)

IntRangeSlider(value=(0, 115), description='Borrow count', max=115)

In [14]:
def parse_book_choices(choices):
    """Filter the books table according to the given filter controls.

    Every control except ``'circulation_years'`` is delegated to
    ``parse_choices``. The circulation-year range is applied here because that
    column holds a ``;``-separated string of years per book: a book is kept
    when at least one of its parsed years lies in the selected inclusive range.
    The year filter is skipped when the control is absent or still spans its
    full ``(min, max)`` extent.

    Args:
        choices: Mapping of control name to control, as returned by
            ``get_book_choices``.

    Returns:
        tuple: ``(df, desc)`` -- the filtered frame and the description dict
        from ``parse_choices``, with ``desc['circulation_years']`` set to the
        selected range when the year filter was applied.
    """
    books, desc = parse_choices(choices, get_books_df(), except_keys={'circulation_years'})

    year_slider = choices.get('circulation_years')
    if year_slider is None:
        return books, desc

    selected = year_slider.value
    if selected == (year_slider.min, year_slider.max):
        # Slider untouched: nothing to filter on.
        return books, desc

    wanted_years = set(range(selected[0], selected[1] + 1))

    def circulated_in_range(cyrstr):
        # True when the book has at least one circulation year in the range.
        return not wanted_years.isdisjoint(_parse_circulation_years(cyrstr))

    books = books[books.circulation_years.apply(circulated_in_range)]
    desc['circulation_years'] = selected
    return books, desc

In [16]:
# Apply the current widget selections to the books table.
# `odesc` is the description dict returned alongside the filtered frame; it
# prints as {} in the output below.
odf,odesc=parse_book_choices(choices)
print(odesc)
odf

{}


Unnamed: 0_level_0,uri,title,author,editor,translator,introduction,illustrator,photographer,year,format,...,author_iccu_url,author_isni_name,author_isni_url,author_wikidata_url,author_wikipedia_url,author_worldcat_identity_url,author_is_expat,author_birth_decade,author_generation,borrow_count_q
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
malory-morte-darthur,https://shakespeareandco.princeton.edu/books/m...,Le Morte d'Arthur,"Malory, Thomas",,,,,,1485.0,Book,...,,"Malory, Thomas 1408-1471",http://isni.org/isni/0000000374003514,https://www.wikidata.org/entity/Q309907#siteli...,https://en.wikipedia.org/wiki/Thomas_Malory,https:https://www.worldcat.org/identities/lccn...,True,1400s,,7
more-utopia,https://shakespeareandco.princeton.edu/books/m...,Utopia,"More, Thomas",,,,,,1516.0,Book,...,http://id.sbn.it/af/IT\ICCU\CFIV\052646,Tom‡s Moro 1478-1535 Santo,http://isni.org/isni/0000000031820467,https://www.wikidata.org/entity/Q42544#sitelin...,https://en.wikipedia.org/wiki/Thomas_More,https:https://www.worldcat.org/identities/lccn...,True,1470s,,2
norton-gorboduc,https://shakespeareandco.princeton.edu/books/n...,Gorboduc,"Norton, Thomas",,,,,,1561.0,Book,...,,"Norton, Thomas 1532-1584",http://isni.org/isni/0000000108814357,https://www.wikidata.org/entity/Q337443#siteli...,https://en.wikipedia.org/wiki/Thomas_Norton,https:https://www.worldcat.org/identities/lccn...,True,1530s,,1
lyly-euphues-anatomy-wit,https://shakespeareandco.princeton.edu/books/l...,Euphues: The Anatomy of Wit,"Lyly, John",,,,,,1578.0,Book,...,,John Lyly English politician,http://isni.org/isni/0000000108985156,https://www.wikidata.org/entity/Q261627#siteli...,https://en.wikipedia.org/wiki/John_Lyly,https:https://www.worldcat.org/identities/lccn...,True,1550s,,1
spenser-shepheards-calendar,https://shakespeareandco.princeton.edu/books/s...,The Shepheard's Calendar,"Spenser, Edmund",,,,,,1579.0,Book,...,,"Spenser, Edmund 1552-1599",http://isni.org/isni/0000000453028242,https://www.wikidata.org/entity/Q4352055#sitel...,https://en.wikipedia.org/wiki/Edmund_Spenser,https:https://www.worldcat.org/identities/lccn...,True,1550s,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
unclear-4,https://shakespeareandco.princeton.edu/books/u...,[unclear]y,,,,,,,,,...,,,,,,,,,,1
unidentified-benn-s-library,https://shakespeareandco.princeton.edu/books/u...,[unidentified Benn’s Library],,,,,,,,,...,,,,,,,,,,0
unknown,https://shakespeareandco.princeton.edu/books/u...,[unknown],,,,,,,,,...,,,,,,,,,,1
ford-transatlantic-review,https://shakespeareandco.princeton.edu/books/f...,the transatlantic review,,"Ford, Ford Madox",,,,,,Periodical,...,,,,,,,,,,2
