# Synthesizing data with time-sensitive location data

## Reconstructing books/members/events

In [1]:
import sys; sys.path.insert(0,'..')
from geotaste import *

In [2]:
def get_book_id(x):
    """Extract the book slug from a URI that contains ``/books/``.

    Parameters
    ----------
    x : str
        A URI; anything after the first ``/books/`` is treated as the slug.

    Returns
    -------
    str
        The slug with one trailing ``/`` removed, or ``''`` when ``x`` is not
        a string or has no ``/books/`` segment.
    """
    # Non-string cells (e.g. NaN) have no id; avoid a TypeError on the `in` test.
    if not isinstance(x, str) or '/books/' not in x:
        return ''
    slug = x.split('/books/', 1)[1]
    # Drop the final character only when it really is the trailing slash;
    # slicing [:-1] unconditionally truncated URIs that lack one.
    return slug[:-1] if slug.endswith('/') else slug

@cache
def get_books_df():
    """Return the 'books' table indexed by ``book_id``.

    The table comes from ``get_urlpath_df('books')`` with missing values
    replaced by empty strings; ``book_id`` is derived from each row's ``uri``
    using ``get_book_id``.
    """
    books = get_urlpath_df('books').fillna('')
    with_ids = books.assign(book_id=books['uri'].map(get_book_id))
    return with_ids.set_index('book_id')

In [3]:
# Build the books table (indexed by book_id).
df_books=get_books_df()
# df_books

In [4]:
def get_member_id(x):
    """Extract the member slug from a URI that contains ``/members/``.

    Parameters
    ----------
    x : str
        A URI; anything after the first ``/members/`` is treated as the slug.

    Returns
    -------
    str
        The slug with one trailing ``/`` removed, or ``''`` when ``x`` is not
        a string or has no ``/members/`` segment.
    """
    # Non-string cells (e.g. NaN) have no id; avoid a TypeError on the `in` test.
    if not isinstance(x, str) or '/members/' not in x:
        return ''
    slug = x.split('/members/', 1)[1]
    # Drop the final character only when it really is the trailing slash;
    # slicing [:-1] unconditionally truncated URIs that lack one.
    return slug[:-1] if slug.endswith('/') else slug

@cache
def get_members_df():
    """Return the 'members' table indexed by ``member_id``, with derived columns.

    Missing values are replaced with empty strings before deriving:

    - ``is_expat``: True when 'France' does not occur in ``nationalities``
      (so a blank ``nationalities`` value also yields True).
    - ``has_wikipedia`` / ``has_viaf``: whether the respective URL is non-empty.
    - ``birth_decade``: first three characters of ``birth_year`` plus ``'0s'``,
      or ``''`` when the year is blank.
    - ``generation``: ``parse_generation`` applied to ``birth_year``.
    - ``member_id``: slug taken from ``uri`` via ``get_member_id``.
    """
    def decade_label(year):
        # e.g. 1899 -> '1890s'; a blank year stays blank.
        return str(year)[:3] + '0s' if year else ''

    members = get_urlpath_df('members').fillna('')
    enriched = members.assign(
        is_expat=members['nationalities'].map(lambda nats: 'France' not in nats),
        has_wikipedia=members['wikipedia_url'].ne(''),
        has_viaf=members['viaf_url'].ne(''),
        birth_decade=members['birth_year'].map(decade_label),
        generation=members['birth_year'].apply(parse_generation),
        member_id=members['uri'].map(get_member_id),
    )
    return enriched.set_index('member_id')

In [5]:
# Build the members table (indexed by member_id, with the derived columns).
df_members = get_members_df()
# df_members

In [6]:
def get_event_id(row):
    """Build a human-readable identifier for one event row.

    The id is ``'<member_id> <EVENT_TYPE>S'`` followed, when present, by the
    book id and a date part:

    - ``ON <start_date>`` when only a start date exists or start equals end;
    - ``FROM <start_date> TO <end_date>`` when both exist and differ;
    - nothing when there is no start date.

    Parameters
    ----------
    row : object
        Anything exposing ``member_id``, ``event_type``, ``book_id``,
        ``start_date`` and ``end_date`` attributes (e.g. a DataFrame row).

    Returns
    -------
    str
        The identifier, with components separated by single spaces.
    """
    parts = [row.member_id, (row.event_type + "s").upper()]
    # Events without a book have an empty book_id; skipping it avoids the
    # doubled or trailing space the plain f-string used to leave in the id.
    if row.book_id:
        parts.append(row.book_id)
    if row.start_date:
        if row.end_date and row.start_date != row.end_date:
            parts.append(f'FROM {row.start_date} TO {row.end_date}')
        else:
            parts.append(f'ON {row.start_date}')
    return ' '.join(parts)

@cache
def get_events_df():
    """Return the 'events' table with one row per (event, member), indexed by ``event_id``.

    Rows whose ``member_uris`` lists several members (separated by ``;``) are
    expanded into one row per member. Every other ``;``-separated value in the
    row is split the same way and the part at the member's position is kept.
    Columns whose name starts with ``member_`` or ``item_`` are dropped in
    favour of the derived ``member_id`` and ``book_id``.

    Returns
    -------
    pandas.DataFrame
        Expanded events, indexed by the id built by ``get_event_id``.
    """
    # get as exists
    df = get_urlpath_df('events').fillna('')

    ## expand by semi colon!
    def getrowinfo(row, i):
        """Return ``row`` as a dict, keeping the i-th part of each ';'-separated value."""
        odx = {}
        for k, v in dict(row).items():
            if ';' in str(v):
                vs = str(v).split(';')
                # A value with fewer parts than there are members is kept
                # whole instead of raising IndexError for the whole load.
                v = vs[i] if i < len(vs) else v
            odx[k] = v
        return odx

    df_events_expanded = pd.DataFrame(
        {
            **getrowinfo(row, mi),
            'member_uri': muri,
            'member_id': get_member_id(muri),
            'book_id': get_book_id(row.item_uri),
        }
        for _, row in df.iterrows()
        for mi, muri in enumerate(row.member_uris.split(';'))
    )

    keepcols = ['member_id', 'book_id'] + [
        col for col in df if col.split('_')[0] not in {'member', 'item'}
    ]
    # Take an explicit copy: assigning event_id onto a column-subset slice
    # triggers pandas' SettingWithCopyWarning.
    odf = df_events_expanded[keepcols].copy()
    odf['event_id'] = odf.apply(get_event_id, axis=1)
    return odf.set_index('event_id')


In [7]:
# Build the expanded events table (one row per event and member) and display it.
df_events = get_events_df()
df_events

Unnamed: 0_level_0,member_id,book_id,event_type,start_date,end_date,subscription_price_paid,subscription_deposit,subscription_duration,subscription_duration_days,subscription_volumes,...,subscription_purchase_date,reimbursement_refund,borrow_status,borrow_duration_days,purchase_price,currency,source_type,source_citation,source_manifest,source_image
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
linossier-raymonde GENERICS butler-pigs-pigs ON 1920,linossier-raymonde,butler-pigs-pigs,Generic,1920,,,,,,,...,,,,,,,Lending Library Card,"Sylvia Beach, Raymonde Linossier Lending Libra...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/00...
garreta SUBSCRIPTIONS ON 1921,garreta,,Subscription,1921,,,,,,,...,1921,,,,,FRF,Address Book,"Sylvia Beach, Address Book 1919–1935, box 69, ...",,
rhys BORROWS conrad-typhoon FROM 1922 TO 1922-08-23,rhys,conrad-typhoon,Borrow,1922,1922-08-23,,,,,,...,,,Returned,,,,Lending Library Card,"Sylvia Beach, Rhys Lending Library Card, Box 4...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/67...
walsh GENERICS bennett-pretty-lady ON 1922,walsh,bennett-pretty-lady,Generic,1922,,,,,,,...,,,,,,,Lending Library Card,"Sylvia Beach, Ernest Walsh Lending Library Car...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/af...
lincoln-1 SUBSCRIPTIONS ON 1922,lincoln-1,,Subscription,1922,,,7.0,,,,...,1922,,,,,FRF,Address Book,"Sylvia Beach, Address Book 1919–1935, box 69, ...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gutmann-henri REIMBURSEMENTS,gutmann-henri,,Reimbursement,,,,,,,,...,,,,,,,Address Book,"Sylvia Beach, Address Book 1919–1935, box 69, ...",,
bernheim-antoinette GENERICS isherwood-lions-shadows-education,bernheim-antoinette,isherwood-lions-shadows-education,Generic,,,,,,,,...,,,,,,,Lending Library Card,"Sylvia Beach, Antoinette Bernheim Lending Libr...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif-cloud.princeton.edu/iiif/2/eb%2F7...
baldwin-mr REIMBURSEMENTS,baldwin-mr,,Reimbursement,,,,,,,,...,,,,,,,Address Book,"Sylvia Beach, Address Book 1919–1935, box 69, ...",,
bernheim-antoinette GENERICS forster-longest-journey,bernheim-antoinette,forster-longest-journey,Generic,,,,,,,,...,,,,,,,Lending Library Card,"Sylvia Beach, Antoinette Bernheim Lending Libr...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif-cloud.princeton.edu/iiif/2/eb%2F7...


In [8]:
@cache
def get_borrow_events_df():
    """Return only the 'Borrow' events, without columns that belong to other event kinds.

    Dropped columns are ``event_type`` itself and any column whose name, up to
    the first underscore, is ``subscription``, ``reimbursement``, ``purchase``
    or ``currency``.
    """
    dropped_prefixes = {'subscription','reimbursement','purchase','currency'}
    events = get_events_df()
    borrows = events[events.event_type == 'Borrow']
    kept_columns = [
        col for col in borrows
        if col != 'event_type' and col.split('_')[0] not in dropped_prefixes
    ]
    return borrows[kept_columns]
    

In [9]:
# Keep only the 'Borrow' events and display them.
df_borrows = get_borrow_events_df()
df_borrows

Unnamed: 0_level_0,member_id,book_id,start_date,end_date,borrow_status,borrow_duration_days,source_type,source_citation,source_manifest,source_image
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rhys BORROWS conrad-typhoon FROM 1922 TO 1922-08-23,rhys,conrad-typhoon,1922,1922-08-23,Returned,,Lending Library Card,"Sylvia Beach, Rhys Lending Library Card, Box 4...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/67...
lanux-eyre-de BORROWS woolf-night-day FROM 1923 TO 1923-11-07,lanux-eyre-de,woolf-night-day,1923,1923-11-07,Returned,,Lending Library Card,"Sylvia Beach, Eyre de Lanux Lending Library Ca...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/c5...
tery BORROWS james-joyce FROM 1924 TO 1924-10-22,tery,james-joyce,1924,1924-10-22,Returned,,Lending Library Card,"Sylvia Beach, Mlle Simone Tery Lending Library...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/c2...
tery BORROWS freeman-portrait-george-moore FROM 1924 TO 1924-10-22,tery,freeman-portrait-george-moore,1924,1924-10-22,Returned,,Lending Library Card,"Sylvia Beach, Mlle Simone Tery Lending Library...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/c2...
macleish-ada BORROWS stern-tents-israel FROM 1926 TO 1926-01-14,macleish-ada,stern-tents-israel,1926,1926-01-14,Returned,,Lending Library Card,"Sylvia Beach, Ada and Archibald MacLeish Lendi...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/22...
...,...,...,...,...,...,...,...,...,...,...
hommel-bernard BORROWS heine-poems,hommel-bernard,heine-poems,,,Returned,,Lending Library Card,"Sylvia Beach, Bernard Hommel Lending Library C...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/e4...
hommel-bernard BORROWS cowley-exiles-return-narrative,hommel-bernard,cowley-exiles-return-narrative,,,Returned,,Lending Library Card,"Sylvia Beach, Bernard Hommel Lending Library C...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/e4...
hommel-bernard BORROWS wilson-axels-castle-study,hommel-bernard,wilson-axels-castle-study,,,Returned,,Lending Library Card,"Sylvia Beach, Bernard Hommel Lending Library C...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/e4...
hommel-bernard BORROWS city-dreadful-night,hommel-bernard,city-dreadful-night,,,Returned,,Lending Library Card,"Sylvia Beach, Bernard Hommel Lending Library C...",https://figgy.princeton.edu/concern/scanned_re...,https://iiif.princeton.edu/loris/figgy_prod/e4...


## Finding dwellings

In [30]:
def get_dwelling_id(row):
    """Build a human-readable identifier for one dwelling row.

    The id is ``'<member_id> DWELT AT <street_address>'``, followed by
    ``' FROM <start_date>'`` and/or ``' UNTIL <end_date>'`` for whichever
    dates are non-empty.
    """
    pieces = [f'{row.member_id} DWELT AT {row.street_address}']
    if row.start_date:
        pieces.append(f' FROM {row.start_date}')
    if row.end_date:
        pieces.append(f' UNTIL {row.end_date}')
    return ''.join(pieces)
    

@cache
def get_dwellings_df():
    """Return the 'dwellings' table indexed by ``dwelling_id``.

    Missing values are replaced with empty strings and ``member_id`` is
    derived from ``member_uri``. Columns are ordered as ``member_id``,
    ``start_date``, ``end_date``, then the remaining columns, then the source
    table's own ``*_id`` columns.

    Returns
    -------
    pandas.DataFrame
        Dwellings, indexed by the id built by ``get_dwelling_id``.
    """
    df = get_urlpath_df('dwellings').fillna('')
    startcols = ['member_id','start_date','end_date']
    # Collected before member_id is added, so member_id appears only in
    # startcols and is not duplicated at the end.
    endcols = [c for c in df if c.endswith('_id')]
    df['member_id'] = df.member_uri.apply(get_member_id)
    placed = set(startcols + endcols)  # built once rather than once per column
    middlecols = [c for c in df if c not in placed]
    # Take an explicit copy: assigning dwelling_id onto a column-reordered
    # slice triggers pandas' SettingWithCopyWarning.
    odf = df[startcols + middlecols + endcols].copy()
    odf['dwelling_id'] = odf.apply(get_dwelling_id, axis=1)
    return odf.set_index('dwelling_id')
    

In [31]:
# Build the dwellings table and display it.
# NOTE(review): the saved output below shows a plain integer index rather than
# dwelling_id, so it looks stale relative to get_dwellings_df — re-run to confirm.
df_dwellings = get_dwellings_df()
df_dwellings

Unnamed: 0,member_id,start_date,end_date,member_uri,start_date_precision,end_date_precision,street_address,city,postal_code,latitude,longitude,arrrondissement,person_id,account_id,address_id,location_id,care_of_person_id,country_id
0,abbott-berenice,,,https://shakespeareandco.princeton.edu/members...,,,44 rue du Bac,Paris,75007,48.85681,2.32644,7.0,9985,7151,1237,121,,2.0
1,abbott,,,https://shakespeareandco.princeton.edu/members...,,,246 boulevard Raspail,Paris,75014,48.83818,2.33049,14.0,2244,1583,1238,444,,2.0
2,ernst-abel,,,https://shakespeareandco.princeton.edu/members...,,,36 rue Bonaparte,Paris,75006,48.85509,2.33358,6.0,9996,7158,1239,192,,2.0
3,yvonne-abelin,,,https://shakespeareandco.princeton.edu/members...,,,7 rue de l’Odéon,Paris,75006,48.85118,2.33879,6.0,10000,7161,1240,917,,2.0
4,ably-2,,,https://shakespeareandco.princeton.edu/members...,,,10 bis avenue de la Grande Armée,Paris,75017,48.81119,2.2705,17.0,1288,902,1241,834,,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3223,van-den-bergh,,,https://shakespeareandco.princeton.edu/members...,,,9 avenue Frédéric le Play,Paris,75007,48.85433,2.30431,7.0,562,5096,789,731,,2.0
3224,van-den-bergh,,,https://shakespeareandco.princeton.edu/members...,,,,Courmayeur,,45.79692,6.96896,,562,5096,790,732,,5.0
3225,van-den-bergh,,,https://shakespeareandco.princeton.edu/members...,,,72 avenue de La Bourdonnais,Paris,75007,48.85503,2.30394,7.0,562,5096,791,733,,2.0
3226,van-den-bergh,,,https://shakespeareandco.princeton.edu/members...,,,91 rue Olivier de Serres,Paris,75015,48.83205,2.29385,15.0,562,5096,792,734,,2.0


In [1]:
# NOTE(review): leftover IPython source lookup (`??`); the saved output reports
# that `get_combined_df` was not found. Consider removing this cell.
get_combined_df??

Object `get_combined_df` not found.


In [2]:
# NOTE(review): repeats the earlier `df_members = get_members_df()` cell. The
# saved output is a NameError, presumably from running on a fresh kernel before
# the definition cells — confirm and drop or re-run in order.
df_members = get_members_df()

NameError: name 'get_members_df' is not defined