In [3]:
# Load IPython's autoreload extension so edits to imported project modules
# can be picked up without restarting the kernel.
%load_ext autoreload

In [4]:
# Mode 2: reload all modules before each cell is executed.
%autoreload 2

In [5]:
from utils.dataset_download import *
import pickle
import json
from pathlib import Path, PosixPath
import pandas as pd

In [6]:
# Read the OED API credentials from the project's JSON credentials file.
credentials = json.loads(Path('oed_experiments/oed_credentials.json').read_text())

In [7]:
# OED word id of the lemma to query; it is also embedded in the names of the
# files this notebook writes.
lemma_id = "machine_nn01"

In [8]:
# Directory that receives the files written by this notebook.
save_path = Path("./data")
# exist_ok=True keeps this cell re-runnable once the directory exists.
save_path.mkdir(exist_ok=True)

In [9]:
# Ask the OED API for the lemma's word entry, requesting its senses and
# quotations in the same response.
oed_flags = 'include_senses=true&include_quotations=true'
sense_json = query_oed(credentials, 'word', lemma_id, flags=oed_flags)

# Turn the JSON response into a dataframe.
senses_df = convert_json_to_dataframe(sense_json)

In [10]:
# Persist the senses dataframe in two formats under save_path.
senses_pickle_target = save_path / f"senses_{lemma_id}.pickle"
senses_tsv_target = save_path / f"senses_{lemma_id}.tsv"

# Binary pickle: reloaded later so the API does not have to be queried again.
senses_df.to_pickle(senses_pickle_target)
# Tab-separated text file (written via to_csv with a tab separator).
senses_df.to_csv(senses_tsv_target, sep='\t')

In [11]:
# Reload the senses dataframe from its pickle so the API need not be called again.
senses_pickle = save_path / f"senses_{lemma_id}.pickle"
with senses_pickle.open('rb') as in_pickle:
    machine_senses_df = pickle.load(in_pickle)

In [12]:
# get all senses that are siblings and descendants
# of the semantic class of senses listed in the previously obtained query
# NOTE(review): `responses` is reassigned from the tree_traversal.pickle file
# in the following cell, so this call can be skipped when that file is
# already present — presumably it issues many API requests; confirm.
responses = traverse_thesaurus(credentials,machine_senses_df)

In [13]:
# Load the thesaurus traversal from its pickle when it is available; only
# fall back to traversing the thesaurus through the API when the cached file
# is missing (previously that alternative existed only as commented-out code
# and a missing file raised FileNotFoundError).
# NOTE(review): pickle.load can execute code embedded in the file — only load
# pickles that were produced by this pipeline.
tree_pickle = save_path / "tree_traversal.pickle"
if tree_pickle.exists():
    with open(tree_pickle, 'rb') as in_pickle:
        responses = pickle.load(in_pickle)
else:
    responses = traverse_thesaurus(credentials, machine_senses_df)

In [14]:
# get all quotations for the senses in the responses variable
# NOTE(review): `quotations` is not referenced again in the cells shown; the
# merge step reads tree_traversal_quotations.pickle from the data directory
# instead — presumably that file is written by this call; confirm.
quotations = get_quotations_from_thesaurus(credentials,responses)

HBox(children=(FloatProgress(value=0.0, max=1289.0), HTML(value='')))




In [15]:
# Merge the information stored in the separate pickle files (lemma senses,
# thesaurus traversal, traversal quotations) into a single dataframe.
# The paths are derived from save_path and lemma_id so that changing the
# lemma above cannot silently merge a stale senses file of another lemma.
df = merge_pickled(save_path / f"senses_{lemma_id}.pickle",
                   save_path / "tree_traversal.pickle",
                   save_path / "tree_traversal_quotations.pickle")

In [17]:
# Persist the merged dataframe next to the other outputs; use save_path
# rather than a second hard-coded "./data" so the output directory is
# configured in one place.
df.to_pickle(save_path / f"{lemma_id}_all.pickle")

In [18]:
# (rows, columns) of the merged dataframe.
df.shape

(7595, 16)

In [20]:
# Shape of the subset of rows whose lemma is exactly "machine".
df.loc[df["lemma"] == "machine"].shape

(805, 16)

In [24]:
# Number of distinct sense ids in the merged dataframe.
len(set(df["sense_id"]))

1250

In [22]:
# Quotation text entries recorded for one specific sense of "machine".
# NOTE(review): the displayed output lists the same keywords/quotations twice
# (index 0.. and again from index 405) — possibly duplicate rows introduced
# by the merge; confirm and de-duplicate if unintended.
df.loc[df["sense_id"] == "machine_nn01-38473945", "text"]

0      {'keyword': 'machyne', 'full_text': 'The hole ...
1      {'keyword': 'Machine', 'full_text': 'Machine, ...
2      {'keyword': 'machine', 'full_text': 'The maist...
3      {'keyword': 'machin', 'full_text': 'Be his wis...
4      {'keyword': 'Machine', 'full_text': 'They that...
5      {'keyword': 'Machine', 'full_text': 'Behind th...
6      {'keyword': 'machine', 'full_text': 'Her imper...
7      {'keyword': 'machine', 'full_text': 'Her new l...
8      {'keyword': 'machine', 'full_text': 'Had the w...
9      {'keyword': 'machine', 'full_text': 'The mind ...
10     {'keyword': 'machine', 'full_text': 'To each m...
405    {'keyword': 'machyne', 'full_text': 'The hole ...
406    {'keyword': 'Machine', 'full_text': 'Machine, ...
407    {'keyword': 'machine', 'full_text': 'The maist...
408    {'keyword': 'machin', 'full_text': 'Be his wis...
409    {'keyword': 'Machine', 'full_text': 'They that...
410    {'keyword': 'Machine', 'full_text': 'Behind th...
411    {'keyword': 'machine', '

In [19]:
# Preview the first rows of the merged dataframe.
df.head()

Unnamed: 0,oed_reference,lemma,oed_url,word_id,id_quotation,source,part_of_speech,sense_id,definition,notes,first_use,root,main_current_sense,semantic_class_ids,transitivity,text
0,"machine, n., sense I.1a",machine,https://www.oed.com/view/Entry/111850#eid38473945,machine_nn01,machine_nn01-38473945,"{'title': 'Early Mod. Eng. Lexicogr.', 'author...",NN,machine_nn01-38473945,"A material or immaterial structure, esp. the f...",[],J. Schäfer,True,False,"[[1, 111290, 118635, 119024, 120162, 120172], ...",,"{'keyword': 'machyne', 'full_text': 'The hole ..."
1,"machine, n., sense I.1a",machine,https://www.oed.com/view/Entry/111850#eid38473945,machine_nn01,machine_nn01-38473945,"{'title': 'Early Mod. Eng. Lexicogr.', 'author...",NN,machine_nn01-38473945,"A material or immaterial structure, esp. the f...",[],J. Schäfer,True,False,"[[1, 111290, 118635, 119024, 120162, 120172], ...",,"{'keyword': 'Machine', 'full_text': 'Machine, ..."
2,"machine, n., sense I.1a",machine,https://www.oed.com/view/Entry/111850#eid38473945,machine_nn01,machine_nn01-38473945,"{'title': 'Complaynt Scotl.', 'author': None, ...",NN,machine_nn01-38473945,"A material or immaterial structure, esp. the f...",[],J. Schäfer,True,False,"[[1, 111290, 118635, 119024, 120162, 120172], ...",,"{'keyword': 'machine', 'full_text': 'The maist..."
3,"machine, n., sense I.1a",machine,https://www.oed.com/view/Entry/111850#eid38473945,machine_nn01,machine_nn01-38473945,"{'title': 'Hymnes', 'author': 'A. Hume', 'gend...",NN,machine_nn01-38473945,"A material or immaterial structure, esp. the f...",[],J. Schäfer,True,False,"[[1, 111290, 118635, 119024, 120162, 120172], ...",,"{'keyword': 'machin', 'full_text': 'Be his wis..."
4,"machine, n., sense I.1a",machine,https://www.oed.com/view/Entry/111850#eid38473945,machine_nn01,machine_nn01-38473945,"{'title': 'Hist. Quinq-articularis', 'author':...",NN,machine_nn01-38473945,"A material or immaterial structure, esp. the f...",[],J. Schäfer,True,False,"[[1, 111290, 118635, 119024, 120162, 120172], ...",,"{'keyword': 'Machine', 'full_text': 'They that..."


## Fin.