## OED Data Selection

Notebook that retrieves relevant data for OED API.

For relevant documentation, look [here](https://languages.oup.com/research/oed-researcher-api/#see-documentation)

In [45]:
import requests
import pickle
import pandas as pd

In [44]:
creds = pickle.load(open('oed_keys.pkcl',"rb"))

In [74]:
def query_oed(endpoint:str,
          query:str,
          flags:str='',
          level:str='',
          verbose=True):
    """Get data from Oxford English Dictionary
    Arguments:
        endpoint (str): select which endpoint to query, examples are word, sense, semanticclass etc
        query (str): query for the specific endpoint, most often a specific id, such as 'machine_nn01' or '120172'
        flags (str): options appended to query to include, for example, quotations instead of quotation ids
                     example "include_senses=false&include_quotations=false"
        level (str): at which level to query the endpoint, 
                     e.g. get sense of the query word, get siblings for semantic class etc
                     standard value is empty string
    Returns:
        json of the response
    """
    
    base_url = "https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2"
    url = f"{base_url}/{endpoint}/{query}" # build url
    
    if flags and level:
        raise Exception("Define either flag or level\nThese options can not be used in combination")
    
    if level: # if a level has been specified add this to the url
        url = f"{url}/{level}/"
    
    if flags:
        url = f"{url}?{flags}"
        
    response = requests.get(url, headers = creds) 
    
    if verbose:
        print(url)
        
    if response.status_code == 200: # check status code 
        return response.json() # return the data as json
    
    else:
        raise Exception(f"Error while accessing the API\nResponse code={response.status_code}")
    


In [75]:
machine = query_oed('word','machine_nn01',flags="include_senses=true&include_quotations=true")

https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/word/machine_nn01?include_senses=true&include_quotations=true


In [92]:
machine['data']['senses'][1]

{'id': 'machine_nn01-38474233',
 'meta': {'created': 1904,
  'revised': True,
  'updated': 2000,
  'sense_group': 'machine_nn01-g03',
  'position_in_entry': 4},
 'lemma': 'machine',
 'notes': [],
 'oed_url': 'https://www.oed.com/view/Entry/111850#eid38474233',
 'word_id': 'machine_nn01',
 'daterange': {'end': None,
  'start': 1583,
  'obsolete': False,
  'rangestring': '1583—'},
 'first_use': 'Brian Melbancke',
 'categories': {'topic': [['Military', 'Weaponry']],
  'region': [],
  'register': [['historical']]},
 'definition': 'A military engine or siege-tower. Cf. war machine n. (a) at  war n.1 compounds 4. Now historical.',
 'transitivity': None,
 'oed_reference': 'machine, n., sense II.3',
 'part_of_speech': 'NN',
 'main_current_sense': False,
 'semantic_class_ids': [['153072',
   '160439',
   '163207',
   '163208',
   '163377',
   '163378']],
 'quotations': [{'id': 'machine_nn01-38474243',
   'text': {'keyword': 'machins',
    'full_text': 'For all that, their lucke was at that time

In [52]:
machine_senses = query_oed('word','machine_nn01','senses')
machine = query_oed('word','machine_nn01','quotations')
#

In [63]:
machine = query_oed('word','machine_nn01','')

https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/word/machine_nn01


In [49]:
machine_data['data'][0]

{'id': 'machine_nn01-38473961',
 'text': {'keyword': 'Machine',
  'full_text': 'Machine, hath many significacions, but here it is taken for the worke of the hole worlde.',
  'keyword_offset': 0},
 'year': 1545,
 'lemma': 'machine',
 'source': {'title': 'Early Mod. Eng. Lexicogr.',
  'author': 'J. Schäfer',
  'gender': None},
 'oed_url': 'https://www.oed.com/view/Entry/111850#eid38473961',
 'word_id': 'machine_nn01',
 'sense_id': 'machine_nn01-38473945',
 'datestring': '1545',
 'first_in_word': False,
 'oed_reference': 'machine, n., sense I.1a',
 'first_in_sense': False}

In [48]:
pd.DataFrame.from_dict(machine_data['data'])

Unnamed: 0,id,text,year,lemma,source,oed_url,word_id,sense_id,datestring,first_in_word,oed_reference,first_in_sense
0,machine_nn01-38473961,"{'keyword': 'Machine', 'full_text': 'Machine, ...",1545,machine,"{'title': 'Early Mod. Eng. Lexicogr.', 'author...",https://www.oed.com/view/Entry/111850#eid38473961,machine_nn01,machine_nn01-38473945,1545,False,"machine, n., sense I.1a",False
1,machine_nn01-38473950,"{'keyword': 'machyne', 'full_text': 'The hole ...",1545,machine,"{'title': 'Early Mod. Eng. Lexicogr.', 'author...",https://www.oed.com/view/Entry/111850#eid38473950,machine_nn01,machine_nn01-38473945,1545,True,"machine, n., sense I.1a",True
2,machine_nn01-38473973,"{'keyword': 'machine', 'full_text': 'The maist...",1550,machine,"{'title': 'Complaynt Scotl.', 'author': None, ...",https://www.oed.com/view/Entry/111850#eid38473973,machine_nn01,machine_nn01-38473945,c1550,False,"machine, n., sense I.1a",False
3,machine_nn01-38474243,"{'keyword': 'machins', 'full_text': 'For all t...",1583,machine,"{'title': 'Philotimus', 'author': 'B. Melbanck...",https://www.oed.com/view/Entry/111850#eid38474243,machine_nn01,machine_nn01-38474233,1583,False,"machine, n., sense II.3",True
4,machine_nn01-38474102,"{'keyword': 'machines', 'full_text': 'In wordz...",1595,machine,"{'title': 'Let. to James VI', 'author': 'Queen...",https://www.oed.com/view/Entry/111850#eid38474102,machine_nn01,machine_nn01-38474097,1595–6,False,"machine, n., sense I.1b",True
...,...,...,...,...,...,...,...,...,...,...,...,...
214,machine_nn01-38476264,"{'keyword': 'machine for living in', 'full_tex...",1927,machine for living,"{'title': 'Towards New Archit.', 'author': Non...",https://www.oed.com/view/Entry/111850#eid38476264,machine_nn01,machine_nn01-38476245,1927,False,"machine, n., sense V.8c",True
215,machine_nn01-38476281,"{'keyword': None, 'full_text': 'Le Corbusier h...",1934,machine for living,"{'title': 'Beyond Mexique Bay', 'author': 'A. ...",https://www.oed.com/view/Entry/111850#eid38476281,machine_nn01,machine_nn01-38476245,1934,False,"machine, n., sense V.8c",False
216,machine_nn01-38476289,"{'keyword': 'Machine-for-Living', 'full_text':...",1960,machine for living,"{'title': 'Dymaxion World Buckminster Fuller',...",https://www.oed.com/view/Entry/111850#eid38476289,machine_nn01,machine_nn01-38476245,1960,False,"machine, n., sense V.8c",False
217,machine_nn01-38476297,"{'keyword': None, 'full_text': 'The whole hous...",1966,machine for living,"{'title': 'Nell Alone', 'author': '‘J. Melvill...",https://www.oed.com/view/Entry/111850#eid38476297,machine_nn01,machine_nn01-38476245,1966,False,"machine, n., sense V.8c",False


In [27]:
type(data.status_code)

int

In [35]:
machine_data["data"][0]

{'id': 'machine_nn01-38473945',
 'meta': {'created': 1904,
  'revised': True,
  'updated': 2000,
  'sense_group': 'machine_nn01-g01',
  'position_in_entry': 1},
 'lemma': 'machine',
 'notes': [],
 'oed_url': 'https://www.oed.com/view/Entry/111850#eid38473945',
 'word_id': 'machine_nn01',
 'daterange': {'end': None,
  'start': 1545,
  'obsolete': False,
  'rangestring': '1545—'},
 'first_use': 'J. Schäfer',
 'categories': {'topic': [], 'region': [], 'register': [['rare']]},
 'definition': 'A material or immaterial structure, esp. the fabric of the world or of the universe; a construction or edifice. Now rare.',
 'transitivity': None,
 'oed_reference': 'machine, n., sense I.1a',
 'quotation_ids': ['machine_nn01-38473950',
  'machine_nn01-38473961',
  'machine_nn01-38473973',
  'machine_nn01-38473981',
  'machine_nn01-38473999',
  'machine_nn01-38474007',
  'machine_nn01-38474047',
  'machine_nn01-38474058',
  'machine_nn01-38474068',
  'machine_nn01-38474079',
  'machine_nn01-38474089'],

In [93]:
semclass_child = query_oed('semanticclass','163378','children')

https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/semanticclass/163378?children


In [97]:
c,level='senses')

https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/semanticclass/163378/senses/


In [102]:
query_oed('semanticclass','163378')

https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/semanticclass/163378


{'meta': {'provider': 'Oxford University Press',
  'cite': 'Oxford English Dictionary API, Oxford University Press, 0.0.0.0/oed/api/v0.2/semanticclass/163378 . Accessed 15 September 2020'},
 'links': {'self': '/oed/api/v0.2/semanticclass/163378'},
 'data': {'id': '163378',
  'label': None,
  'level': 6,
  'oed_url': 'https://www.oed.com/view/th/class/163378',
  'child_ids': ['163379',
   '163380',
   '163381',
   '163382',
   '163383',
   '163384',
   '163385',
   '163393',
   '163394'],
  'node_size': 5,
  'sense_ids': ['engine_nn01-5410103',
   'gun_nn01-2345301',
   'machinament_nn01-38472847',
   'machine_nn01-38474233',
   'machination_nn01-38473507'],
  'breadcrumb': 'society » armed hostility » military equipment » weapon » engine of war » [noun]',
  'parent_ids': ['153072', '160439', '163207', '163208', '163377'],
  'branch_size': 82,
  'sibling_ids': [],
  'part_of_speech': 'NN'}}

In [101]:
sc['']

{'meta': {'total': 5,
  'count': 5,
  'offset': 0,
  'limit': 10000,
  'provider': 'Oxford University Press',
  'cite': 'Oxford English Dictionary API, Oxford University Press, 0.0.0.0/oed/api/v0.2/semanticclass/163378/senses/?offset=0&limit=10000 . Accessed 15 September 2020'},
 'links': {'self': '/oed/api/v0.2/semanticclass/163378/senses/?offset=0&limit=10000',
  'prev': None,
  'next': None,
  'first': '/oed/api/v0.2/semanticclass/163378/senses/?offset=0&limit=10000',
  'last': '/oed/api/v0.2/semanticclass/163378/senses/?offset=0&limit=10000'},
 'data': [{'id': 'engine_nn01-5410103',
   'meta': {'created': 1891,
    'revised': True,
    'updated': 2010,
    'sense_group': 'engine_nn01-g04',
    'position_in_entry': 6},
   'lemma': 'engine',
   'notes': ['battering, siege engine: see the first element.'],
   'oed_url': 'https://www.oed.com/view/Entry/62223#eid5410103',
   'word_id': 'engine_nn01',
   'daterange': {'end': None,
    'start': 1380,
    'obsolete': False,
    'rangestrin

In [100]:
[sc['data'][i]['lemma'] for i in range(len(sc))]

['engine', 'gun', 'machinament']

In [94]:
semclass_data["data"][0]

{'id': '120173',
 'label': 'immaterial',
 'level': 7,
 'oed_url': 'https://www.oed.com/view/th/class/120173',
 'child_ids': [],
 'node_size': 4,
 'sense_ids': ['machinament_nn01-38472847',
  'machine_nn01-38473945',
  'machina_nn01-38472486',
  'cadre_nn01-10918875'],
 'breadcrumb': 'the world » relative properties » wholeness » mutual relation of parts to whole » [noun] » a structure » immaterial',
 'parent_ids': ['1', '111290', '118635', '119024', '120162', '120172'],
 'branch_size': 4,
 'sibling_ids': ['120174', '120175'],
 'part_of_speech': 'NN'}

In [53]:
semclass_siblings = query_oed('semanticclass','120172','senses')

In [56]:
semclass_siblings['data'][0]

{'id': 'arrangement_nn01-39073466',
 'meta': {'created': 1885,
  'revised': False,
  'updated': None,
  'sense_group': 'arrangement_nn01-g04',
  'position_in_entry': 4},
 'lemma': 'arrangement',
 'notes': [],
 'oed_url': 'https://www.oed.com/view/Entry/10968#eid39073466',
 'word_id': 'arrangement_nn01',
 'daterange': {'end': None,
  'start': 1800,
  'obsolete': False,
  'rangestring': '1800—'},
 'first_use': 'William Herschel',
 'categories': {'topic': [], 'region': [], 'register': []},
 'definition': 'concrete. A structure or combination of things arranged in a particular way or for any purpose; hence loosely, like affair, concern, production.',
 'transitivity': None,
 'oed_reference': 'arrangement, n., sense 4',
 'quotation_ids': ['arrangement_nn01-39073473',
  'arrangement_nn01-39073483',
  'arrangement_nn01-39073492'],
 'part_of_speech': 'NN',
 'main_current_sense': False,
 'semantic_class_ids': [['1', '97163', '101153', '104182', '104802', '104811'],
  ['1', '111290', '118635', '1

In [None]:
url_senses = 'https://oed-researcher-api.oxfordlanguages.com/oed/api/v0.2/word/machine_nn01/senses/'
r_senses = requests.get(url_senses, 