# Review Notebook for Sense Filtering by Time and Provenance

This notebook builds on 4.1 (harvest senses with provenance) and 4.2 (harvest quotations for senses). The code in this notebook allows you to filter senses by (a) selecting seed senses and (b) defining the relations that words should have to those seed senses.

Functions reviewed in this notebook:
- `filter_by_year_range` (helper)
- `select_senses_by_provenance` (helper)
- `filter_senses` (main)

Part of:
- `utils.dataset_download`

Creator: Kaspar Beelen

Reviewer(s):


In [1]:
# List the local git branches; the checked-out branch is marked with `*`.
!git branch

  19-machine-tagger
* 4-semantic-provenance
  44-kNN-BERT-baseline
  dev
  master


## Load libraries, data and set parameters

In [2]:
# Load IPython's autoreload extension so that the %autoreload magic is available.
%load_ext autoreload

In [3]:
# Mode 2: reload all imported modules before each cell executes, so edits to the
# project code are picked up without restarting the kernel.
%autoreload 2

In [48]:
import pandas as pd
import json
from utils.dataset_download import *

In [5]:
# Lemma identifier; interpolated into the names of the pickle files loaded below.
lemma_id = 'machine_nn01'

In [6]:
# import API credentials
# Read the OED API credentials (a JSON file) into `auth`.
# NOTE(review): `auth` is not referenced by any other cell in this notebook — confirm it is still needed.
with open('./oed_experiments/oed_credentials.json') as f:
    auth = json.loads(f.read())

In [24]:
# Load the senses table for `lemma_id` (sense ids, definitions, date ranges, provenance columns).
# NOTE(review): presumably the output of notebook 4.1 — confirm.
df = pd.read_pickle(f'./data/extended_{lemma_id}.pickle')

In [8]:
# Preview the first three rows of the senses table.
df.head(3)

Unnamed: 0,categories,daterange,definition,first_use,id,lemma,main_current_sense,meta,notes,oed_reference,oed_url,part_of_speech,provenance,provenance_type,quotation_ids,semantic_class_ids,semantic_class_last_id,transitivity,word_id
0,"{'topic': [], 'usage': [['rare']], 'region': []}","{'end': None, 'start': 1545, 'obsolete': False...","A material or immaterial structure, esp. the f...",J. Schäfer,machine_nn01-38473945,machine,False,"{'created': 1904, 'revised': True, 'updated': ...",[],"machine, n., sense I.1a",https://www.oed.com/view/Entry/111850#eid38473945,NN,"[[machine_nn01-38473945, seed, machine_nn01]]",seed,"[machine_nn01-38473950, machine_nn01-38473961,...","[[1, 111290, 118635, 119024, 120162, 120172], ...","[120172, 120173]",,machine_nn01
1,"{'topic': [['Military', 'Weaponry']], 'usage':...","{'end': None, 'start': 1583, 'obsolete': False...",A military engine or siege-tower. Cf. war mach...,Brian Melbancke,machine_nn01-38474233,machine,False,"{'created': 1904, 'revised': True, 'updated': ...",[],"machine, n., sense II.3",https://www.oed.com/view/Entry/111850#eid38474233,NN,"[[machine_nn01-38474233, seed, machine_nn01]]",seed,"[machine_nn01-38474243, machine_nn01-38474252,...","[[153072, 160439, 163207, 163208, 163377, 1633...",[163378],,machine_nn01
2,"{'topic': [], 'usage': [], 'region': []}","{'end': 1707, 'start': 1595, 'obsolete': True,...",spec. A scheme or plot. Obsolete.,Elizabeth I,machine_nn01-38474097,machine,False,"{'created': 1904, 'revised': True, 'updated': ...",[],"machine, n., sense I.1b",https://www.oed.com/view/Entry/111850#eid38474097,NN,"[[machine_nn01-38474097, seed, machine_nn01]]",seed,"[machine_nn01-38474102, machine_nn01-38474122,...","[[1, 84689, 87987, 87988, 87989, 88083, 88109,...",[88126],,machine_nn01


In [60]:
# Load the quotations table for `lemma_id` and display its (rows, columns) shape.
# NOTE(review): presumably the output of notebook 4.2 — confirm.
df_quotations = pd.read_pickle(f'./data/quotations_all_{lemma_id}.pickle')
df_quotations.shape

(203560, 12)

In [10]:
# Preview the first rows of the quotations table.
df_quotations.head()

Unnamed: 0,id,text,year,lemma,source,oed_url,word_id,sense_id,datestring,first_in_word,oed_reference,first_in_sense
0,pigmeat_nn01-13163366,"{'keyword': 'pig-meat', 'full_text': 'I was at...",1754,pigmeat,"{'title': 'Connoisseur', 'author': 'G. Colman'...",https://www.oed.com/view/Entry/237320#eid13163366,pigmeat_nn01,pigmeat_nn01-13163363,1754,True,"pigmeat, n., sense 1",True
1,pigmeat_nn01-13163379,"{'keyword': 'pig-meat', 'full_text': 'In short...",1784,pigmeat,"{'title': 'Year's Journey through Paix Bâs', '...",https://www.oed.com/view/Entry/237320#eid13163379,pigmeat_nn01,pigmeat_nn01-13163363,1784,False,"pigmeat, n., sense 1",False
2,pigmeat_nn01-13163399,"{'keyword': 'pig meat', 'full_text': 'It preve...",1817,pigmeat,"{'title': 'Parl. Deb.', 'author': None, 'gende...",https://www.oed.com/view/Entry/237320#eid13163399,pigmeat_nn01,pigmeat_nn01-13163363,1817,False,"pigmeat, n., sense 1",False
3,pigmeat_nn01-13163416,"{'keyword': 'pig meat', 'full_text': 'In most ...",1897,pigmeat,"{'title': 'Syst. Med.', 'author': 'T. C. Allbu...",https://www.oed.com/view/Entry/237320#eid13163416,pigmeat_nn01,pigmeat_nn01-13163363,1897,False,"pigmeat, n., sense 1",False
4,pigmeat_nn01-13163425,"{'keyword': 'pig meat', 'full_text': 'Beef tak...",1918,pigmeat,"{'title': 'Times', 'author': None, 'gender': N...",https://www.oed.com/view/Entry/237320#eid13163425,pigmeat_nn01,pigmeat_nn01-13163363,1918,False,"pigmeat, n., sense 1",False


# Run code

In [51]:
# Show the distinct definitions of the two senses whose ids are passed to
# `filter_senses` as seeds.
set(
    df.loc[
        df['id'].isin(['machine_nn01-38475835', 'machine_nn01-38475923']),
        'definition',
    ]
)

{'A conceptual, abstract, or theoretical mechanism or device; spec. a model or a mathematical abstraction of an existing or hypothetical computer. Cf. Turing machine n.',
 'Mechanics. Anything that transmits force or directs its application.'}

In [102]:
# Select the senses related to the two seed senses through the listed relation
# types, using start=1760 and end=1920 for the date filter.
# NOTE(review): the original cell carried a commented-out 'all' next to the
# relations list; presumably that is an accepted alternative value — confirm
# against utils.dataset_download.
senses = filter_senses(
    df,
    {'machine_nn01-38475835', 'machine_nn01-38475923'},
    relations=['seed', 'synonym', 'descendant', 'sibling'],
    skip_synonyms=False,
    start=1760,
    end=1920,
)

# senses before filtering by date = 8383
# senses after filtering by date = 5918


In [54]:
# Number of senses returned by `filter_senses`.
len(senses)

29

In [57]:
# Load the senses table that is passed to `obtain_quotations_for_senses` as its
# second argument. The path is built from `lemma_id` (as for `df`), so changing
# the lemma at the top of the notebook cannot leave this frame pointing at the
# old lemma's file.
# NOTE(review): this is the same pickle as `df`; presumably reloaded to get an
# unmodified copy — confirm whether `df` could be reused instead.
df_source = pd.read_pickle(f'./data/extended_{lemma_id}.pickle')

In [94]:
# Collect the quotations for the selected senses, using the same start/end
# years (1760 and 1920) as the `filter_senses` call, then display the
# (rows, columns) shape of the result.
quotations = obtain_quotations_for_senses(
    df_quotations,
    df_source,
    senses,
    start=1760,
    end=1920,
)
quotations.shape

(178, 13)

In [95]:
# Preview the first rows of the selected quotations.
quotations.head()

Unnamed: 0,keyword,full_text,keyword_offset,title,author,gender,year,sense_id,daterange,provenance,provenance_type,relation_to_core_senses,relation_to_seed_senses
0,wedge,"The sharper the wedge, or the more acute its a...",16.0,Princ. Mech.,W. Emerson,male,1773,wedge_nn01-14811588,"{'end': None, 'start': 725, 'obsolete': False,...","[[214221, descendant, 214220]]",branch,"{machine_nn01-38475923, power_nn01-28687898}",{machine_nn01-38475923}
1,wedge,Forth goes the woodman..To wield the axe And d...,55.0,Task,W. Cowper,male,1785,wedge_nn01-14811588,"{'end': None, 'start': 725, 'obsolete': False,...","[[214221, descendant, 214220]]",branch,"{machine_nn01-38475923, power_nn01-28687898}",{machine_nn01-38475923}
2,wedges,The distended ropes..are then confined at the ...,57.0,Archit.,W. Newton,,1789,wedge_nn01-14811588,"{'end': None, 'start': 725, 'obsolete': False,...","[[214221, descendant, 214220]]",branch,"{machine_nn01-38475923, power_nn01-28687898}",{machine_nn01-38475923}
3,wedges,The wedges employed to secure the rails in the...,4.0,Minutes Proc. Inst. Civil Engineers,,,1842,wedge_nn01-14811588,"{'end': None, 'start': 725, 'obsolete': False,...","[[214221, descendant, 214220]]",branch,"{machine_nn01-38475923, power_nn01-28687898}",{machine_nn01-38475923}
4,Wedge,"Wedge,..a small fastening for a door or window.",0.0,Dict. Trade Products,P. L. Simmonds,male,1858,wedge_nn01-14811588,"{'end': None, 'start': 725, 'obsolete': False,...","[[214221, descendant, 214220]]",branch,"{machine_nn01-38475923, power_nn01-28687898}",{machine_nn01-38475923}


In [96]:
# Distinct sense ids that appear anywhere in the `relation_to_core_senses` column.
{
    sense_id
    for related_ids in quotations.relation_to_core_senses
    for sense_id in related_ids
}

{'energizer_nn01-1246574970',
 'generator_nn01-135539783',
 'machine_nn01-38475923',
 'machinepower_nn01-38476982',
 'mechanicaladvantage_nn01-37502752',
 'mover_nn01-35820685',
 'power_nn01-28687898',
 'primemover_nn01-28348676',
 'primummobile_nn01-28369573',
 'vice_nn02-15526247'}

In [97]:
# Distinct sense ids that appear anywhere in the `relation_to_seed_senses` column.
{
    sense_id
    for related_ids in quotations.relation_to_seed_senses
    for sense_id in related_ids
}

{'machine_nn01-38475923'}

# Fin.