# Classifying BH Time Constructions

There are three basic constructions:

* non-prepositional time constructions
* prepositional time constructions
* multiphrasal time constructions

## Load Modules and Data

In [1]:
# Text-Fabric processor and tools
from tf.fabric import Fabric
from tf.app import use
from tools.locations import data_locations

# stats & data-containers
import collections, random, csv, re
import pandas as pd
import numpy as np
import scipy.stats as stats
from sklearn.decomposition import PCA

# import custom tools
from tools.significance import contingency_table, apply_fishers
from tools.pca import plot_PCA
from tools.helpers import convert2pandas, show_subphrases
from tools.tokenizers import tokenize_surface
from tools.time import Time
from tools.visualize import reverse_hb, barplot_counts

# data visualizations
import seaborn as sns
sns.set(font_scale=1.5, style='whitegrid')
import matplotlib.pyplot as plt
from IPython.display import display, HTML

# load custom BHSA data + heads
# (data_locations comes from tools.locations; its values are passed to Fabric as locations)
TF = Fabric(locations=data_locations.values())
# features requested explicitly; TF.load takes them as one space-separated string
load_features = ['g_cons_utf8', 'trailer_utf8', 'label', 'lex',
                 'role', 'rela', 'typ', 'function', 'language',
                 'pdp', 'gloss', 'vs', 'vt', 'nhead', 'head', 
                 'mother', 'nu', 'prs', 'sem_set', 'ls', 'st',
                 'kind', 'top_assoc', 'number']
api = TF.load(' '.join(load_features))
F, E, T, L = api.F, api.E, api.T, api.L # shortform TF methods

# configure Hebrew displaying: wrap the loaded api in the BHSA app and
# set display defaults used by every A.show call below
A = use('bhsa', api=api, silent=True)
A.displaySetup(condenseType='clause', withNodes=True)

This is Text-Fabric 7.8.7
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

123 features found and 6 ignored
  0.00s loading features ...
   |     0.00s No structure info in otext, the structure part of the T-API cannot be used
  6.99s All features loaded/computed - for details use loadLog()


# Basic Exploration

The analysis looks at chunk objects with `label=timephrase`. Aramaic time constructions are excluded. Below we print the total number of such objects.

In [2]:
# Find all chunk objects labelled as time phrases.
# The results are iterated below as bare chunk nodes (shallow=True).
times = A.search('''

chunk label=timephrase

''', shallow=True)

  0.08s 3881 results


In [3]:
# tally the surface token of every time construction, then tabulate
surface_tally = collections.Counter(
    tokenize_surface(cx, api) for cx in times
)

surfaces = convert2pandas(surface_tally)

In [4]:
print(f'{surfaces.shape[0]} unique surface forms found')

1167 unique surface forms found


In [5]:
surfaces.head(50)

Unnamed: 0,Total
עתה,342
ב.ה.יום.ה.הוא,203
ה.יום,191
ל.עולם,85
ב.ה.בקר,78
עד.ה.יום.ה.זה,71
ב.יום,69
אז,66
שׁבעת.ימים,63
עד.עולם,53


This top list accounts for a substantial proportion of all known time adverbials in the dataset:

In [6]:
# Proportion of all counted time constructions covered by the first 50 rows
# of the surface table. `.iloc[0]` picks the first summed column explicitly
# by position; `[0]` on a label-indexed Series relies on a deprecated
# positional fallback.
surfaces.head(50).sum().iloc[0] / surfaces.sum().iloc[0]

0.545735635145581

The top 50 of the 1,167 surface forms account for more than 50% of all occurrences, which suggests that this surface count table contains most of the key constructional elements for a TIME taxonomy.

## Distinguish Single Phrase and Multi-Phrasal Time Constructions

### Single Phrase Constructions

In [7]:
# Split the time constructions into single-phrase and multi-phrase cases.
# A construction counts as "single" only if it contains exactly one phrase,
# exactly one phrase atom, and has exactly one incoming `role` edge
# whose value is 'time'.
singles = []
multi = []

for tc in F.label.s('timephrase'):
    
    nphrases = L.d(tc, 'phrase')
    nphraseatoms = L.d(tc, 'phrase_atom')
    # NB: not named `times` -- that name holds the A.search results of an
    # earlier cell and must not be rebound by this loop
    time_roles = [r for r in E.role.t(tc) if r[1] == 'time']

    is_single = (
        len(nphrases) == 1
        and len(nphraseatoms) == 1
        and len(time_roles) == 1
    )

    if is_single:
        singles.append(tc)
    else:
        multi.append(tc)
        
print(f'{len(singles)} single phrasal constructions found...')
print(f'{len(multi)} multi phrasal constructions found...')

3352 single phrasal constructions found...
529 multi phrasal constructions found...


I will manually inspect randomly selected single-phrase cases. Cases in which the time construction consists of only a single word are eliminated.

In [8]:
# inspect = [r for r in singles if len(L.d(r[0], 'word')) > 1]
# print(len(inspect), 'cases ready for manual inspection...')

In [9]:
# random.shuffle(multi)

In [10]:
# A.show(multi)

# Classifications

The classifications are added to a dictionary, keyed by name, to keep track of which constructions are accounted for. The process is one of elimination and deduction based on features identified in an inductive analysis.

In [11]:
# class name -> set of construction (chunk) nodes assigned to that class;
# re-running this cell discards every classification made so far
classes = {}

In [12]:
# denominator for the progress reports printed by prog()
total_cxs = len(singles) + len(multi)

def percent(n, total):
    '''
    Returns n/total as a proportion (0-1 scale, 
    not multiplied by 100), rounded to 2 decimals.
    '''
    proportion = n / total
    return round(proportion, 2)

def prog():
    '''
    Prints how many constructions are classified so far,
    as a proportion of total_cxs and as a raw count.
    A cx found in several classes is counted once.
    '''
    accounted = set()
    for clset in classes.values():
        accounted.update(clset)
    share = percent(len(accounted), total_cxs)
    print(f'\t{share} ({len(accounted)}) now accounted for')
    
def get_remaining(cx_list):
    '''
    Returns the set of items in cx_list that do
    not yet appear in any set stored in classes.
    '''
    accounted = set()
    for clset in classes.values():
        accounted |= set(clset)
    return set(cx_list).difference(accounted)

def remaining_tags(cx_list):
    '''
    Counts the Time tags of the constructions in cx_list
    that are not yet classified and returns the counts
    converted with convert2pandas.
    '''
    tag_tally = collections.Counter(
        Time(cx, api).tag for cx in get_remaining(cx_list)
    )
    return convert2pandas(tag_tally)

def get_cx(node):
    '''
    Returns the first chunk above node (per L.u) that is 
    labelled 'timephrase'. Raises IndexError if there is none.
    '''
    timephrase_chunks = []
    for chunk in L.u(node, 'chunk'):
        if F.label.v(chunk) == 'timephrase':
            timephrase_chunks.append(chunk)
    return timephrase_chunks[0]

class CXdata:
    '''
    Collects the members of one construction class.
    
    Every cx in cx_set is wrapped in a Time object and 
    passed to validate; for each truthy outcome the cx is
    stored and tallied under the token given by tokenize.
    
    Attributes set:
        timeset - set of matching cx nodes
        results - token -> list of [cx, word, word, ...] rows;
                  every row is also stored under the key 'all'
        counts  - token tallies passed through convert2pandas
        
    Prints the number of matches on construction.
    '''
    def __init__(self, cx_set, validate, tokenize):
        
        self.timeset = set()
        self.results = collections.defaultdict(list)
        tallies = collections.Counter()
    
        for cx in cx_set:
            time = Time(cx, api)
            if not validate(time):
                continue
            token = tokenize(time)
            tallies[token] += 1
            self.timeset.add(cx)
            row = [cx, *L.d(cx, 'word')]
            for key in (token, 'all'):
                self.results[key].append(row)
                
        self.counts = convert2pandas(tallies)
        
        print(f'{len(self.timeset)} matching constructions found...')

## Single Phrase, Non-Prepositional

### The Non-Prepositional (ø) "Adverb Construction"


The adverb construction is a single-word construction that is anchored to speech time. The lack of any specifiers such as demonstratives, definite articles, and other specifications means that these words must necessarily be anchored to an implied common point. This gives rise to a construction that can be considered an "adverb construction." Is this phenomenon discussed in the literature? Do other scholars recognize the association between modifiers and anchoredness?

These constructions are selected simply by finding those with only one specification: `time`.

In [13]:
def check_advb(time):
    '''
    Validates an adverb cx: the tag is exactly 'time'.
    Returns True on a match, otherwise None.
    '''
    return True if time.tag == 'time' else None

def token_advb(time):
    '''Tokenizes an adverb cx by the surface token of its chunk'''
    return tokenize_surface(time.cx, api)

# gather the adverb constructions among the single-phrase cases
np_adverbs = CXdata(singles, check_advb, token_advb)

# register the class, then report cumulative coverage
classes['ø_adverb'] = np_adverbs.timeset

prog()

603 matching constructions found...
	0.16 (603) now accounted for


In [14]:
np_adverbs.counts

Unnamed: 0,Total
עתה,342
אז,66
לילה,41
אחר,34
מחר,31
תמיד,30
יומם,20
עולם,6
טרם,4
בקר,4


In [15]:
#A.show(np_adverbs.results['חדשׁ'], condenseType='sentence')

**TODO**: NB the example above. Is it indeed an adverbial construction, or a simple use of the durative?

### The ø Construct Anchor Construction

Times can be anchored to a noun, verb, or suffix that is recoverable from the context. Anchors possibly occur across many main time construction types (as shown below). The patterns identified below consist of a basic `time + anchor` sequence. This pattern is different from the adverb construction identified above in that they are not inherently deictic. That is, the time anchor is specified, not assumed. Yet the pattern is distinct from the Demonstrative Heh construction and the Attributive Anchor construction.

In [16]:
def val_ancacc(time):
    '''
    Validates an anchored accusative construction:
    the tag starts with 'time.construct' and the word after
    the first time word (skipping one word with lexeme 'H')
    is either singular or a verb.
    Returns True on a match, otherwise None.
    '''
    head = time.times[0]
    # step over an intervening H when looking at the next word
    p1 = head + 2 if F.lex.v(head + 1) == 'H' else head + 1
    is_construct = time.tag.startswith('time.construct')
    sg_or_verb = F.nu.v(p1) == 'sg' or F.pdp.v(p1) == 'verb'
    if is_construct and sg_or_verb:
        return True
    
def token_ancacc(time):
    '''Tokenizes by the lexeme (lex_utf8) of the first time word'''
    head = time.times[0]
    return F.lex_utf8.v(head)

# not registered in classes yet: one problematic case is removed first (see below)
ancacc = CXdata(singles, val_ancacc, token_ancacc)

7 matching constructions found...


In [17]:
ancacc.counts

Unnamed: 0,Total
יום,3
מועד,1
עת,1
רביעי,1
שׁבת,1


In [18]:
#A.show(ancacc.results['all'], condenseType='sentence')

#### Problematic Case of שׁבת שׁבת

This pattern is identified in the search because it consists of שבת + construct + שׁבת, as it is represented in the MT. Yet this is a unique rendering of a construction which is otherwise clearly a multi-phrasal, Repetition Durative Construction:

In [19]:
# pick the result row whose cx node sits in 1 Chronicles 9:32;
# the trailing [0] raises IndexError if no such row exists
cbt_cbt = [r for r in ancacc.results['all'] if T.sectionFromNode(r[0]) == ('1_Chronicles', 9, 32)][0]

#A.show([cbt_cbt])

As such, this case is removed from this set.

In [20]:
# Drop the 1 Chr 9:32 case from the set (first item of the row is the cx node).
# discard() rather than remove(): re-running this cell after the case is
# already gone must not raise KeyError.
ancacc.timeset.discard(cbt_cbt[0])

len(ancacc.timeset)

6

The remaining set is added.

In [21]:
# register the (already pruned) set and report cumulative coverage
classes['ø_construct_anchor'] = ancacc.timeset

prog()

	0.16 (609) now accounted for


#### [Research and defense of this construction here]

### The Demonstrative Heh Construction

This construction is unique, since the definite article heh is construed as a demonstrative. This is not a typical function of the definite article in noun phrases. It uniquely takes on this role in time constructions.

In [22]:
def val_demheh(time):
    '''
    Validates a demonstrative heh cx: the tag is exactly 'time.H'.
    Returns True on a match, otherwise None.
    '''
    return True if time.tag == 'time.H' else None
    
def token_demheh(time):
    '''
    Tokenizes a demonstrative heh cx as
    "<lexeme of the word before the time word>.<lexeme of the time word>"
    '''
    head = time.times[0]
    preceding_lex = F.lex_utf8.v(head - 1)
    time_lex = F.lex_utf8.v(head)
    return f'{preceding_lex}.{time_lex}'

# gather the demonstrative heh constructions among the single-phrase cases
demheh = CXdata(singles, val_demheh, token_demheh)

# register the class, then report cumulative coverage
classes['ø_demon_heh'] = demheh.timeset

prog()

220 matching constructions found...
	0.21 (829) now accounted for


In [23]:
demheh.counts

Unnamed: 0,Total
ה.יום,194
ה.לילה,22
ה.שׁנה,3
ה.שׁביעי,1


Random inspection of mass results...

In [24]:
# random.shuffle(demheh.results['ה.יום'])

# A.show(demheh.results['ה.יום'])

Inspection of rare results...

In [25]:
#A.show(demheh.results['ה.שׁנה'])

### The ø "Attributive Anchor" Construction, [H + time + H + anchor]

This construction takes advantage of the attributive noun construction. It frequently occurs with the demonstrative, in which case the demonstrative serves to anchor the time word.


In [26]:
def check_atanchor(time):
    '''
    Validates an attributive anchor cx: both 'attr_patt'
    and 'time' must be among the specs.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    required = ('attr_patt', 'time')
    return True if all(spec in specs for spec in required) else None
    
def token_atanchor(time):
    '''
    Tokenizes an attributive anchor cx by the consonantal 
    text of the word two slots after the first time word.
    '''
    attrib = time.times[0] + 2
    return F.g_cons_utf8.v(attrib)

# gather the attributive anchor constructions among the single-phrase cases
attimes = CXdata(singles, check_atanchor, token_atanchor)

# register the class, then report cumulative coverage
classes['ø_attrib_anchor'] = attimes.timeset

prog()

34 matching constructions found...
	0.22 (863) now accounted for


In [27]:
attimes.counts

Unnamed: 0,Total
זה,29
הוא,2
תיכונה,1
אלה,1
באים,1


In [28]:
A.show(attimes.results['באים'])

#### [Research and defense of this construction here]

### The "Quantified Durative" Construction

In [29]:
def qd_validate(time):
    '''
    Validates a quantified durative cx.
    Requires 'time', 'quant' and 'card' among the specs and 
    at least one quantifier whose lexeme is not '>XD/' (אחד),
    so cxs quantified only by "one" are excluded.
    Returns True on a match, otherwise None.
    '''
    other_quants = {F.lex.v(q) for q in time.quants} - {'>XD/'}
    specs = time.specs
    has_specs = all(spec in specs for spec in ('time', 'quant', 'card'))
    if has_specs and other_quants:
        return True
    
def token_qd(time):
    '''Tokenizes quantified durative cxs by the lexeme of the first time word'''
    return F.lex_utf8.v(time.times[0])
    
# gather the quantified durative constructions among the single-phrase cases
qdtimes = CXdata(singles, qd_validate, token_qd)
        
# register the class, then report cumulative coverage
classes['ø_quant_durative'] = qdtimes.timeset
        
prog()

288 matching constructions found...
	0.3 (1151) now accounted for


In [30]:
qdtimes.counts

Unnamed: 0,Total
שׁנה,147
יום,120
חדשׁ,18
שׁבוע,2
ירח,1


In [31]:
A.show(qdtimes.results['שׁבוע'])

#### [Research and defense of this construction here]

### Qualitative Quantifiers

Qualitative quantifiers, primarily represented by כל, show a marked preference to attract an anchor, either in the form of a construct relation or a suffix. Those cases are marked out below.

In [32]:
def val_quals(time):
    '''
    Validates qualitative quantifier constructions:
    'time' and 'qual' among the specs, and no 'H'.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    if 'H' in specs:
        return None
    if 'time' in specs and 'qual' in specs:
        return True
    
def tok_qual(time):
    '''Tokenizes qual quants by the lexeme of the first quantifier'''
    first_quant = time.quants[0]
    return F.lex_utf8.v(first_quant)

# gather the qualitative quantifier constructions (without H)
qual_quants = CXdata(singles, val_quals, tok_qual)

# register the class, then report cumulative coverage
classes['ø_qual_durative'] = qual_quants.timeset

prog()

88 matching constructions found...
	0.32 (1239) now accounted for


In [33]:
qual_quants.counts

Unnamed: 0,Total
כל,63
רב,19
מספר,3
חצות,2
יתר,1


Randomized inspection of results below...

In [34]:
#random.shuffle(qual_quants.results['all'])

#A.show(qual_quants.results['all'])

The cases of חצות and יתר are interesting. Do they bear the same semantics as the others?

In [35]:
#random.shuffle(qual_quants.results['all'])

#A.show(qual_quants.results['all'])

יתר seems more straightforward to indeed align with the quantified durative expression:

In [36]:
A.show(qual_quants.results['יתר'])

### The Cases of qualQuant + H + time 

In [37]:
def val_qualHquant(time):
    '''
    Validates qualitative quantifiers with H + time:
    'time', 'qual' and 'H' must all be among the specs.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    required = ('time', 'qual', 'H')
    return True if all(spec in specs for spec in required) else None
    
def token_qhquant(time):
    '''
    Tokenizes a QualH quant as
    "<lexeme of the first quantifier>.<lexeme of the following word>"
    '''
    quant = time.quants[0]
    quant_lex = F.lex_utf8.v(quant)
    next_lex = F.lex_utf8.v(quant + 1)
    return f'{quant_lex}.{next_lex}'

# gather the qualitative quantifier constructions that include H
qualH_quants = CXdata(singles, val_qualHquant, token_qhquant)

# Merge into the existing qualitative durative class by building a NEW set.
# The stored value is the same set object as qual_quants.timeset, so an
# in-place `|=` would silently add these constructions to that object too.
classes['ø_qual_durative'] = classes['ø_qual_durative'] | qualH_quants.timeset

prog()

102 matching constructions found...
	0.35 (1339) now accounted for


In [38]:
qualH_quants.counts

Unnamed: 0,Total
כל.ה,100
מספר.ה,1
חצי.ה,1


In [39]:
#A.show(qualH_quants.results['כל.ה'])

These cases do, on initial inspection, seem to be durative as well.

#### [Research and defense of this construction here]

### Cases Where the Quantifier is אחד

In [40]:
def qd_validate_ONE(time):
    '''
    Validates a quantified durative cx WITH אחד:
    'time', 'quant' and 'card' among the specs, and the 
    quantifier lexemes consist of '>XD/' and nothing else.
    Returns True on a match, otherwise None.
    '''
    only_one = {F.lex.v(q) for q in time.quants} == {'>XD/'}
    specs = time.specs
    has_specs = all(spec in specs for spec in ('time', 'quant', 'card'))
    if has_specs and only_one:
        return True
    
# token_qd, from above, will be used

# gather the quantified duratives whose only quantifier is אחד;
# token_qd (defined for the quantified durative cx) is reused as tokenizer
qd_ones = CXdata(singles, qd_validate_ONE, token_qd)

# register the class, then report cumulative coverage
classes['ø_quant_durative_ones'] = qd_ones.timeset

prog()

12 matching constructions found...
	0.35 (1351) now accounted for


In [41]:
qd_ones.counts

Unnamed: 0,Total
יום,7
שׁנה,3
רגע,1
שׁבוע,1


In [42]:
A.show(qd_ones.results['יום'])

These cases are ambiguous and need to be more thoroughly researched...

**One potential distinguishing marker is whether the time noun is construed / profiled as a "large" time or a "short" time. For instance, רגע is more evidently punctiliar:**

In [43]:
A.show(qd_ones.results['רגע'])

On the other hand שׁנה, a longer period of time, seems unanimously durative:

In [44]:
A.show(qd_ones.results['שׁנה'])

The intuition that the construed time size affects the interpretation needs to be further explored.

#### [Research and defense of this construction here]

### Standalone Plural/Dual Durative Construction

A durative can also be formed with the addition of a pluralizing morpheme such as the plural or dual ending. Those cases are shown below.

In [45]:
def val_pldura(time):
    '''
    Validates a standalone plural/dual durative construction:
    the tag is 'time.pl' or 'time.quant.du', or it starts 
    with 'time.pl.construct'.
    Returns True on a match, otherwise None.
    '''
    tag = time.tag
    standalone = tag in ('time.pl', 'time.quant.du')
    return True if standalone or tag.startswith('time.pl.construct') else None
    
def token_pldura(time):
    '''Tokenizes a plural durative cx by the lexeme of the first time word'''
    head = time.times[0]
    return F.lex_utf8.v(head)

# gather the standalone plural/dual durative constructions
pldura = CXdata(singles, val_pldura, token_pldura)

# register the class, then report cumulative coverage
classes['ø_pl_durative'] = pldura.timeset

prog()

22 matching constructions found...
	0.35 (1373) now accounted for


In [46]:
pldura.counts

Unnamed: 0,Total
יום,9
שׁנה,7
עולם,3
שׁבוע,1
לילה,1
צהרים,1


In [47]:
A.show(pldura.results['יום'])

#### [Research and defense of this construction here]

### The Construct Durative Time Units Construction

Some constructions consist of a time + construct + time units, construing a given time as a distributed and quantified entity. This occurs most frequently with יום as the time unit.

In [48]:
def val_durtu(time):
    '''
    Validates a durative time unit construction:
    'time' and 'construct' among the specs, no 'quant', and
    the word right after the first time word is a plural 
    substantive whose prs value is 'absent' or 'NA'.
    Returns True on a match, otherwise None.
    '''
    p1 = time.times[0] + 1
    specs = time.specs
    
    spec_checks = (
        'time' in specs,
        'quant' not in set(specs),
        'construct' in specs,
    )
    unit_checks = (
        F.nu.v(p1) == 'pl',
        F.pdp.v(p1) == 'subs',
        F.prs.v(p1) in {'absent', 'NA'},
    )
    if all(spec_checks + unit_checks):
        return True
    
def token_durtu(time):
    '''Tokenizes a durative time unit cx by the lexeme of the first time word'''
    head = time.times[0]
    return F.lex_utf8.v(head)

# gather the construct durative time unit constructions
durtu = CXdata(singles, val_durtu, token_durtu)

# register the class, then report cumulative coverage
classes['ø_timeunit_durative'] = durtu.timeset

prog()

5 matching constructions found...
	0.36 (1378) now accounted for


In [49]:
A.show(durtu.results['all'])

## Check for Overlap

Which classes overlap and where?

In [50]:
# For every ordered pair of distinct classes, record the constructions 
# they share as [cx, word, word, ...] rows under the key "classA|classB".
# Each overlapping pair therefore shows up twice (A|B and B|A).
overlaps = collections.defaultdict(list)

for name_a, members_a in classes.items():
    for name_b, members_b in classes.items():
        shared = members_a & members_b
        if name_a != name_b and shared:
            overlaps[f'{name_a}|{name_b}'] = [
                [cx, *L.d(cx, 'word')] for cx in shared
            ]
            
overlaps

defaultdict(list,
            {'ø_attrib_anchor|ø_qual_durative': [[1448057,
               133825,
               133826,
               133827,
               133828,
               133829],
              [1448430, 161606, 161607, 161608, 161609, 161610]],
             'ø_qual_durative|ø_attrib_anchor': [[1448057,
               133825,
               133826,
               133827,
               133828,
               133829],
              [1448430, 161606, 161607, 161608, 161609, 161610]]})

In [51]:
#A.show(overlaps['ø_attrib_anchor|quant_durative'])

## Check Where Anchors Are

Construct and suffix anchors overlap across constructions, but are very prominent in the Qualitative Durative construction.

In [52]:
# Per class: count the constructions whose specs include 'construct' or 
# 'sffx', and keep their [cx, word, word, ...] rows for inspection.
anchors = collections.Counter()
results = collections.defaultdict(list)
anchor_specs = {'construct', 'sffx'}

for timeclass, timeset in classes.items():
    for cx in timeset:
        if not anchor_specs.intersection(Time(cx, api).specs):
            continue
        anchors[timeclass] += 1
        results[timeclass].append([cx, *L.d(cx, 'word')])
            
anchors = convert2pandas(anchors)

anchors

Unnamed: 0,Total
ø_qual_durative,65
ø_construct_anchor,6
ø_timeunit_durative,5
ø_quant_durative,3
ø_pl_durative,2


In [53]:
#A.show(results['quant_durative'])

### Remaining Constructions

The remaining non-prepositional, single-phrasal constructions are isolated and described below.

In [54]:
# unclassified single-phrase cxs whose tag starts with 'time.'
remaining = []
for cx in get_remaining(singles):
    if Time(cx, api).tag.startswith('time.'):
        remaining.append(cx)

# word nodes of each remaining cx, for display
show_remaining = []
for cx in remaining:
    show_remaining.append(L.d(cx, 'word'))

len(remaining)

12

In [55]:
#A.show(show_remaining)

The following cases remain:

* Multi-word Adverb Construction, **3x** (תמול שלשום) – this phrase constitutes a multiword unit, i.e. a lexicalized construction itself, and thus fits the pattern of the adverb construction.
* Cardinal Position in Calendar Construction, **2x** (אחת בשנה) – This construction is probably more similar to a multi-phrasal time construction, but the cardinals in Exodus 30:10 are in construct to the prepositional phrase, which is why these quirky constructions technically fall into the "single" phrasal constructions.
* שׁנתים ימים – only **1x**. Here is a Durative Unit construction, but the units are expressed with an adjectival relationship rather than a construct. Is there a way to include this with the construct duratives? Or is it better to keep separate?
* אישׁ חדשו (i.e. 1 Kgs 5:7) – only **1x**. I am not yet sure how to classify this cx, but it seems to have something to do with rations. I know this pattern exists in multi-phrasals, so it is best to wait before classifying this case.
* Direct Demonstrative, **3x** (עתה זה) – This is an interesting case. Perhaps the attributive anchor cx cannot be used here because עתה is anchored to speech time?
* Broken Attributive Anchor Construction, **1x** (Micah 7:11): יום ההוא This may be a case of poetic license?
* שבת שבת – only **1x** in 1 Chr 9:32. This is a MT tradition issue, where the first שבת is interpreted as a construct. That would make this phrase an Anchored Accusative construction. But the Masoretic analysis appears wrong in light of the Repeat Durative construction (multi-phrasal), which will be discussed in the subsequent section on multi-phrasals.

In total this amounts to **12** edge cases.

In [56]:
# park the leftover non-prepositional edge cases in a class of their own
# (stored as a new set built from the `remaining` list)
classes['ø_DIFFICULT'] = set(remaining)

## Single Phrasal, Prepositional Constructions

It is expected that there is a lot of overlap here with the non-prepositional constructions. The prepositional versions will now be sorted. For the most part, the functions designed above for single phrase time adverbials are copied directly to here for modification. Once this analysis is complete, new code will be written that tags the constructions more efficiently.

### The Prepositional (ø) "Adverb Construction"


In [57]:
def pp_check_advb(time):
    '''
    Validates a prepositional adverb cx: the tag is exactly 'PPtime'.
    Returns True on a match, otherwise None.
    '''
    return True if time.tag == 'PPtime' else None

def pp_token_advb(time):
    '''
    Tokenizes a prepositional adverb cx as the lexeme 
    of the first time word followed by ".prep"
    '''
    head = time.times[0]
    return f'{F.lex_utf8.v(head)}.prep'

# gather the prepositional adverb constructions among the single-phrase cases
pp_adverbs = CXdata(singles, pp_check_advb, pp_token_advb)

# register the class, then report cumulative coverage
classes['pp_adverb'] = pp_adverbs.timeset

prog()

398 matching constructions found...
	0.46 (1788) now accounted for


In [58]:
pp_adverbs.counts

Unnamed: 0,Total
עולם.prep,162
כן.prep,53
נצח.prep,25
מתי.prep,25
מחרת.prep,19
עד.prep,18
אן.prep,14
בקר.prep,11
אז.prep,8
עתה.prep,6


In [97]:
#A.show(pp_adverbs.results['קציר.prep'], condenseType='sentence')

### PP Adverb Plural Construction

### The PP "Attributive Anchor" Construction, [PP + H + time + H + anchor]

This construction takes advantage of the attributive noun construction. It frequently occurs with the demonstrative, in which case the demonstrative serves to anchor the time word.


In [60]:
def check_PPatanchor(time):
    '''
    Validates a prepositional attributive anchor cx: both 
    'attr_patt' and 'PPtime' must be among the specs.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    required = ('attr_patt', 'PPtime')
    return True if all(spec in specs for spec in required) else None
    
def token_PPatanchor(time):
    '''
    Tokenizes a prepositional attributive anchor cx by the 
    consonantal text of the word two slots after the first time word.
    '''
    attrib = time.times[0] + 2
    return F.g_cons_utf8.v(attrib)

# gather the prepositional attributive anchor constructions
PPattimes = CXdata(singles, check_PPatanchor, token_PPatanchor)

# register the class, then report cumulative coverage
classes['pp_attrib_anchor'] = PPattimes.timeset

prog()

605 matching constructions found...
	0.62 (2393) now accounted for


In [61]:
PPattimes.counts

Unnamed: 0,Total
הוא,244
זה,109
היא,44
שׁביעי,40
הם,31
שׁלישׁי,25
אלה,14
שׁני,14
שׁמיני,14
ראשׁון,11


In [62]:
#A.show(attimes.results['באים'])

### PP + Construct Anchor Constructions

In [63]:
def val_PP_ancacc(time):
    '''
    Validates a PP + construct anchor construction:
    'PPtime' and 'construct' among the specs, and no 'quant'.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    has_pp_construct = {'PPtime', 'construct'} <= set(specs)
    if has_pp_construct and 'quant' not in specs:
        return True
    
def token_PP_ancacc(time):
    '''Tokenizes by the lexeme (lex_utf8) of the first time word'''
    head = time.times[0]
    return F.lex_utf8.v(head)

# gather the PP + construct anchor constructions
pp_canc = CXdata(singles, val_PP_ancacc, token_PP_ancacc)

# register the class, then report cumulative coverage
classes['pp_construct_anchor'] = pp_canc.timeset

prog()

336 matching constructions found...
	0.7 (2729) now accounted for


In [64]:
pp_canc.counts

Unnamed: 0,Total
יום,214
עת,46
שׁנה,20
מות,16
מחרת,5
חדשׁ,4
תשׁובה,3
תחלה,3
ארך,2
ליל,2


In [65]:
#A.show(pp_canc.results['דמי'], condenseType='sentence')

**This ^ construction needs to be checked. Does דמי really function as a time word here?**

## PP Definite Anchor

These are times which are anchored to deixis via a definite article. Potentially similar to the demonstrative ה construction. But are there any clear differences?

In [66]:
def val_defanc(time):
    '''
    Validates a definite anchor PP construction:
    the tag starts with 'PPtime.H' and neither 'attr_patt' 
    nor 'adjv' is among the specs.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    excluded = 'attr_patt' in specs or 'adjv' in specs
    if time.tag.startswith('PPtime.H') and not excluded:
        return True
    
# token_demheh

# gather the definite anchor PP constructions;
# token_demheh (defined for the demonstrative heh cx) is reused as tokenizer
defanc = CXdata(singles, val_defanc, token_demheh)

# register the class, then report cumulative coverage
classes['pp_definite_anchor'] = defanc.timeset

prog()

335 matching constructions found...
	0.79 (3064) now accounted for


In [67]:
defanc.counts

Unnamed: 0,Total
ה.בקר,97
ה.ערב,74
ה.יום,61
ה.לילה,28
ה.צהרים,12
ה.ראשׁון,12
ה.מועד,6
ה.שׁנה,5
ה.עת,5
ה.שׁבת,4


In [68]:
#A.show(defanc.results['ה.זעם'], condenseType='sentence')

### PP Suffix Anchor Construction

In [69]:
def val_ppsffx(time):
    '''
    Validates a PP suffix anchor:
    'PPtime' and 'sffx' among the specs, and no 'quant'.
    Returns True on a match, otherwise None.
    '''
    specs = time.specs
    if 'quant' in specs:
        return None
    if 'PPtime' in specs and 'sffx' in specs:
        return True
    
def token_time(time):
    '''Generic tokenizer: the lexeme (lex_utf8) of the first time word'''
    head = time.times[0]
    return F.lex_utf8.v(head)
    
# gather the PP suffix anchor constructions
ppsffx = CXdata(singles, val_ppsffx, token_time)

# register the class, then report cumulative coverage
classes['pp_sffx_anchor'] = ppsffx.timeset

prog()

117 matching constructions found...
	0.82 (3181) now accounted for


In [70]:
ppsffx.counts

Unnamed: 0,Total
אחר,33
יום,24
עת,10
מות,10
נעורים,6
פנה,6
חיים,6
דור,4
דבר,2
עוד,2


In [71]:
# random.shuffle(ppsffx.results['all'])

In [72]:
# A.show(ppsffx.results['all'])

### PP Quantified Duration Construction

In [88]:
def val_pp_qdur(time):
    '''
    Validates a PP quantified duration construction:
    'card' is among the specs and no spec falls outside
    'PPtime', 'pl', 'card', 'quant'.
    Returns True on a match, otherwise None.
    '''
    allowed = {'PPtime', 'pl', 'card', 'quant'}
    specs = time.specs
    if 'card' in specs and set(specs) <= allowed:
        return True
    
# gather the PP quantified duration constructions (generic time tokenizer)
pp_qdur = CXdata(singles, val_pp_qdur, token_time)

# register the class, then report cumulative coverage
classes['pp_quant_duration'] = pp_qdur.timeset

prog()

32 matching constructions found...
	0.83 (3213) now accounted for


In [92]:
#random.shuffle(pp_qdur.results['all'])

In [93]:
#A.show(pp_qdur.results['all'])

### Next

In [94]:
# Summary table: one right-aligned row per class with its member count.
print(f'{len(classes)} distinct classes recorded...')
print()
# The loop variable is deliberately not called `results`: that name holds
# the per-class anchor examples collected in an earlier cell and would be
# rebound to a plain set by this loop.
for cl, members in classes.items():
    print('{:>20} {:>20}'.format(cl, len(members)))

16 distinct classes recorded...

            ø_adverb                  603
  ø_construct_anchor                    6
         ø_demon_heh                  220
     ø_attrib_anchor                   34
    ø_quant_durative                  288
     ø_qual_durative                  190
ø_quant_durative_ones                   12
       ø_pl_durative                   22
 ø_timeunit_durative                    5
         ø_DIFFICULT                   12
           pp_adverb                  398
    pp_attrib_anchor                  605
 pp_construct_anchor                  336
  pp_definite_anchor                  335
      pp_sffx_anchor                  117
   pp_quant_duration                   32


In [95]:
# single-phrase constructions not yet assigned to any class
left = get_remaining(singles)

print(len(left))

#A.show([(cx,)+L.d(cx,'phrase') for cx in left], end=20)

139


In [96]:
remaining_tags(singles)

Unnamed: 0,Total
PPtime.pl,27
PPtime.adjv,26
PPtime.quant.qual,19
PPtime.quant.card.adjv,15
PPtime.H.adjv,10
PPtime.pl.adjv,6
PPtime.pl.quant.card.adjv,5
PPtime.+VC,5
PPtime.rela+VC,4
PPtime.quant.du.adjv,4


### PP Quant Durative

In [65]:
def PP_qd_validate(time):
    '''
    Validates a prepositional quantified durative cx.
    Requires 'PPtime', 'quant' and 'card' among the specs and 
    at least one quantifier whose lexeme is not '>XD/' (אחד),
    so cxs quantified only by "one" are excluded.
    Returns True on a match, otherwise None.
    '''
    other_quants = {F.lex.v(q) for q in time.quants} - {'>XD/'}
    specs = time.specs
    has_specs = all(spec in specs for spec in ('PPtime', 'quant', 'card'))
    if has_specs and other_quants:
        return True
    
def token_PP_qd(time):
    '''Tokenizes PP quantified durative cxs by the lexeme of the first time word'''
    return F.lex_utf8.v(time.times[0])
    
# gather the prepositional quantified durative constructions
PP_qdtimes = CXdata(singles, PP_qd_validate, token_PP_qd)
        
# NOTE(review): this key is spelled 'PP_...' while every other prepositional
# class key in this notebook uses lower-case 'pp_...' -- confirm whether that
# is intended; this cell also looks like it partly covers the same ground as
# the 'pp_quant_duration' class
classes['PP_quant_durative'] = PP_qdtimes.timeset
        
prog()

48 matching constructions found...
	0.63 (2430) now accounted for
