# Semantic Function Associations

In [1]:
from scripts.imports import *

# Exporter shared by the cells below (used via `out.table` and `out.number`).
# NOTE(review): presumably writes under paths['outdir'] in a 'semantics'
# group -- confirm against the Exporter definition in scripts.imports.
out = Exporter(paths['outdir'], 'semantics')

In [2]:
# Show every column label of `df`.
# NOTE(review): `df` is never assigned in the visible cells; presumably it is
# provided by the star import from scripts.imports -- confirm.
df.columns

Index(['verse', 'book', 'booksuper', 'canon_part', 'period', 'times_etcbc',
       'times_utf8', 'times_utf8d', 'times_POS', 'TA Heads', 'head_utf8',
       'funct_type', 'function', 'quality', 'name', 'text', 'n_times',
       'lex_token', 'is_advb', 'tense', 'firstw', 'genre', 'domain', 'gendom',
       'clause', 'sentence', 'cl_rela', 'cl_type', 'cl_kind', 'verb',
       'verb_etcbc', 'verb_utf8', 'verb_text', 'verbform', 'verb_stem',
       'cl_args', 'has_objc', 'has_cmpl', 'has_subj', 'has_oc',
       'Time Position', 'cl_type2', 'cl_nsuccs', 'cl_clust50', 'cl_clust10',
       'verbtense', 'vt_order', 'PP', 'unmodified', 'modtag', 'modtag2',
       'front', 'ph_type', 'has_time', 'main_genre', 'APPO', 'DEF', 'ORDN',
       'ØPP', 'GP', 'QUANT', 'PL', 'NUM', 'PARA', 'DEMON', 'demon_type',
       'ADJV', 'TIMEAPPO', 'SPEC', 'SFX3', 'SFX', 'DU', 'SFX1', 'SFX2', 'ADVB',
       'CARDC'],
      dtype='object')

In [3]:
# (rows, columns) of `df`, before the filtering applied in the cells below.
df.shape

(3648, 76)

In [4]:
# Keep only the rows whose funct_type is 'main'; the analyses below start
# from this subset.
main_df = df.loc[df['funct_type'] == 'main']

# Modifier Tendencies by Function

## Simultaneous

In [5]:
# Rows of main_df whose function label is 'simultaneous'.
simdf = main_df.loc[main_df['function'] == 'simultaneous']

In [6]:
# Cross-tabulate the simultaneous rows: ph_type on the rows, modtag on the
# columns (see the rendered table below).
simmod_ct = pivot_ct(
    simdf,
    'ph_type',
    'modtag',
)

# Restrict the rows to the 'PP' and 'NP' phrase types, in that order.
simmod_ct = simmod_ct.loc[['PP', 'NP']]

# Keep only modifier tags observed more than 10 times across PP and NP combined.
# NOTE(review): the threshold is strict (`> 10`), so a tag with exactly 10
# observations is dropped -- switch to `>= 10` if a minimum of 10 was intended.
simmod_ct = simmod_ct.loc[:, simmod_ct.sum() > 10]

# Caption spelling fixed ("Modifer" -> "Modifier") to match the other tables.
out.table(
    simmod_ct,
    'simmod_ct',
    caption='Modifier Counts for Simultaneous PP and NP',
    adjustbox=True,
)

modtag,DEF,DEMON,C,ORDN,PL+C,PL+DEMON,NUM,PL+SFX,SFX,DEF+PL
ph_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
PP,222,321,217,173,45,41,34,29,28,22
NP,216,30,2,0,0,0,4,0,1,0


In [7]:
# Row-wise proportions: each count divided by the total of its own row
# (phrase type). The totals cover only the columns still present in
# simmod_ct, i.e. after the column filter applied when it was built.
simmod_pr = simmod_ct.div(simmod_ct.sum(axis=1), axis=0)

# Whole-number percentage strings (e.g. '85%') for display and export.
simmod_pc = simmod_pr.mul(100).round().astype(int).astype(str) + '%'

# Caption spelling fixed ("Modifer" -> "Modifier") to match the other tables.
out.table(
    simmod_pc,
    'simmod_pc',
    caption='Modifier Percentages of Simultaneous PP and NP',
    adjustbox=True,
)

modtag,DEF,DEMON,C,ORDN,PL+C,PL+DEMON,NUM,PL+SFX,SFX,DEF+PL
ph_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
PP,20%,28%,19%,15%,4%,4%,3%,3%,2%,2%
NP,85%,12%,1%,0%,0%,0%,2%,0%,0%,0%


In [8]:
# Export the share of simultaneous NP phrases tagged DEF, scaled to a percentage.
out.number(simmod_pr.loc['NP', 'DEF'] * 100, 'simmod_NP_DEF_pc')

'85'

In [9]:
# ! Uncomment for examples of all SIM NP modifiers

# # extract examples
# for phtype in simmod_pc.index:
#     print(phtype)
#     for mod in simmod_pc.loc[phtype].index:
#         exdata = simdf[
#             (simdf.ph_type == phtype)
#             & (simdf.modtag == mod)
#         ]
#         if exdata.shape[0]:
#             print(mod)
#             display(ts.show(exdata, spread=5, extra=['modtag']))
            
#     print('-'*50)

# Anterior, posterior, anterior_dur, posterior_dur data


In [10]:
# Function labels analysed in this section, in display order.
seqFuncts = ['anterior', 'posterior', 'anterior_dur', 'posterior_dur']

# Readable names for labels that need one; labels missing here fall back to
# the raw label itself (see the .get() call in the loop).
seq2name = {
    'anterior_dur': 'anterior durative',
    'posterior_dur': 'posterior durative',
}

# Short prefixes used to build the exported table names.
seq2abbrev = {
    'anterior': 'ant',
    'posterior': 'post',
    'anterior_dur': 'antdur',
    'posterior_dur': 'postdur',
}

# Per-function results, keyed by function label.
seqdata = {}

for f in seqFuncts:

    # Labels used for the printed heading and the table name/caption.
    nicename = seq2name.get(f, f).title()
    abbrev = seq2abbrev[f]

    # Rows for this function, then modtag2 counts and their share of the total.
    fdata = main_df.loc[main_df.function == f]
    mod_ct = fdata.modtag2.value_counts()
    mod_pr = mod_ct.div(mod_ct.sum())
    mod_cp = join_ct_pr(mod_ct, mod_pr)

    # Keep every intermediate so later cells can look it up by function label.
    seqdata[f] = dict(
        df=fdata,
        mod_ct=mod_ct,
        mod_pr=mod_pr,
        mod_cp=mod_cp,
    )

    print(nicename)
    exported = out.table(
        mod_cp,
        f'{abbrev}_mod_cp',
        caption=f'Modifier Frequencies for {nicename} Function'
    )
    display(exported)

Anterior


Unnamed: 0,count,percent
Ø,6,33%
PL+SFX,5,28%
SFX,4,22%
DEF,1,6%
PL+DEMON,1,6%
KL,1,6%


Posterior


Unnamed: 0,count,percent
Ø,111,50%
PL+SFX,34,15%
NUM+PL,15,7%
C,15,7%
PL+DEMON,12,5%
NUM,9,4%
SFX,7,3%
DEF,5,2%
PL,5,2%
PL+QUANT,3,1%


Anterior Durative


Unnamed: 0,count,percent
Ø,248,60%
DEF,61,15%
DEMON,60,15%
C,24,6%
NUM,4,1%
PL+C,4,1%
ORDN,3,1%
PL+DEMON,2,0%
PL,2,0%
QUANT+DEF,1,0%


Posterior Durative


Unnamed: 0,count,percent
Ø,32,57%
PL+C,10,18%
PL+SFX,5,9%
DEF,3,5%
DEMON,3,5%
PL,1,2%
ORDN,1,2%
NUM,1,2%


# Examine Cases

## Automatic Extraction