In [None]:
# %%

import pandas as pd

from source.utils import FREQ_DIR, RESULT_DIR, UCS_DIR, confirm_dir
from source.utils.associate import (BINARY_ASSOC_ARGS, add_extra_am,
                                    associate_ucs, confirm_basic_ucs)
from source.utils.associate import convert_ucs_to_csv as ucs2csv
from source.utils.associate import get_associations_csv as init_am, AM_DF_DIR
from source.utils.associate import manipulate_ucs, seek_readable_ucs, adjust_assoc_columns

# Show floats in DataFrame displays with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Columns (in display order) selected for the summary table that is later
# passed to `adjust_assoc_columns()`.
FOCUS = [
    # joint frequency and its deviation from expectation
    'f',
    'unexpected_f',
    # association measures
    'conservative_log_ratio',
    'am_p1_given2',
    'am_p2_given1',
    'am_p1_given2_simple',
    'am_p2_given1_simple',
    'am_log_likelihood',
    'mutual_information',
    'am_odds_ratio_disc',
    't_score',
    # totals, marginal frequencies and expected frequency
    'N',
    'f1',
    'f2',
    'E11',
    # the two lemmas of each pair
    'l1',
    'l2',
]

In [None]:
# %%

# Polarity group of each trigger lemma; `add_feature()` turns this into the
# `polarity` column.
# NOTE(review): `TRIGGER_QUANT` also lists 'every', 'seldom' and 'few', which
# have no entry here, so those lemmas would receive an empty polarity label --
# confirm whether that is intended.
TRIGGER_POLARITY = {
    'positive': {
        'all', 'always', 'both', 'either',
        'everybody', 'everyone', 'everything',
        'many', 'often', 'or',
        'some', 'somebody', 'someone', 'something', 'sometimes',
    },
    'negative': {
        'barely', 'hardly', 'neither', 'never',
        'no', 'nobody', 'none', 'nor', 'nothing',
        'rarely', 'scarcely',
    },
}

In [None]:
# %%

# Quantification type of each trigger lemma; `add_feature()` turns this into
# the `quant` column.
TRIGGER_QUANT = {
    'existential': {
        'some', 'somebody', 'someone', 'something',
        'either', 'or', 'sometimes',
    },
    'universal': {
        'all', 'every', 'always',
        'everybody', 'everyone', 'both', 'everything',
    },
    'not_exist': {
        'neither', 'never', 'no', 'nobody',
        'none', 'nor', 'nothing',
    },
    'hedged_not_exist': {
        'barely', 'hardly', 'rarely',
        'scarcely', 'seldom', 'few',
    },
    'hedged_universal': {
        'many', 'often',
    },
}

In [None]:
# %%

def invert_set_dict(d: dict):
    """Flip a ``{group: members}`` mapping into ``{member: group}``.

    Groups are visited in the dict's own iteration order. If a member occurs
    in more than one group, the group visited last is the one recorded.
    """
    member_to_group = {}
    for group, members in d.items():
        for member in members:
            member_to_group[member] = group
    return member_to_group

In [None]:
# %%

UNIT = 'Adj'
PAT_DIR = 'ANYmirror'
TRIG_TSV = FREQ_DIR.joinpath(
    f'{PAT_DIR}/ucs_format/Trig{UNIT}_frq-thrMIN-7.35f.tsv')
FRQ_FLOOR = 50

# %% [markdown]

 1. Run `seek_readable_ucs()` to generate a consistent output path

In [None]:
readable = seek_readable_ucs(min_freq=FRQ_FLOOR,
                             ucs_subdir='trigger_eval',
                             contained_counts_path=TRIG_TSV)
print(readable.relative_to(RESULT_DIR))

    > seeking `trigger_eval/ANYmirror/readable/TrigAdj_frq-thrMIN-7.35f_min50x*` frequency data and initial associations...
ucs/trigger_eval/ANYmirror/readable/TrigAdj_frq-thrMIN-7.35f_min50x.rsort-view_am-only.txt


# %% [markdown]

 Snippet of frequencies to be analyzed (`TRIG_TSV`)

In [None]:
! head -5 {TRIG_TSV} | column -t

23978  something  different
15511  something  simple
11890  something  wrong
10693  something  special
10204  something  important


# %% [markdown]

 2. Run `confirm_basic_ucs()`

In [None]:
# The initial UCS data set sits one directory above `readable/` and shares the
# readable file's stem, with the view suffix swapped for `.ds.gz`.
basic_ucs_path = readable.parent.parent.joinpath(
    readable.name.replace('.rsort-view_am-only.txt', '.ds.gz'))
# The closing backtick was missing, which left the code span unterminated.
print(f'Creating initial UCS table: `{basic_ucs_path.relative_to(RESULT_DIR)}`')

# `confirm_basic_ucs()` returns the path that is used from here on.
basic_ucs_path = confirm_basic_ucs(
    basic_ucs_path,
    freq_floor=FRQ_FLOOR,
    contained_counts_path=TRIG_TSV)

Creating initial UCS table: `ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz

## Creating initial UCS table...

```
( cat /share/compling/projects/sanpi/results/freq_out/ANYmirror/ucs_format/TrigAdj_frq-thrMIN-7.35f.tsv \ 
  | ucs-make-tables --types --threshold=50 /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz ) \ 
  && ucs-sort /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz BY f2- f1- INTO /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz
== Note ==
    N = total number of tokens/all counts summed
    V = total number of rows/number of unique l1+l2 combinations before filtering to 50+ tokens
+ time to make table → 00:00:00.398
```

Saving initial frequency table in readable .txt format...

```
ucs-print -o /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/readable/TrigAdj_frq-thrMIN-7.3

# %% [markdown]

 Excerpt of initial UCS table

In [None]:
init_readable = UCS_DIR.joinpath(
    f'trigger_eval/{PAT_DIR}/readable'
).joinpath(f'{TRIG_TSV.name.replace(".tsv","")}_min{FRQ_FLOOR}x.init.txt')
! head -7 {init_readable}

        l1  l2                     f     f2      f1        N  
----------  -----------------  -----  -----  ------  -------  
        or  important           3407  43776  326614  1761853  
 something  important          10204  43776  318343  1761853  
       all  important           6507  43776  249279  1761853  
      some  important            777  43776  114929  1761853  
     never  important           4677  43776  109723  1761853  


# %% [markdown]

 3. Run `associate_ucs()`

In [None]:
# Step 3: run the association-measure computation on the initial UCS table.
associate_ucs(basic_ucs_path)

# Glob pattern for the log file(s) belonging to this pattern/unit/threshold.
# NOTE(review): absolute, machine-specific path with a doubled slash
# (`ucs//ucs-`) -- consider building it from a shared directory constant.
transform_ucs_log = f'/share/compling/projects/sanpi/logs/associate/ucs//ucs-{PAT_DIR}_Trig{UNIT}_frq-thrMIN-7-35f_min{FRQ_FLOOR}x*.log'
# `ls -t1 ... | head -1` picks the most recently modified matching log;
# show its first 15 lines, an ellipsis, and its last 2 lines.
! head -15 `ls -t1 {transform_ucs_log} | head -1`
! echo '...'
! tail -2 `ls -t1 {transform_ucs_log} | head -1`

# Peek at the start of the readable table located by `seek_readable_ucs()`.
! head -5 {readable}


Calculating UCS associations...

```
bash /share/compling/projects/sanpi/script/transform_ucs.sh /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz
> log will be saved to: /share/compling/projects/sanpi/logs/associate/ucs//ucs-ANYmirror_TrigAdj_frq-thrMIN-7-35f_min50x.2024-05-25_0136.log
...
```

+ time elapsed → 00:00:14.559
# Manipulating TrigAdj_frq-thrMIN-7-35f_min50x ucs table
path to this script: /share/compling/projects/sanpi/script/transform_ucs.sh
Sat May 25 01:36:24 EDT 2024
(TMP: /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/tmp/tmp_ANYmirror-20240525-013624.TrigAdj_frq-thrMIN-7-35f_min50x)
## Initial Contingency Info

DATA SET FILE:  /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/TrigAdj_frq-thrMIN-7.35f_min50x.ds.gz

# Frequency signatures computed by the ucs-make-tables tool for relational cooccurrences.
# Sample size:  N = 1761853 tokens,  V = 83422 pair types.
# A frequency thre

# %% [markdown]

 4. Run `ucs2csv()` (alias of `convert_ucs_to_csv`) to convert the readable UCS table to CSV

# %% [markdown]

 4. Run `ucs2csv()` (alias of `convert_ucs_to_csv`) to convert the readable UCS table to CSV

In [None]:
# Hard-coded vocabulary sizes, keyed by pattern directory and then by unit.
# The ANYmirror/Adj value (83422) equals the `V` ("pair types") figure printed
# in the transform log for this table; the other numbers were presumably read
# off the corresponding logs the same way -- TODO confirm.
# NOTE(review): `None` entries have no recorded value, so selecting one of
# those combinations passes `vocab=None` on to `add_extra_am()`.
VOCABS = {'ANYmirror': {'Adv': 23125, 'Bigr': 395338, 'Adj': 83422},
          'NEGmirror': {'Adv': 5004, 'Bigr': None, 'Adj': None},
          'POSmirror': {'Adv': 18121, 'Bigr': None, 'Adj': None}
          }  # HACK
# Vocabulary size for the pattern/unit configured at the top of the notebook.
VOCAB = VOCABS[PAT_DIR][UNIT]

# Convert the readable UCS text table to CSV and report where it was written.
csv_path = ucs2csv(readable)
print(f'CSV: `{csv_path.relative_to(RESULT_DIR)}`')

# Load the CSV; `convert_dtypes()` switches columns to pandas' nullable dtypes.
trig_amdf = pd.read_csv(csv_path).convert_dtypes()
trig_amdf

UCS table text converted & saved as /share/compling/projects/sanpi/results/ucs/trigger_eval/ANYmirror/readable/TrigAdj_frq-thrMIN-7.35f_min50x.rsort-view_am-only.csv
CSV: `ucs/trigger_eval/ANYmirror/readable/TrigAdj_frq-thrMIN-7.35f_min50x.rsort-view_am-only.csv`


Unnamed: 0,l1,l2,f,E11,am_log_likelihood,am_odds_ratio_disc,am_p1_given2,am_p2_given1,am_p1_given2_simple,am_p2_given1_simple,f1,f2,N
0,never,early,4901,334.43,24295.64,2.22,0.85,0.04,0.91,0.04,109723,5370,1761853
1,or,thereof,71,13.16,239.33,2.80,0.81,0.00,1.00,0.00,326614,71,1761853
2,or,upstart,70,12.98,235.96,2.79,0.81,0.00,1.00,0.00,326614,70,1761853
3,or,triple,121,22.62,396.70,2.55,0.81,0.00,0.99,0.00,326614,122,1761853
4,all,doom,86,13.02,293.85,1.91,0.79,0.00,0.93,0.00,249279,92,1761853
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5097,something,aware,98,2033.63,-3671.82,-1.40,-0.17,-0.01,0.01,0.00,318343,11255,1761853
5098,something,excited,66,1791.69,-3370.12,-1.52,-0.18,-0.01,0.01,0.00,318343,9916,1761853
5099,something,sure,59,1760.61,-3353.01,-1.56,-0.18,-0.01,0.01,0.00,318343,9744,1761853
5100,something,happy,92,2594.12,-4912.03,-1.54,-0.18,-0.01,0.01,0.00,318343,14357,1761853


In [None]:
# %%

# Index each row by a "<l1>~<l2>" key built from its two lemma columns.
pair_keys = (trig_amdf['l1'] + '~' + trig_amdf['l2']).astype('string')
trig_amdf = trig_amdf.assign(key=pair_keys).set_index('key')
trig_amdf
# %% [markdown]
# 6. Save to `./results/assoc_df/`

# Mirror the CSV's location under `UCS_DIR` into `AM_DF_DIR`, dropping the
# `/readable` directory level and the `.rsort-view_am-only` infix.
df_csv_path = AM_DF_DIR.joinpath(
    str(csv_path.relative_to(UCS_DIR))
    .replace('/readable', '')
    .replace('.rsort-view_am-only', ''))

# Files that already exist are left untouched.
if not df_csv_path.is_file():
    confirm_dir(df_csv_path.parent)
    trig_amdf.to_csv(df_csv_path)

# Pickled copy next to the CSV. The path is computed once and reused for both
# the existence check and the write, so the two cannot drift apart.
df_pkl_path = df_csv_path.with_suffix('.pkl.gz')
if not df_pkl_path.is_file():
    # Also covers the case where the CSV already existed, so the directory
    # check above was skipped.
    confirm_dir(df_pkl_path.parent)
    trig_amdf.to_pickle(df_pkl_path)
# %% [markdown]
# 7. Add additional AM via `add_extra_am()`
ex_trig_amdf = add_extra_am(df=trig_amdf,
                            verbose=True,
                            vocab=VOCAB,
                            metrics=['t_score', 'mutual_information']).convert_dtypes()

#%% [markdown]
# Add trigger features as columns: polarity and quantification type
def add_feature(triggers: pd.Series,
                group_dict: dict) -> pd.Series:
    """Label each trigger with the name of the group that contains it.

    Args:
        triggers: Series of trigger lemmas.
        group_dict: mapping of group name -> collection of member lemmas.

    Returns:
        A Series aligned with `triggers` holding the group name for each
        lemma, or an empty string for lemmas that belong to no group.
    """
    label_of = invert_set_dict(group_dict)

    def _label(trigger):
        return label_of.get(trigger, '')

    return triggers.apply(_label)

ex_trig_amdf['polarity'] = add_feature(ex_trig_amdf.l1, TRIGGER_POLARITY)
ex_trig_amdf['quant'] = add_feature(ex_trig_amdf.l1, TRIGGER_QUANT)
# %% [markdown]
# Save extended AM tables to `extra/` subdirectory if not already saved
df_extra_csv = df_csv_path.parent / 'extra' / \
    df_csv_path.name.replace('.csv', '_extra.csv')
print(df_extra_csv)
if not df_extra_csv.is_file():
    confirm_dir(df_extra_csv.parent)
    ex_trig_amdf.to_csv(df_extra_csv)

df_extra_pkl = df_extra_csv.with_suffix('.pkl.gz')
if not df_extra_pkl.is_file():
    ex_trig_amdf.to_pickle(df_extra_pkl)

# %%
ex_trig_full = ex_trig_amdf.copy()
ex_trig_amdf = adjust_assoc_columns(
    ex_trig_full[['polarity', 'quant'] + FOCUS]).sort_values('LRC', ascending=False)
# %% [markdown]
# Strongest Associations
# Top 8 conservative log ratio $LRC$ values
ex_trig_amdf.nlargest(8, "LRC")
# %% [markdown]
# Top 8 $\Delta P(\texttt{trigger}|\texttt{adv})$ values
ex_trig_amdf.nlargest(8, "dP1")
# %% [markdown]
# Top 8 conditional probability $P(\texttt{trigger}|\texttt{adv})$ values
ex_trig_amdf.nlargest(8, "dP1_simple")
# %% [markdown]
# Top 8 $\Delta P(\texttt{adv}|\texttt{trigger})$ values
ex_trig_amdf.nlargest(8, "dP2")
# %% [markdown]
# Top 8 conditional probability $P(\texttt{adv}|\texttt{trigger})$ values
ex_trig_amdf.nlargest(8, "dP2_simple")
# %% [markdown]
# Top 8 log-likelihood $G^2$ values
ex_trig_amdf.nlargest(8, "G2")
# %%

# Adverb lemmas whose trigger pairings are previewed below, grouped as in the
# original section headings.
POS_PRONE_ADVERBS = ('pretty', 'rather', 'fairly', 'somewhat',
                     'utterly', 'definitely', 'slightly')
NEG_PRONE_ADVERBS = ('exactly', 'before', 'that', 'ever',
                     'necessarily', 'yet')


def preview_l2(df: pd.DataFrame, l2_lemma: str, top: int = 20) -> pd.DataFrame:
    """Return the first `top` rows of `df` whose key is ``<trigger>~<l2_lemma>``.

    `df` must be indexed by "<l1>~<l2>" keys. Rows keep their existing order,
    so on a frame already sorted by LRC these are the top rows by LRC.

    `DataFrame.filter(like=...)` is a substring test on the key, so 'ever'
    would also select keys such as '...~everywhere'. Matching on the end of
    the key selects only pairs whose second lemma is exactly `l2_lemma`.
    """
    is_match = df.index.str.endswith(f'~{l2_lemma}', na=False)
    return df.loc[is_match].iloc[:top, :]


# These lemmas are adverbs, so the previews are skipped when the table pairs
# triggers with adjectives. The former guard was a bare `exit` expression; it
# is never called, so it did not stop the cell from running on.
if UNIT != 'Adj':
    for heading, lemmas in (
        ('Examples for Positive Polarity Prone Adverbs (Top 20 by LRC)',
         POS_PRONE_ADVERBS),
        ('Examples for Negative Polarity Prone Adverbs (Top 20 by LRC)',
         NEG_PRONE_ADVERBS),
    ):
        print(f'--- {heading} ---')
        for lemma in lemmas:
            print(f'trigger~{lemma}')
            # Only a cell's final expression is rendered automatically, so
            # every preview is displayed explicitly (`display` is the builtin
            # that IPython provides in notebook kernels).
            display(preview_l2(ex_trig_amdf, lemma))


Preview of Extended Measures (rounded)

| key               |   t_score |   mutual_information |   deltaP_min |   deltaP_max |   deltaP_max_abs |   deltaP_product |   unexpected_f |   unexpected_ratio |
|:------------------|----------:|---------------------:|-------------:|-------------:|-----------------:|-----------------:|---------------:|-------------------:|
| never~early       |     65.23 |                 1.17 |         0.04 |         0.85 |             0.85 |             0.04 |       4,566.57 |               0.93 |
| or~thereof        |      6.86 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          57.84 |               0.81 |
| or~upstart        |      6.82 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          57.02 |               0.81 |
| or~triple         |      8.94 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          98.38 |   

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


# %% [markdown]

 6. Save to `./results/assoc_df/`

In [None]:
# Mirror the CSV's location under `UCS_DIR` into `AM_DF_DIR`, dropping the
# `/readable` directory level and the `.rsort-view_am-only` infix.
df_csv_path = AM_DF_DIR.joinpath(
    str(csv_path.relative_to(UCS_DIR))
    .replace('/readable', '')
    .replace('.rsort-view_am-only', ''))

# Files that already exist are left untouched.
if not df_csv_path.is_file():
    confirm_dir(df_csv_path.parent)
    trig_amdf.to_csv(df_csv_path)

# Pickled copy next to the CSV. The path is computed once and reused for both
# the existence check and the write, so the two cannot drift apart.
df_pkl_path = df_csv_path.with_suffix('.pkl.gz')
if not df_pkl_path.is_file():
    # Also covers the case where the CSV already existed, so the directory
    # check above was skipped.
    confirm_dir(df_pkl_path.parent)
    trig_amdf.to_pickle(df_pkl_path)

# %% [markdown]

 7. Add additional AM via `add_extra_am()`

In [None]:
ex_trig_amdf = add_extra_am(df=trig_amdf,
                            verbose=True,
                            vocab=VOCAB,
                            metrics=['t_score', 'mutual_information']).convert_dtypes()


Preview of Extended Measures (rounded)

| key               |   t_score |   mutual_information |   deltaP_min |   deltaP_max |   deltaP_max_abs |   deltaP_product |   unexpected_f |   unexpected_ratio |
|:------------------|----------:|---------------------:|-------------:|-------------:|-----------------:|-----------------:|---------------:|-------------------:|
| never~early       |     65.23 |                 1.17 |         0.04 |         0.85 |             0.85 |             0.04 |       4,566.57 |               0.93 |
| or~thereof        |      6.86 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          57.84 |               0.81 |
| or~upstart        |      6.82 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          57.02 |               0.81 |
| or~triple         |      8.94 |                 0.73 |         0.00 |         0.81 |             0.81 |             0.00 |          98.38 |   

#%% [markdown]

 Add trigger features as columns: polarity and quantification type

In [None]:
def add_feature(triggers: pd.Series,
                group_dict: dict) -> pd.Series:
    """Label each trigger with the name of the group that contains it.

    Args:
        triggers: Series of trigger lemmas.
        group_dict: mapping of group name -> collection of member lemmas.

    Returns:
        A Series aligned with `triggers` holding the group name for each
        lemma, or an empty string for lemmas that belong to no group.
    """
    label_of = invert_set_dict(group_dict)

    def _label(trigger):
        return label_of.get(trigger, '')

    return triggers.apply(_label)

ex_trig_amdf['polarity'] = add_feature(ex_trig_amdf.l1, TRIGGER_POLARITY)
ex_trig_amdf['quant'] = add_feature(ex_trig_amdf.l1, TRIGGER_QUANT)

# %% [markdown]

 Save extended AM tables to `extra/` subdirectory if not already saved

In [None]:
df_extra_csv = df_csv_path.parent / 'extra' / \
    df_csv_path.name.replace('.csv', '_extra.csv')
print(df_extra_csv)
if not df_extra_csv.is_file():
    confirm_dir(df_extra_csv.parent)
    ex_trig_amdf.to_csv(df_extra_csv)

df_extra_pkl = df_extra_csv.with_suffix('.pkl.gz')
if not df_extra_pkl.is_file():
    ex_trig_amdf.to_pickle(df_extra_pkl)

/share/compling/projects/sanpi/results/assoc_df/trigger_eval/ANYmirror/extra/TrigAdj_frq-thrMIN-7.35f_min50x_extra.csv


In [None]:
# %%

# Keep the complete extended table under `ex_trig_full`, then rebind
# `ex_trig_amdf` to a trimmed view: the polarity/quant features plus the FOCUS
# columns, passed through `adjust_assoc_columns()` and sorted by LRC with the
# largest values first.
# NOTE(review): this cell reuses the name `ex_trig_amdf` for its own result,
# so running it a second time feeds the already-adjusted table back in. The
# tables displayed below carry short column names (e.g. `LRC`, `G2`) instead
# of the FOCUS names, so a re-run would presumably fail on the column
# selection -- consider giving the result a distinct name.
ex_trig_full = ex_trig_amdf.copy()
ex_trig_amdf = adjust_assoc_columns(
    ex_trig_full[['polarity', 'quant'] + FOCUS]).sort_values('LRC', ascending=False)

# %% [markdown]

 Strongest Associations
 Top 8 conservative log ratio $LRC$ values

In [None]:
ex_trig_amdf.nlargest(8, "LRC")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
never~early,negative,not_exist,4901,4566.57,6.92,0.85,0.04,0.91,0.04,24295.64,1.17,2.22,65.23,1761853,109723,5370,334.43,never,early
never~late,negative,not_exist,10804,9984.87,5.94,0.76,0.1,0.82,0.1,48950.8,1.12,1.88,96.06,1761853,109723,13153,819.13,never,late
everything~peachy,positive,universal,82,79.73,5.57,0.79,0.0,0.81,0.0,526.14,1.56,2.27,8.81,1761853,39516,101,2.27,everything,peachy
nobody~hurt,negative,not_exist,55,54.28,5.48,0.27,0.01,0.27,0.01,385.11,1.88,2.03,7.32,1761853,6311,201,0.72,nobody,hurt
nobody~sure,negative,not_exist,1232,1197.1,5.11,0.12,0.19,0.13,0.2,6786.35,1.55,1.7,34.11,1761853,6311,9744,34.9,nobody,sure
or~third,positive,existential,437,354.13,5.08,0.79,0.0,0.98,0.0,1381.79,0.72,2.26,16.94,1761853,326614,447,82.87,or,third
nobody~surprised,negative,not_exist,259,252.54,5.03,0.14,0.04,0.14,0.04,1454.67,1.6,1.69,15.69,1761853,6311,1803,6.46,nobody,surprised
someone~senior,positive,existential,112,108.48,4.99,0.52,0.0,0.54,0.0,629.91,1.5,1.83,10.25,1761853,29662,209,3.52,someone,senior


# %% [markdown]

 Top 8 $\Delta P(\texttt{trigger}|\texttt{adv})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP1")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
never~early,negative,not_exist,4901,4566.57,6.92,0.85,0.04,0.91,0.04,24295.64,1.17,2.22,65.23,1761853,109723,5370,334.43,never,early
or~thereof,positive,existential,71,57.84,0.41,0.81,0.0,1.0,0.0,239.33,0.73,2.8,6.86,1761853,326614,71,13.16,or,thereof
or~upstart,positive,existential,70,57.02,0.39,0.81,0.0,1.0,0.0,235.96,0.73,2.79,6.82,1761853,326614,70,12.98,or,upstart
or~triple,positive,existential,121,98.38,1.2,0.81,0.0,0.99,0.0,396.7,0.73,2.55,8.94,1761853,326614,122,22.62,or,triple
all~doom,positive,universal,86,72.98,3.14,0.79,0.0,0.93,0.0,293.85,0.82,1.91,7.87,1761853,249279,92,13.02,all,doom
or~third,positive,existential,437,354.13,5.08,0.79,0.0,0.98,0.0,1381.79,0.72,2.26,16.94,1761853,326614,447,82.87,or,third
or~fourth,positive,existential,213,172.59,4.01,0.79,0.0,0.98,0.0,672.48,0.72,2.23,11.83,1761853,326614,218,40.41,or,fourth
everything~peachy,positive,universal,82,79.73,5.57,0.79,0.0,0.81,0.0,526.14,1.56,2.27,8.81,1761853,39516,101,2.27,everything,peachy


# %% [markdown]

 Top 8 conditional probability $P(\texttt{trigger}|\texttt{adv})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP1_simple")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
or~thereof,positive,existential,71,57.84,0.41,0.81,0.0,1.0,0.0,239.33,0.73,2.8,6.86,1761853,326614,71,13.16,or,thereof
or~upstart,positive,existential,70,57.02,0.39,0.81,0.0,1.0,0.0,235.96,0.73,2.79,6.82,1761853,326614,70,12.98,or,upstart
or~triple,positive,existential,121,98.38,1.2,0.81,0.0,0.99,0.0,396.7,0.73,2.55,8.94,1761853,326614,122,22.62,or,triple
or~third,positive,existential,437,354.13,5.08,0.79,0.0,0.98,0.0,1381.79,0.72,2.26,16.94,1761853,326614,447,82.87,or,third
or~fourth,positive,existential,213,172.59,4.01,0.79,0.0,0.98,0.0,672.48,0.72,2.23,11.83,1761853,326614,218,40.41,or,fourth
or~medium,positive,existential,116,93.94,2.84,0.79,0.0,0.97,0.0,364.26,0.72,2.17,8.72,1761853,326614,119,22.06,or,medium
or~least,positive,existential,104,84.16,2.67,0.79,0.0,0.97,0.0,324.45,0.72,2.12,8.25,1761853,326614,107,19.84,or,least
or~fifth,positive,existential,66,53.39,1.57,0.79,0.0,0.97,0.0,205.25,0.72,2.07,6.57,1761853,326614,68,12.61,or,fifth


# %% [markdown]

 Top 8 $\Delta P(\texttt{adv}|\texttt{trigger})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP2")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
nobody~sure,negative,not_exist,1232,1197.1,5.11,0.12,0.19,0.13,0.2,6786.35,1.55,1.7,34.11,1761853,6311,9744,34.9,nobody,sure
never~late,negative,not_exist,10804,9984.87,5.94,0.76,0.1,0.82,0.1,48950.8,1.12,1.88,96.06,1761853,109723,13153,819.13,never,late
everybody~excited,positive,universal,665,620.17,3.68,0.06,0.08,0.07,0.08,2436.5,1.17,1.24,24.05,1761853,7965,9916,44.83,everybody,excited
hardly~different,negative,hedged_not_exist,519,407.29,1.9,0.01,0.07,0.01,0.09,816.31,0.67,0.71,17.88,1761853,5468,35994,111.71,hardly,different
something~different,positive,existential,23978,17474.37,3.09,0.5,0.07,0.67,0.08,42208.42,0.57,0.99,112.85,1761853,318343,35994,6503.63,something,different
scarcely~different,negative,hedged_not_exist,68,51.96,1.18,0.0,0.07,0.0,0.09,96.21,0.63,0.66,6.3,1761853,785,35994,16.04,scarcely,different
everything~right,positive,universal,2739,2436.82,3.31,0.18,0.06,0.2,0.07,7841.33,0.96,1.07,46.56,1761853,39516,13473,302.18,everything,right
nothing~important,negative,not_exist,8025,5570.46,1.82,0.13,0.06,0.18,0.08,9007.03,0.51,0.6,62.18,1761853,98788,43776,2454.54,nothing,important


# %% [markdown]

 Top 8 conditional probability $P(\texttt{trigger}|\texttt{adv})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP1_simple")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
or~thereof,positive,existential,71,57.84,0.41,0.81,0.0,1.0,0.0,239.33,0.73,2.8,6.86,1761853,326614,71,13.16,or,thereof
or~upstart,positive,existential,70,57.02,0.39,0.81,0.0,1.0,0.0,235.96,0.73,2.79,6.82,1761853,326614,70,12.98,or,upstart
or~triple,positive,existential,121,98.38,1.2,0.81,0.0,0.99,0.0,396.7,0.73,2.55,8.94,1761853,326614,122,22.62,or,triple
or~third,positive,existential,437,354.13,5.08,0.79,0.0,0.98,0.0,1381.79,0.72,2.26,16.94,1761853,326614,447,82.87,or,third
or~fourth,positive,existential,213,172.59,4.01,0.79,0.0,0.98,0.0,672.48,0.72,2.23,11.83,1761853,326614,218,40.41,or,fourth
or~medium,positive,existential,116,93.94,2.84,0.79,0.0,0.97,0.0,364.26,0.72,2.17,8.72,1761853,326614,119,22.06,or,medium
or~least,positive,existential,104,84.16,2.67,0.79,0.0,0.97,0.0,324.45,0.72,2.12,8.25,1761853,326614,107,19.84,or,least
or~fifth,positive,existential,66,53.39,1.57,0.79,0.0,0.97,0.0,205.25,0.72,2.07,6.57,1761853,326614,68,12.61,or,fifth


# %% [markdown]

 Top 8 $\Delta P(\texttt{adv}|\texttt{trigger})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP2")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
nobody~sure,negative,not_exist,1232,1197.1,5.11,0.12,0.19,0.13,0.2,6786.35,1.55,1.7,34.11,1761853,6311,9744,34.9,nobody,sure
never~late,negative,not_exist,10804,9984.87,5.94,0.76,0.1,0.82,0.1,48950.8,1.12,1.88,96.06,1761853,109723,13153,819.13,never,late
everybody~excited,positive,universal,665,620.17,3.68,0.06,0.08,0.07,0.08,2436.5,1.17,1.24,24.05,1761853,7965,9916,44.83,everybody,excited
hardly~different,negative,hedged_not_exist,519,407.29,1.9,0.01,0.07,0.01,0.09,816.31,0.67,0.71,17.88,1761853,5468,35994,111.71,hardly,different
something~different,positive,existential,23978,17474.37,3.09,0.5,0.07,0.67,0.08,42208.42,0.57,0.99,112.85,1761853,318343,35994,6503.63,something,different
scarcely~different,negative,hedged_not_exist,68,51.96,1.18,0.0,0.07,0.0,0.09,96.21,0.63,0.66,6.3,1761853,785,35994,16.04,scarcely,different
everything~right,positive,universal,2739,2436.82,3.31,0.18,0.06,0.2,0.07,7841.33,0.96,1.07,46.56,1761853,39516,13473,302.18,everything,right
nothing~important,negative,not_exist,8025,5570.46,1.82,0.13,0.06,0.18,0.08,9007.03,0.51,0.6,62.18,1761853,98788,43776,2454.54,nothing,important


# %% [markdown]

 Top 8 conditional probability $P(\texttt{adv}|\texttt{trigger})$ values

In [None]:
ex_trig_amdf.nlargest(8, "dP2_simple")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
nobody~sure,negative,not_exist,1232,1197.1,5.11,0.12,0.19,0.13,0.2,6786.35,1.55,1.7,34.11,1761853,6311,9744,34.9,nobody,sure
never~late,negative,not_exist,10804,9984.87,5.94,0.76,0.1,0.82,0.1,48950.8,1.12,1.88,96.06,1761853,109723,13153,819.13,never,late
hardly~different,negative,hedged_not_exist,519,407.29,1.9,0.01,0.07,0.01,0.09,816.31,0.67,0.71,17.88,1761853,5468,35994,111.71,hardly,different
scarcely~different,negative,hedged_not_exist,68,51.96,1.18,0.0,0.07,0.0,0.09,96.21,0.63,0.66,6.3,1761853,785,35994,16.04,scarcely,different
everybody~excited,positive,universal,665,620.17,3.68,0.06,0.08,0.07,0.08,2436.5,1.17,1.24,24.05,1761853,7965,9916,44.83,everybody,excited
nothing~important,negative,not_exist,8025,5570.46,1.82,0.13,0.06,0.18,0.08,9007.03,0.51,0.6,62.18,1761853,98788,43776,2454.54,nothing,important
something~different,positive,existential,23978,17474.37,3.09,0.5,0.07,0.67,0.08,42208.42,0.57,0.99,112.85,1761853,318343,35994,6503.63,something,different
everything~right,positive,universal,2739,2436.82,3.31,0.18,0.06,0.2,0.07,7841.33,0.96,1.07,46.56,1761853,39516,13473,302.18,everything,right


# %% [markdown]

 Top 8 log-likelihood $G^2$ values

In [None]:
ex_trig_amdf.nlargest(8, "G2")

Unnamed: 0_level_0,polarity,quant,f,unexp_f,LRC,dP1,dP2,dP1_simple,dP2_simple,G2,MI,odds_r_disc,t,N,f1,f2,exp_f,l1,l2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
never~late,negative,not_exist,10804,9984.87,5.94,0.76,0.1,0.82,0.1,48950.8,1.12,1.88,96.06,1761853,109723,13153,819.13,never,late
something~different,positive,existential,23978,17474.37,3.09,0.5,0.07,0.67,0.08,42208.42,0.57,0.99,112.85,1761853,318343,35994,6503.63,something,different
never~early,negative,not_exist,4901,4566.57,6.92,0.85,0.04,0.91,0.04,24295.64,1.17,2.22,65.23,1761853,109723,5370,334.43,never,early
something~simple,positive,existential,15511,10924.82,2.73,0.44,0.04,0.61,0.05,23558.14,0.53,0.87,87.72,1761853,318343,25382,4586.18,something,simple
something~special,positive,existential,10693,7895.43,3.2,0.51,0.03,0.69,0.03,19587.7,0.58,1.02,76.35,1761853,318343,15483,2797.57,something,special
all~familiar,positive,universal,8180,6355.53,3.25,0.5,0.03,0.63,0.03,16689.22,0.65,1.04,70.27,1761853,249279,12895,1824.47,all,familiar
something~wrong,positive,existential,11890,8119.8,2.48,0.39,0.03,0.57,0.04,16004.43,0.5,0.79,74.47,1761853,318343,20866,3770.2,something,wrong
all~common,positive,universal,5197,4108.97,3.47,0.54,0.02,0.68,0.02,11477.03,0.68,1.11,57.0,1761853,249279,7690,1088.03,all,common
