# Identifying Adverbs with Strongest Negative Environment Associations

In [178]:
from pathlib import Path

import pandas as pd
from pprint import pprint
# from source.utils import PKL_SUFF
from source.utils.associate import AM_DF_DIR, TOP_AM_DIR, adjust_assoc_columns
from source.utils.general import print_iter, snake_to_camel, timestamp_today, confirm_dir

SET_FLOOR = 300
MIR_FLOOR = 100
K = 6

TAG='ALL'
TOP_AM_TAG_DIR = TOP_AM_DIR / TAG
confirm_dir(TOP_AM_TAG_DIR)

# for loading `polar/*/bigram/*` tables
bigram_floor = 100
mirror_floor = 50

Set columns and diplay settings

In [179]:
FOCUS = ['f',
         'am_p1_given2', 'am_p1_given2_simple', 'conservative_log_ratio',
         'am_log_likelihood',
        #  'mutual_information', 
         'am_odds_ratio_disc', 't_score',
         'N', 'f1', 'f2', 'E11', 'unexpected_f', 
         'l1', 'l2']
pd.set_option('display.max_colwidth', 20)
pd.set_option('display.max_columns', 12)
pd.set_option('display.width', 90)
pd.set_option("display.precision", 2)
pd.set_option("styler.format.precision", 2)
pd.set_option("styler.format.thousands", ",")
pd.set_option("display.float_format", '{:,.2f}'.format)
# pd.set_option("styler.render.repr", "html")

In [180]:
def force_ints(_df):
    count_cols = _df.filter(regex=r'total|^[fN]').columns
    _df[count_cols] = _df[count_cols].astype('int')
    # _df[count_cols] = _df[:, count_cols].astype('int64')
    # print(_df.dtypes.to_frame('dtypes'))
    return _df

In [181]:
def nb_show_table(df, n_dec: int = 2,
                  adjust_columns: bool = True,
                   outpath:Path=None, 
                   return_df:bool=False) -> None: 
    _df = df.copy()
    try: 
        start_0 = _df.index.start == 0
    except AttributeError: 
        pass
    else:
        _df.index.name = 'rank'
        if start_0: 
            _df.index = _df.index + 1
    if adjust_columns: 
        _df = adjust_assoc_columns(_df)
    _df.columns = [f'`{c}`' for c in _df.columns]
    _df.index = [f'**{r}**' for r in _df.index ]
    table = _df.to_markdown(floatfmt=f',.{n_dec}f', intfmt=',')
    if outpath:
        outpath.write_text(table)

    print(f'\n{table}\n')
    return (_df if return_df else None)

## Set paths and load adverb association tables

In [182]:
def update_index(df, pat_name:str = None):
    neg_env_name = df.filter(like='NEG', axis=0).l1.iloc[0]
    # > will be either `NEGATED` or `NEGMIR`
    #   both are shortened to just `NEG` for the keys in their separate dataframes
    # > replace to avoid ambiguity in `key` values when combined
    #! some filtering relies on 'NEG', so have to keep that prefix
    index_update = pat_name or ('NEGmir' if neg_env_name.endswith('MIR') else 'NEGany')
    df.index = df.index.str.replace('NEG', index_update)
    return df

In [183]:
POLAR_DIR = AM_DF_DIR.joinpath('polar')

polar_adv_dirs = []
# results/assoc_df/polar/RBdirect/adv/extra/polarized-adv_35f-7c_min5000x_extra.pkl.gz
adv_am_paths = {
    p.name: tuple(
        p.joinpath('adv/extra').glob(
            f'*{TAG}*min{SET_FLOOR}*parq')
    )[0]
    for p in POLAR_DIR.iterdir()}
pprint(adv_am_paths)

{'RBdirect': PosixPath('/share/compling/projects/sanpi/results/assoc_df/polar/RBdirect/adv/extra/polarized-adv_ALL-direct_min300x_extra.parq'),
 'mirror': PosixPath('/share/compling/projects/sanpi/results/assoc_df/polar/mirror/adv/extra/polarized-adv_ALL-mirror_min300x_extra.parq')}


In [184]:
setdiff_adv = update_index(pd.read_parquet(adv_am_paths['RBdirect'], columns=FOCUS))
mirror_adv = update_index(pd.read_parquet(adv_am_paths['mirror'], columns=FOCUS))
nb_show_table(setdiff_adv.sample(min(6,K)).sort_values('conservative_log_ratio', ascending=False))


|                       |    `f` |   `dP1` |   `dP1_simple` |   `LRC` |      `G2` |   `odds_r_disc` |    `t` |        `N` |       `f1` |    `f2` |   `exp_f` |   `unexp_f` | `l1`       | `l2`          |
|:----------------------|-------:|--------:|---------------:|--------:|----------:|----------------:|-------:|-----------:|-----------:|--------:|----------:|------------:|:-----------|:--------------|
| **COM~undoubtedly**   | 11,657 |    0.04 |           1.00 |    3.81 |    949.13 |            1.75 |   4.63 | 72,839,589 | 69,662,736 |  11,666 | 11,157.19 |      499.81 | COMPLEMENT | undoubtedly   |
| **NEGany~just**       | 54,583 |    0.05 |           0.09 |    1.09 | 24,785.84 |            0.34 | 121.00 | 72,839,589 |  3,173,660 | 603,929 | 26,313.51 |   28,269.49 | NEGATED    | just          |
| **COM~gruesomely**    |    537 |    0.03 |           0.99 |    0.00 |     22.41 |            0.65 |   0.80 | 72,839,589 | 69,662,736 |     542 |    518.36 |       18.64 | COMPLEMENT | grues

In [185]:
nb_show_table(mirror_adv.sample(min(6,K)).sort_values('conservative_log_ratio', ascending=False))


|                      |    `f` |   `dP1` |   `dP1_simple` |   `LRC` |     `G2` |   `odds_r_disc` |   `t` |       `N` |      `f1` |   `f2` |   `exp_f` |   `unexp_f` | `l1`   | `l2`         |
|:---------------------|-------:|--------:|---------------:|--------:|---------:|----------------:|------:|----------:|----------:|-------:|----------:|------------:|:-------|:-------------|
| **POS~just**         | 27,910 |    0.15 |           0.97 |    2.68 | 6,262.48 |            0.89 | 24.91 | 1,701,929 | 1,410,172 | 28,662 | 23,748.55 |    4,161.45 | POSMIR | just         |
| **POS~increasingly** |    671 |    0.16 |           0.99 |    2.05 |   193.67 |            1.21 |  4.18 | 1,701,929 | 1,410,172 |    679 |    562.60 |      108.40 | POSMIR | increasingly |
| **POS~personally**   |    512 |   -0.02 |           0.81 |    0.00 |    -2.15 |           -0.07 | -0.63 | 1,701,929 | 1,410,172 |    635 |    526.14 |      -14.14 | POSMIR | personally   |
| **POS~medically**    |    441 |   -0.11 | 

## Calculate "Most Negative" Adverbs for each Polarity Approximation

In [186]:
def get_top_vals(df: pd.DataFrame,
                 index_like: str = 'NEG',
                 metric_filter: str | list = ['am_p1_given2', 'conservative_log_ratio'],
                 k: int = 10,
                 val_col: str = None,
                 ignore_neg_adv: bool = True):
    env_df = df.copy().loc[df.conservative_log_ratio >=
                           1].filter(like=index_like, axis=0)
    if ignore_neg_adv:
        env_df = env_df.loc[~df.l2.isin(
            ("n't", 'not', 'barely', 'never', 'no', 'none')), :]
    if isinstance(metric_filter, str):
        metric_filter = [metric_filter]

    top = pd.concat([env_df.nlargest(k, m) for m in metric_filter]
                    ).drop_duplicates(keep='first')

    if val_col:
        top = top[[val_col] + metric_filter]

    return top.sort_values(metric_filter, ascending=False)


[setdiff_top15, mirror_top15] = [
    get_top_vals(adv_df, k=15)
    for adv_df in (setdiff_adv, mirror_adv)
]


In [187]:
nb_show_table(setdiff_top15.reset_index().filter(regex=r'^[^l]'))


|        | `key`              |     `f` |   `dP1` |   `dP1_simple` |   `LRC` |       `G2` |   `odds_r_disc` |    `t` |        `N` |      `f1` |    `f2` |   `exp_f` |   `unexp_f` |
|:-------|:-------------------|--------:|--------:|---------------:|--------:|-----------:|----------------:|-------:|-----------:|----------:|--------:|----------:|------------:|
| **1**  | NEGany~necessarily |  42,595 |    0.83 |           0.87 |    7.10 | 230,257.34 |            2.17 | 196.05 | 72,839,589 | 3,173,660 |  48,947 |  2,132.65 |   40,462.35 |
| **2**  | NEGany~that        | 164,768 |    0.75 |           0.79 |    6.34 | 831,137.25 |            1.94 | 383.56 | 72,839,589 | 3,173,660 | 208,262 |  9,074.09 |  155,693.91 |
| **3**  | NEGany~exactly     |  43,813 |    0.70 |           0.75 |    5.94 | 210,126.60 |            1.82 | 197.11 | 72,839,589 | 3,173,660 |  58,643 |  2,555.11 |   41,257.89 |
| **4**  | NEGany~immediately |  56,099 |    0.54 |           0.58 |    4.86 | 224,059.55 |        

15 Most Negatively Associated Adverbs for full dataset (_Absent Negative_ approximation) as ranked by $\Delta P(1|2)$ (`dP1`) and $LRC$

<!-- |        | `key`              |     `f` | `dP1` | `LRC` |       `G2` |        `N` |      `f1` |    `f2` |   `exp_f` |  `unexp_f` | `l1`    | `l2`        |
-----|:-------|:-------------------|--------:|------:|------:|-----------:|-----------:|----------:|--------:|----------:|-----------:|:--------|:------------|----
     | **0**  | NEGany~necessarily |  42,708 |  0.72 |  6.23 | 219,003.46 | 86,330,752 | 3,226,213 |  56,694 |  2,118.68 |  40,589.32 | NEGATED | necessarily |
     | **1**  | NEGany~exactly     |  43,635 |  0.67 |  5.90 | 214,404.20 | 86,330,752 | 3,226,213 |  61,599 |  2,301.98 |  41,333.02 | NEGATED | exactly     |
     | **2**  | NEGany~that        | 165,411 |  0.63 |  5.62 | 781,016.11 | 86,330,752 | 3,226,213 | 250,392 |  9,357.24 | 156,053.76 | NEGATED | that        |
     | **3**  | NEGany~immediately |  57,319 |  0.52 |  4.96 | 239,462.58 | 86,330,752 | 3,226,213 | 103,177 |  3,855.76 |  53,463.24 | NEGATED | immediately |
     | **4**  | NEGany~yet         |  52,546 |  0.48 |  4.74 | 209,055.78 | 86,330,752 | 3,226,213 | 101,707 |  3,800.83 |  48,745.17 | NEGATED | yet         |
     | **5**  | NEGany~terribly    |  18,054 |  0.22 |  3.09 |  42,704.93 | 86,330,752 | 3,226,213 |  70,174 |  2,622.43 |  15,431.57 | NEGATED | terribly    |
     | **6**  | NEGany~remotely    |   5,679 |  0.22 |  3.03 |  13,354.33 | 86,330,752 | 3,226,213 |  22,194 |    829.40 |   4,849.60 | NEGATED | remotely    |
     | **7**  | NEGany~only        | 114,070 |  0.21 |  3.04 | 261,936.36 | 86,330,752 | 3,226,213 | 464,168 | 17,346.13 |  96,723.87 | NEGATED | only        |
     | **8**  | NEGany~altogether  |   4,575 |  0.18 |  2.75 |   9,468.00 | 86,330,752 | 3,226,213 |  20,636 |    771.17 |   3,803.82 | NEGATED | altogether  |
     | **9**  | NEGany~entirely    |  63,708 |  0.17 |  2.74 | 125,925.14 | 86,330,752 | 3,226,213 | 303,833 | 11,354.35 |  52,353.65 | NEGATED | entirely    |
     | **10** | NEGany~overly      |  24,707 |  0.17 |  2.66 |  46,993.58 | 86,330,752 | 3,226,213 | 122,058 |  4,561.35 |  20,145.65 | NEGATED | overly      |
     | **11** | NEGany~merely      |   5,944 |  0.13 |  2.26 |   9,223.66 | 86,330,752 | 3,226,213 |  35,608 |  1,330.68 |   4,613.32 | NEGATED | merely      |
     | **12** | NEGany~any         |  15,492 |  0.13 |  2.28 |  23,683.00 | 86,330,752 | 3,226,213 |  94,152 |  3,518.50 |  11,973.50 | NEGATED | any         |
     | **13** | NEGany~always      | 104,605 |  0.12 |  2.28 | 157,437.56 | 86,330,752 | 3,226,213 | 651,053 | 24,330.10 |  80,274.90 | NEGATED | always      |
     | **14** | NEGany~directly    |   8,317 |  0.12 |  2.13 |  11,654.57 | 86,330,752 | 3,226,213 |  54,441 |  2,034.48 |   6,282.52 | NEGATED | directly    | -->


| rank   | `key`              |     `f` | `dP1` | `dP1_simple` | `LRC` |       `G2` | `odds_r_disc` |    `t` |        `N` |      `f1` |    `f2` |   `exp_f` |  `unexp_f` |
|:-------|:-------------------|--------:|------:|-------------:|------:|-----------:|--------------:|-------:|-----------:|----------:|--------:|----------:|-----------:|
| **1**  | NEGany~necessarily |  42,595 |  0.83 |         0.87 |  7.10 | 230,257.34 |          2.17 | 196.05 | 72,839,589 | 3,173,660 |  48,947 |  2,132.65 |  40,462.35 |
| **2**  | NEGany~that        | 164,768 |  0.75 |         0.79 |  6.34 | 831,137.25 |          1.94 | 383.56 | 72,839,589 | 3,173,660 | 208,262 |  9,074.09 | 155,693.91 |
| **3**  | NEGany~exactly     |  43,813 |  0.70 |         0.75 |  5.94 | 210,126.60 |          1.82 | 197.11 | 72,839,589 | 3,173,660 |  58,643 |  2,555.11 |  41,257.89 |
| **4**  | NEGany~immediately |  56,099 |  0.54 |         0.58 |  4.86 | 224,059.55 |          1.49 | 219.01 | 72,839,589 | 3,173,660 |  96,973 |  4,225.17 |  51,873.83 |
| **5**  | NEGany~yet         |  51,867 |  0.50 |         0.54 |  4.65 | 197,610.98 |          1.42 | 209.42 | 72,839,589 | 3,173,660 |  95,763 |  4,172.45 |  47,694.55 |
| **6**  | NEGany~before      |     308 |  0.41 |         0.45 |  3.56 |   1,025.56 |          1.26 |  15.86 | 72,839,589 | 3,173,660 |     681 |     29.67 |     278.33 |
| **7**  | NEGany~any         |  15,384 |  0.40 |         0.45 |  4.07 |  50,880.96 |          1.25 | 111.95 | 72,839,589 | 3,173,660 |  34,382 |  1,498.04 |  13,885.96 |
| **8**  | NEGany~anymore     |     422 |  0.39 |         0.44 |  3.56 |   1,366.20 |          1.23 |  18.49 | 72,839,589 | 3,173,660 |     969 |     42.22 |     379.78 |
| **9**  | NEGany~remotely    |   5,661 |  0.30 |         0.34 |  3.40 |  15,284.49 |          1.06 |  65.73 | 72,839,589 | 3,173,660 |  16,426 |    715.69 |   4,945.31 |
| **10** | NEGany~terribly    |  17,949 |  0.26 |         0.30 |  3.19 |  43,741.44 |          0.98 | 114.80 | 72,839,589 | 3,173,660 |  58,964 |  2,569.09 |  15,379.91 |
| **11** | NEGany~only        | 113,502 |  0.22 |         0.26 |  2.92 | 243,219.14 |          0.90 | 280.57 | 72,839,589 | 3,173,660 | 435,592 | 18,978.98 |  94,523.02 |
| **12** | NEGany~altogether  |   4,568 |  0.21 |         0.25 |  2.76 |   9,419.50 |          0.87 |  55.96 | 72,839,589 | 3,173,660 |  18,033 |    785.71 |   3,782.29 |
| **13** | NEGany~overly      |  24,613 |  0.20 |         0.24 |  2.77 |  49,095.72 |          0.85 | 128.88 | 72,839,589 | 3,173,660 | 100,826 |  4,393.04 |  20,219.96 |
| **14** | NEGany~entirely    |  63,321 |  0.19 |         0.23 |  2.70 | 121,162.48 |          0.83 | 204.57 | 72,839,589 | 3,173,660 | 271,851 | 11,844.69 |  51,476.31 |
| **15** | NEGany~consciously |     925 |  0.19 |         0.23 |  2.43 |   1,755.61 |          0.82 |  24.72 | 72,839,589 | 3,173,660 |   3,975 |    173.19 |     751.81 |




In [188]:
nb_show_table(mirror_top15.reset_index().filter(regex=r'^[^l]'))


|        | `key`                |   `f` |   `dP1` |   `dP1_simple` |   `LRC` |      `G2` |   `odds_r_disc` |   `t` |       `N` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |
|:-------|:---------------------|------:|--------:|---------------:|--------:|----------:|----------------:|------:|----------:|--------:|-------:|----------:|------------:|
| **1**  | NEGmir~ever          | 4,709 |    0.76 |           0.93 |    5.63 | 14,253.57 |            1.82 | 55.98 | 1,701,929 | 291,732 |  5,060 |    867.35 |    3,841.65 |
| **2**  | NEGmir~any           | 1,066 |    0.72 |           0.89 |    4.65 |  2,985.75 |            1.59 | 26.37 | 1,701,929 | 291,732 |  1,197 |    205.18 |      860.82 |
| **3**  | NEGmir~necessarily   |   963 |    0.70 |           0.87 |    4.39 |  2,597.68 |            1.51 | 24.92 | 1,701,929 | 291,732 |  1,107 |    189.75 |      773.25 |
| **4**  | NEGmir~remotely      | 1,840 |    0.62 |           0.79 |    3.79 |  4,256.34 |            1.25 | 33.54 | 1,701,929 | 

### _Previous_ Before Additional Filtering

15 Most Negatively Associated Adverbs for `mirror` subset (_Present Positive_ approximation) as ranked by $\Delta P(1|2)$ (`dP1`) and $LRC$
<!-- 
|        | `key`                |   `f` | `dP1` | `LRC` |      `G2` |       `N` |    `f1` |   `f2` |  `exp_f` | `unexp_f` | `l1`   | `l2`          |
|:-------|:---------------------|------:|------:|------:|----------:|----------:|--------:|-------:|---------:|----------:|:-------|:--------------|
| **0**  | NEGmir~before        |   290 |  0.84 |  5.11 |  1,080.52 | 2,032,082 | 293,963 |    294 |    42.53 |    247.47 | NEGMIR | before        |
| **1**  | NEGmir~ever          | 4,718 |  0.77 |  5.57 | 15,340.34 | 2,032,082 | 293,963 |  5,179 |   749.20 |  3,968.80 | NEGMIR | ever          |
| **2**  | NEGmir~exactly       |   813 |  0.59 |  3.51 |  1,939.47 | 2,032,082 | 293,963 |  1,114 |   161.15 |    651.85 | NEGMIR | exactly       |
| **3**  | NEGmir~any           | 1,082 |  0.57 |  3.48 |  2,511.26 | 2,032,082 | 293,963 |  1,514 |   219.02 |    862.98 | NEGMIR | any           |
| **4**  | NEGmir~remotely      | 1,846 |  0.54 |  3.35 |  4,009.84 | 2,032,082 | 293,963 |  2,717 |   393.04 |  1,452.96 | NEGMIR | remotely      |
| **5**  | NEGmir~particularly  | 9,278 |  0.48 |  3.15 | 17,999.07 | 2,032,082 | 293,963 | 14,954 | 2,163.26 |  7,114.74 | NEGMIR | particularly  |
| **6**  | NEGmir~that          | 4,338 |  0.44 |  2.86 |  7,632.21 | 2,032,082 | 293,963 |  7,472 | 1,080.91 |  3,257.09 | NEGMIR | that          |
| **7**  | NEGmir~necessarily   |   971 |  0.43 |  2.66 |  1,688.91 | 2,032,082 | 293,963 |  1,681 |   243.18 |    727.82 | NEGMIR | necessarily   |
| **8**  | NEGmir~inherently    | 2,872 |  0.36 |  2.42 |  4,160.38 | 2,032,082 | 293,963 |  5,649 |   817.19 |  2,054.81 | NEGMIR | inherently    |
| **9**  | NEGmir~overtly       |   392 |  0.29 |  1.71 |    443.78 | 2,032,082 | 293,963 |    898 |   129.91 |    262.09 | NEGMIR | overtly       |
| **10** | NEGmir~intrinsically |   432 |  0.29 |  1.73 |    487.95 | 2,032,082 | 293,963 |    991 |   143.36 |    288.64 | NEGMIR | intrinsically |
| **11** | NEGmir~especially    | 1,573 |  0.21 |  1.49 |  1,232.03 | 2,032,082 | 293,963 |  4,400 |   636.51 |    936.49 | NEGMIR | especially    |
| **12** | NEGmir~yet           |   320 |  0.21 |  1.18 |    242.23 | 2,032,082 | 293,963 |    909 |   131.50 |    188.50 | NEGMIR | yet           |
| **13** | NEGmir~fully         | 1,668 |  0.18 |  1.31 |  1,086.24 | 2,032,082 | 293,963 |  5,084 |   735.46 |    932.54 | NEGMIR | fully         |
| **14** | NEGmir~terribly      | 1,579 |  0.16 |  1.14 |    847.65 | 2,032,082 | 293,963 |  5,218 |   754.84 |    824.16 | NEGMIR | terribly      |
-->

| rank   | `key`                |   `f` | `dP1` | `dP1_simple` | `LRC` |      `G2` | `odds_r_disc` |   `t` |       `N` |    `f1` |   `f2` |  `exp_f` | `unexp_f` |
|:-------|:---------------------|------:|------:|-------------:|------:|----------:|--------------:|------:|----------:|--------:|-------:|---------:|----------:|
| **1**  | NEGmir~ever          | 4,709 |  0.76 |         0.93 |  5.63 | 14,253.57 |          1.82 | 55.98 | 1,701,929 | 291,732 |  5,060 |   867.35 |  3,841.65 |
| **2**  | NEGmir~any           | 1,066 |  0.72 |         0.89 |  4.65 |  2,985.75 |          1.59 | 26.37 | 1,701,929 | 291,732 |  1,197 |   205.18 |    860.82 |
| **3**  | NEGmir~necessarily   |   963 |  0.70 |         0.87 |  4.39 |  2,597.68 |          1.51 | 24.92 | 1,701,929 | 291,732 |  1,107 |   189.75 |    773.25 |
| **4**  | NEGmir~remotely      | 1,840 |  0.62 |         0.79 |  3.79 |  4,256.34 |          1.25 | 33.54 | 1,701,929 | 291,732 |  2,341 |   401.28 |  1,438.72 |
| **5**  | NEGmir~that          | 4,308 |  0.61 |         0.78 |  3.90 |  9,957.37 |          1.25 | 51.29 | 1,701,929 | 291,732 |  5,494 |   941.74 |  3,366.26 |
| **6**  | NEGmir~exactly       |   813 |  0.61 |         0.78 |  3.57 |  1,860.72 |          1.24 | 22.25 | 1,701,929 | 291,732 |  1,041 |   178.44 |    634.56 |
| **7**  | NEGmir~particularly  | 9,243 |  0.54 |         0.71 |  3.43 | 18,583.81 |          1.09 | 72.96 | 1,701,929 | 291,732 | 13,003 | 2,228.88 |  7,014.12 |
| **8**  | NEGmir~inherently    | 2,864 |  0.39 |         0.56 |  2.40 |  3,925.31 |          0.79 | 37.08 | 1,701,929 | 291,732 |  5,133 |   879.86 |  1,984.14 |
| **9**  | NEGmir~overtly       |   391 |  0.35 |         0.53 |  1.89 |    483.89 |          0.73 | 13.33 | 1,701,929 | 291,732 |    743 |   127.36 |    263.64 |
| **10** | NEGmir~intrinsically |   433 |  0.32 |         0.49 |  1.70 |    466.38 |          0.66 | 13.48 | 1,701,929 | 291,732 |    890 |   152.56 |    280.44 |
| **11** | NEGmir~especially    | 1,569 |  0.23 |         0.40 |  1.45 |  1,140.80 |          0.51 | 22.62 | 1,701,929 | 291,732 |  3,926 |   672.97 |    896.03 |
| **12** | NEGmir~yet           |   320 |  0.22 |         0.39 |  1.11 |    223.08 |          0.50 | 10.08 | 1,701,929 | 291,732 |    815 |   139.70 |    180.30 |
| **13** | NEGmir~fully         | 1,664 |  0.19 |         0.36 |  1.23 |    957.30 |          0.44 | 21.47 | 1,701,929 | 291,732 |  4,598 |   788.15 |    875.85 |
| **14** | NEGmir~terribly      | 1,567 |  0.17 |         0.34 |  1.09 |    764.44 |          0.40 | 19.62 | 1,701,929 | 291,732 |  4,610 |   790.21 |    776.79 |

### Or here, the least "negative"/most "non-negative"

In [189]:
def show_top_positive(adv_df, 
                      k:int=15, 
                      filter_and_sort:list=['conservative_log_ratio', 
                                            'am_log_likelihood', 
                                            'am_p1_given2']):
    
    _l1 = adv_df.filter(like='O', axis=0).l1.iat[0].lower().strip()
    _N = int(adv_df.N.iat[0])
    ie = '(`set_diff`, $*\complement_{N^+}$)' if _l1.startswith("com") else '(`mirror`, $@P$)'
    print(f'#### Adverbs in top {k}',
          r'for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$',
          f'measuring association with *{_l1.capitalize()}* Environments {ie}', 
          end='\n'*2)
    print(f'Total Tokens in dataset: $N = {_N:,}$')
    nb_show_table(
        get_top_vals(
            adv_df.filter(items=FOCUS), 
            k=k,
            metric_filter=filter_and_sort,
            index_like='O',  # should match "POS" & "COM", but neither "NEG*"
            ).round(2).sort_values(filter_and_sort, ascending=False).set_index('l2').drop(['N', 'l1'], axis=1)
    )
    
# All data
show_top_positive(setdiff_adv, k=15)

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Complement* Environments (`set_diff`, $*\complement_{N^+}$)

Total Tokens in dataset: $N = 72,839,589$



|                  |          `f` |   `dP1` |   `dP1_simple` |   `LRC` |       `G2` |   `odds_r_disc` |    `t` |          `f1` |         `f2` |      `exp_f` |   `unexp_f` |
|:-----------------|-------------:|--------:|---------------:|--------:|-----------:|----------------:|-------:|--------------:|-------------:|-------------:|------------:|
| **increasingly** |   374,465.00 |    0.04 |           1.00 |    7.03 |  32,549.25 |            2.37 |  26.58 | 69,662,736.00 |   374,538.00 |   358,202.76 |   16,262.24 |
| **relatively**   |   583,426.00 |    0.04 |           1.00 |    5.96 |  48,820.59 |            1.93 |  32.92 | 69,662,736.00 |   583,744.00 |   558,284.37 |   25,141.63 |
| **almost**       |   434,507.00 |    0.04 |           1.00 |    5.26 |  35,003.16 |            1.70 |  28.17 | 69,662,736.00 |   434,904.00 |   415,935.93 |   18,571.07 |
| **mostly**       |   199,883.00 |    0.04 |           1.00 |    5.09 |  16,071.00 |            1.70 |  19.11 | 69,662,736.00 |   200

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Complement* Environments (`set_diff`, $*\complement_{N^+}$)

Total Tokens in dataset: $N = 72,839,589$

|                  |       `f` | `dP1` | `dP1_simple` | `LRC` |       `G2` | `odds_r_disc` |    `t` |       `f1` |      `f2` |      `exp_f` |  `unexp_f` |
|:-----------------|----------:|------:|-------------:|------:|-----------:|--------------:|-------:|-----------:|----------:|-------------:|-----------:|
| **increasingly** |   374,465 |  0.04 |         1.00 |  7.03 |  32,549.25 |          2.37 |  26.58 | 69,662,736 |   374,538 |   358,202.76 |  16,262.24 |
| **relatively**   |   583,426 |  0.04 |         1.00 |  5.96 |  48,820.59 |          1.93 |  32.92 | 69,662,736 |   583,744 |   558,284.37 |  25,141.63 |
| **almost**       |   434,507 |  0.04 |         1.00 |  5.26 |  35,003.16 |          1.70 |  28.17 | 69,662,736 |   434,904 |   415,935.93 |  18,571.07 |
| **mostly**       |   199,883 |  0.04 |         1.00 |  5.09 |  16,071.00 |          1.70 |  19.11 | 69,662,736 |   200,066 |   191,340.25 |   8,542.75 |
| **seemingly**    |   161,276 |  0.04 |         1.00 |  5.03 |  12,968.22 |          1.70 |  17.17 | 69,662,736 |   161,423 |   154,382.64 |   6,893.36 |
| **fairly**       |   371,923 |  0.04 |         1.00 |  4.97 |  29,366.08 |          1.61 |  25.94 | 69,662,736 |   372,340 |   356,100.62 |  15,822.38 |
| **pretty**       | 1,511,615 |  0.04 |         1.00 |  4.96 | 118,512.77 |          1.56 |  52.10 | 69,662,736 | 1,513,571 | 1,447,557.55 |  64,057.45 |
| **largely**      |   173,667 |  0.04 |         1.00 |  4.87 |  13,763.38 |          1.63 |  17.75 | 69,662,736 |   173,852 |   166,269.55 |   7,397.45 |
| **partly**       |    78,775 |  0.04 |         1.00 |  4.81 |   6,336.61 |          1.70 |  12.00 | 69,662,736 |    78,846 |    75,407.18 |   3,367.82 |
| **albeit**       |    15,742 |  0.04 |         1.00 |  4.80 |   1,365.55 |          2.31 |   5.45 | 69,662,736 |    15,745 |    15,058.29 |     683.71 |
| **rather**       |   363,581 |  0.04 |         1.00 |  4.74 |  28,124.19 |          1.53 |  25.52 | 69,662,736 |   364,070 |   348,191.32 |  15,389.68 |
| **sometimes**    |   141,910 |  0.04 |         1.00 |  4.55 |  10,971.56 |          1.53 |  15.95 | 69,662,736 |   142,099 |   135,901.44 |   6,008.56 |
| **also**         | 1,062,622 |  0.04 |         1.00 |  4.45 |  79,069.47 |          1.40 |  43.14 | 69,662,736 | 1,064,588 | 1,018,156.66 |  44,465.34 |
| **supposedly**   |    27,562 |  0.04 |         1.00 |  4.40 |   2,238.61 |          1.75 |   7.11 | 69,662,736 |    27,584 |    26,380.94 |   1,181.06 |
| **virtually**    |    86,032 |  0.04 |         1.00 |  4.32 |   6,583.57 |          1.50 |  12.39 | 69,662,736 |    86,156 |    82,398.36 |   3,633.64 |
| **now**          |   434,154 |  0.04 |         1.00 |  4.27 |  31,839.80 |          1.37 |  27.50 | 69,662,736 |   435,006 |   416,033.49 |  18,120.51 |
| **most**         | 7,137,718 |  0.05 |         1.00 |  4.03 | 521,448.58 |          1.27 | 109.64 | 69,662,736 | 7,156,931 | 6,844,785.94 | 292,932.06 |
| **still**        |   772,221 |  0.04 |         1.00 |  3.99 |  54,106.56 |          1.26 |  36.19 | 69,662,736 |   774,186 |   740,420.36 |  31,800.64 |
| **highly**       |   733,697 |  0.04 |         1.00 |  3.64 |  48,462.47 |          1.15 |  34.67 | 69,662,736 |   736,102 |   703,997.37 |  29,699.63 |
| **preferably**   |     4,802 |  0.04 |         1.00 |  3.40 |     415.61 |          2.16 |   3.01 | 69,662,736 |     4,803 |     4,593.52 |     208.48 |
| **extremely**    |   913,707 |  0.04 |         1.00 |  3.25 |  55,385.18 |          1.02 |  37.66 | 69,662,736 |   917,735 |   877,708.56 |  35,998.44 |
| **alternately**  |     3,896 |  0.04 |         1.00 |  3.10 |     335.22 |          2.07 |   2.71 | 69,662,736 |     3,897 |     3,727.03 |     168.97 |
| **yearly**       |     2,485 |  0.04 |         1.00 |  2.68 |     221.64 |          2.36 |   2.17 | 69,662,736 |     2,485 |     2,376.62 |     108.38 |
| **less**         | 1,156,675 |  0.03 |         0.99 |  1.90 |  40,981.00 |          0.60 |  34.93 | 69,662,736 | 1,170,138 | 1,119,103.16 |  37,571.84 |
| **legendarily**  |     1,018 |  0.04 |         1.00 |  1.38 |      90.79 |          1.97 |   1.39 | 69,662,736 |     1,018 |       973.60 |      44.40 |
| **more**         | 8,595,325 |  0.03 |         0.98 |  1.26 | 185,410.35 |          0.42 |  75.02 | 69,662,736 | 8,757,314 | 8,375,369.24 | 219,955.76 |
| **fourth**       |       918 |  0.04 |         1.00 |  1.23 |      81.88 |          1.92 |   1.32 | 69,662,736 |       918 |       877.96 |      40.04 |
| **eagerly**      |       896 |  0.04 |         1.00 |  1.20 |      79.91 |          1.91 |   1.31 | 69,662,736 |       896 |       856.92 |      39.08 |
| **whereby**      |       886 |  0.04 |         1.00 |  1.18 |      79.02 |          1.91 |   1.30 | 69,662,736 |       886 |       847.36 |      38.64 |
| **variously**    |       857 |  0.04 |         1.00 |  1.13 |      76.43 |          1.89 |   1.28 | 69,662,736 |       857 |       819.62 |      37.38 |
| **second-most**  |       850 |  0.04 |         1.00 |  1.12 |      75.81 |          1.89 |   1.27 | 69,662,736 |       850 |       812.93 |      37.07 |
| **very**         | 9,065,101 |  0.03 |         0.98 |  1.10 | 163,181.02 |          0.37 |  71.02 | 69,662,736 | 9,254,924 | 8,851,276.29 | 213,824.71 |
| **successively** |       795 |  0.04 |         1.00 |  1.02 |      70.90 |          1.86 |   1.23 | 69,662,736 |       795 |       760.33 |      34.67 |

**Was**:

> Total Tokens in dataset: $N = 86,330,752$
> 
> |                    |       `f` | `dP1` | `LRC` |       `G2` |       `f1` |      `f2` |      `exp_f` |  `unexp_f` | `dP2` | `dP1_simple` |
> |:-------------------|----------:|------:|------:|-----------:|-----------:|----------:|-------------:|-----------:|------:|-------------:|
> | **increasingly**   |   404,356 |  0.04 |  6.00 |  29,076.69 | 83,102,035 |   404,521 |   389,392.16 |  14,963.84 |  0.00 |         1.00 |
> | **relatively**     |   626,369 |  0.04 |  5.24 |  42,957.87 | 83,102,035 |   626,884 |   603,438.92 |  22,930.08 |  0.01 |         1.00 |
> | **almost**         |   466,468 |  0.04 |  4.85 |  31,107.72 | 83,102,035 |   466,967 |   449,502.72 |  16,965.28 |  0.01 |         1.00 |
> | **seemingly**      |   176,135 |  0.04 |  4.77 |  11,864.41 | 83,102,035 |   176,304 |   169,710.34 |   6,424.66 |  0.00 |         1.00 |
> | **mostly**         |   212,255 |  0.04 |  4.71 |  14,160.67 | 83,102,035 |   212,478 |   204,531.45 |   7,723.55 |  0.00 |         1.00 |
> | **pretty**         | 1,650,041 |  0.04 |  4.64 | 107,081.72 | 83,102,035 | 1,652,360 | 1,590,562.75 |  59,478.25 |  0.02 |         1.00 |
> | **fairly**         |   401,326 |  0.04 |  4.50 |  25,904.34 | 83,102,035 |   401,879 |   386,848.97 |  14,477.03 |  0.00 |         1.00 |
> | **partly**         |    80,461 |  0.04 |  4.50 |   5,418.01 | 83,102,035 |    80,538 |    77,525.93 |   2,935.07 |  0.00 |         1.00 |
> | **rather**         |   402,067 |  0.04 |  4.44 |  25,775.15 | 83,102,035 |   402,648 |   387,589.21 |  14,477.79 |  0.00 |         1.00 |
> | **largely**        |   186,382 |  0.04 |  4.36 |  12,018.96 | 83,102,035 |   186,638 |   179,657.85 |   6,724.15 |  0.00 |         1.00 |
> | **sometimes**      |   154,738 |  0.04 |  4.25 |   9,894.59 | 83,102,035 |   154,963 |   149,167.48 |   5,570.52 |  0.00 |         1.00 |
> | **also**           | 1,135,038 |  0.04 |  4.13 |  69,302.53 | 83,102,035 | 1,137,293 | 1,094,758.94 |  40,279.06 |  0.01 |         1.00 |
> | **supposedly**     |    30,854 |  0.04 |  4.13 |   2,118.60 | 83,102,035 |    30,878 |    29,723.18 |   1,130.82 |  0.00 |         1.00 |
> | **once**           |   108,130 |  0.04 |  4.01 |   6,779.79 | 83,102,035 |   108,308 |   104,257.35 |   3,872.65 |  0.00 |         1.00 |
> | **certainly**      |   107,358 |  0.04 |  3.98 |   6,710.96 | 83,102,035 |   107,538 |   103,516.14 |   3,841.85 |  0.00 |         1.00 |
> | **now**            |   456,039 |  0.04 |  3.88 |  27,026.68 | 83,102,035 |   457,065 |   439,971.05 |  16,067.95 |  0.01 |         1.00 |
> | **most**           | 7,713,908 |  0.04 |  3.84 | 465,492.10 | 83,102,035 | 7,734,027 | 7,444,779.15 | 269,128.85 |  0.09 |         1.00 |
> | **slightly**       |   399,124 |  0.03 |  3.63 |  22,711.33 | 83,102,035 |   400,193 |   385,226.03 |  13,897.97 |  0.00 |         1.00 |
> | **still**          |   854,311 |  0.03 |  3.55 |  47,347.08 | 83,102,035 |   856,873 |   824,826.48 |  29,484.52 |  0.01 |         1.00 |
> | **albeit**         |    17,169 |  0.04 |  3.53 |   1,270.78 | 83,102,035 |    17,172 |    16,529.78 |     639.22 |  0.00 |         1.00 |
> | **admittedly**     |    13,998 |  0.04 |  3.34 |     945.10 | 83,102,035 |    14,011 |    13,487.00 |     511.00 |  0.00 |         1.00 |
> | **understandably** |    13,111 |  0.04 |  3.24 |     879.17 | 83,102,035 |    13,124 |    12,633.17 |     477.83 |  0.00 |         1.00 |
> | **highly**         |   789,705 |  0.03 |  3.08 |  39,233.91 | 83,102,035 |   793,031 |   763,372.13 |  26,332.87 |  0.01 |         1.00 |
> | **extremely**      |   986,551 |  0.03 |  2.69 |  43,399.83 | 83,102,035 |   992,094 |   954,990.29 |  31,560.71 |  0.01 |         0.99 |
> | **hopefully**      |     7,834 |  0.04 |  2.65 |     530.95 | 83,102,035 |     7,841 |     7,547.75 |     286.25 |  0.00 |         1.00 |
> | **presumably**     |     8,011 |  0.04 |  2.59 |     568.20 | 83,102,035 |     8,015 |     7,715.24 |     295.76 |  0.00 |         1.00 |
> | **less**           | 1,286,169 |  0.03 |  1.71 |  34,129.70 | 83,102,035 | 1,300,817 | 1,252,167.24 |  34,001.76 |  0.01 |         0.99 |
> | **alternately**    |     4,148 |  0.04 |  1.11 |     294.82 | 83,102,035 |     4,150 |     3,994.79 |     153.21 |  0.00 |         1.00 |
> | **more**           | 9,438,165 |  0.02 |  1.10 | 141,966.92 | 83,102,035 | 9,607,426 | 9,248,114.18 | 190,050.82 |  0.06 |         0.98 |


In [190]:
# Mirror Data ~ explicitly positive ~ positive trigger present
show_top_positive(mirror_adv, k=15)

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Posmir* Environments (`mirror`, $@P$)

Total Tokens in dataset: $N = 1,701,929$

|                 |        `f` |   `dP1` |   `dP1_simple` |   `LRC` |      `G2` |   `odds_r_disc` |   `t` |         `f1` |       `f2` |    `exp_f` |   `unexp_f` |
|:----------------|-----------:|--------:|---------------:|--------:|----------:|----------------:|------:|-------------:|-----------:|-----------:|------------:|
| **pretty**      |  24,593.00 |    0.17 |           0.99 |    4.71 |  8,175.85 |            1.61 | 26.21 | 1,410,172.00 |  24,720.00 |  20,482.32 |    4,110.68 |
| **rather**      |   8,383.00 |    0.17 |           1.00 |    4.62 |  2,853.50 |            1.73 | 15.41 | 1,410,172.00 |   8,415.00 |   6,972.44 |    1,410.56 |
| **plain**       |   5,062.00 |    0.17 |           1.00 |    4.44 |  1,739.08 |            1.78 | 12.00 | 1,410,172.00 |   5,079.00 |   4,208.32 |      85

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Posmir* Environments (`mirror`, $@P$)

Total Tokens in dataset: $N = 1,701,929$

|                 |     `f` | `dP1` | `dP1_simple` | `LRC` |      `G2` | `odds_r_disc` |   `t` |      `f1` |    `f2` |    `exp_f` | `unexp_f` |
|:----------------|--------:|------:|-------------:|------:|----------:|--------------:|------:|----------:|--------:|-----------:|----------:|
| **pretty**      |  24,593 |  0.17 |         0.99 |  4.71 |  8,175.85 |          1.61 | 26.21 | 1,410,172 |  24,720 |  20,482.32 |  4,110.68 |
| **rather**      |   8,383 |  0.17 |         1.00 |  4.62 |  2,853.50 |          1.73 | 15.41 | 1,410,172 |   8,415 |   6,972.44 |  1,410.56 |
| **plain**       |   5,062 |  0.17 |         1.00 |  4.44 |  1,739.08 |          1.78 | 12.00 | 1,410,172 |   5,079 |   4,208.32 |    853.68 |
| **fairly**      |   5,703 |  0.17 |         1.00 |  4.32 |  1,922.67 |          1.68 | 12.68 | 1,410,172 |   5,727 |   4,745.24 |    957.76 |
| **somewhat**    |   4,482 |  0.17 |         1.00 |  4.31 |  1,532.10 |          1.75 | 11.28 | 1,410,172 |   4,498 |   3,726.92 |    755.08 |
| **otherwise**   |   6,857 |  0.17 |         0.99 |  4.06 |  2,220.20 |          1.53 | 13.78 | 1,410,172 |   6,899 |   5,716.32 |  1,140.68 |
| **maybe**       |   2,672 |  0.17 |         1.00 |  3.98 |    917.02 |          1.77 |  8.72 | 1,410,172 |   2,681 |   2,221.40 |    450.60 |
| **downright**   |   4,726 |  0.17 |         0.99 |  3.88 |  1,528.70 |          1.52 | 11.44 | 1,410,172 |   4,755 |   3,939.86 |    786.14 |
| **already**     |   4,275 |  0.17 |         0.99 |  3.80 |  1,377.50 |          1.51 | 10.87 | 1,410,172 |   4,302 |   3,564.52 |    710.48 |
| **relatively**  |   5,307 |  0.16 |         0.99 |  3.79 |  1,681.55 |          1.46 | 12.06 | 1,410,172 |   5,345 |   4,428.72 |    878.28 |
| **almost**      |   5,247 |  0.16 |         0.99 |  3.70 |  1,640.95 |          1.42 | 11.95 | 1,410,172 |   5,288 |   4,381.49 |    865.51 |
| **equally**     |   7,316 |  0.16 |         0.99 |  3.58 |  2,195.55 |          1.32 | 13.96 | 1,410,172 |   7,389 |   6,122.32 |  1,193.68 |
| **perhaps**     |   3,526 |  0.16 |         0.99 |  3.52 |  1,105.47 |          1.42 |  9.80 | 1,410,172 |   3,553 |   2,943.92 |    582.08 |
| **highly**      |   9,134 |  0.16 |         0.99 |  3.26 |  2,535.26 |          1.16 | 15.26 | 1,410,172 |   9,264 |   7,675.90 |  1,458.10 |
| **slightly**    |   7,559 |  0.16 |         0.99 |  3.22 |  2,104.79 |          1.17 | 13.89 | 1,410,172 |   7,665 |   6,351.01 |  1,207.99 |
| **extremely**   |  17,231 |  0.16 |         0.98 |  3.14 |  4,506.02 |          1.07 | 20.57 | 1,410,172 |  17,537 |  14,530.68 |  2,700.32 |
| **also**        |   6,878 |  0.16 |         0.99 |  3.12 |  1,887.00 |          1.14 | 13.20 | 1,410,172 |   6,980 |   5,783.44 |  1,094.56 |
| **simply**      |   7,799 |  0.16 |         0.98 |  3.01 |  2,062.00 |          1.09 | 13.90 | 1,410,172 |   7,931 |   6,571.41 |  1,227.59 |
| **still**       |  13,308 |  0.15 |         0.98 |  2.96 |  3,364.12 |          1.03 | 17.90 | 1,410,172 |  13,569 |  11,242.90 |  2,065.10 |
| **incredibly**  |   8,831 |  0.15 |         0.98 |  2.90 |  2,251.34 |          1.04 | 14.63 | 1,410,172 |   8,999 |   7,456.33 |  1,374.67 |
| **just**        |  27,910 |  0.15 |         0.97 |  2.68 |  6,262.48 |          0.89 | 24.91 | 1,410,172 |  28,662 |  23,748.55 |  4,161.45 |
| **even**        |  59,871 |  0.13 |         0.95 |  1.99 |  9,679.52 |          0.65 | 32.34 | 1,410,172 |  62,709 |  51,958.97 |  7,912.03 |
| **eerily**      |     402 |  0.17 |         1.00 |  1.96 |    133.04 |          1.52 |  3.35 | 1,410,172 |     404 |     334.74 |     67.26 |
| **very**        | 176,341 |  0.14 |         0.95 |  1.95 | 28,647.86 |          0.66 | 54.40 | 1,410,172 | 185,255 | 153,497.25 | 22,843.75 |
| **lightly**     |     399 |  0.17 |         1.00 |  1.95 |    131.94 |          1.52 |  3.34 | 1,410,172 |     401 |     332.26 |     66.74 |
| **darn**        |     439 |  0.16 |         0.99 |  1.94 |    139.78 |          1.41 |  3.47 | 1,410,172 |     442 |     366.23 |     72.77 |
| **chronically** |     325 |  0.17 |         0.99 |  1.65 |    104.92 |          1.43 |  3.00 | 1,410,172 |     327 |     270.94 |     54.06 |


**Was**: 
> Total Tokens in dataset: $N = 1,761,853$
> 
> |                  |     `f` | `dP1` | `LRC` |      `G2` |      `f1` |    `f2` |    `exp_f` | `unexp_f` | `dP2` | `dP1_simple` |
> |:-----------------|--------:|------:|------:|----------:|----------:|--------:|-----------:|----------:|------:|-------------:|
> | **pretty**       |  24,599 |  0.16 |  4.59 |  7,751.98 | 1,472,036 |  24,729 |  20,661.19 |  3,937.81 |  0.02 |         0.99 |
> | **rather**       |   8,259 |  0.16 |  4.39 |  2,671.93 | 1,472,036 |   8,291 |   6,927.17 |  1,331.83 |  0.01 |         1.00 |
> | **plain**        |   5,053 |  0.16 |  4.15 |  1,660.56 | 1,472,036 |   5,069 |   4,235.17 |    817.83 |  0.00 |         1.00 |
> | **fairly**       |   5,678 |  0.16 |  4.07 |  1,820.50 | 1,472,036 |   5,702 |   4,764.05 |    913.95 |  0.00 |         1.00 |
> | **somewhat**     |   4,441 |  0.16 |  3.93 |  1,436.47 | 1,472,036 |   4,458 |   3,724.68 |    716.32 |  0.00 |         1.00 |
> | **otherwise**    |   6,562 |  0.16 |  3.85 |  2,012.72 | 1,472,036 |   6,603 |   5,516.84 |  1,045.16 |  0.00 |         0.99 |
> | **downright**    |   4,730 |  0.16 |  3.64 |  1,446.97 | 1,472,036 |   4,760 |   3,977.00 |    753.00 |  0.00 |         0.99 |
> | **relatively**   |   5,328 |  0.16 |  3.61 |  1,603.27 | 1,472,036 |   5,366 |   4,483.32 |    844.68 |  0.00 |         0.99 |
> | **already**      |   4,277 |  0.16 |  3.54 |  1,302.51 | 1,472,036 |   4,305 |   3,596.85 |    680.15 |  0.00 |         0.99 |
> | **almost**       |   5,286 |  0.16 |  3.47 |  1,551.93 | 1,472,036 |   5,330 |   4,453.24 |    832.76 |  0.00 |         0.99 |
> | **maybe**        |   2,573 |  0.16 |  3.43 |    846.03 | 1,472,036 |   2,581 |   2,156.44 |    416.56 |  0.00 |         1.00 |
> | **equally**      |   7,235 |  0.15 |  3.36 |  2,018.36 | 1,472,036 |   7,314 |   6,110.88 |  1,124.12 |  0.00 |         0.99 |
> | **perhaps**      |   3,353 |  0.16 |  3.22 |    989.23 | 1,472,036 |   3,380 |   2,824.00 |    528.99 |  0.00 |         0.99 |
> | **highly**       |   9,133 |  0.15 |  3.18 |  2,407.55 | 1,472,036 |   9,260 |   7,736.77 |  1,396.23 |  0.01 |         0.99 |
> | **slightly**     |   7,524 |  0.15 |  3.09 |  1,970.46 | 1,472,036 |   7,631 |   6,375.73 |  1,148.27 |  0.00 |         0.99 |
> | **extremely**    |  17,254 |  0.15 |  3.06 |  4,253.50 | 1,472,036 |  17,559 |  14,670.62 |  2,583.38 |  0.01 |         0.98 |
> | **also**         |   6,904 |  0.15 |  3.02 |  1,789.00 | 1,472,036 |   7,006 |   5,853.54 |  1,050.46 |  0.00 |         0.99 |
> | **simply**       |   7,695 |  0.15 |  2.90 |  1,912.92 | 1,472,036 |   7,826 |   6,538.66 |  1,156.34 |  0.00 |         0.98 |
> | **still**        |  13,239 |  0.15 |  2.85 |  3,122.90 | 1,472,036 |  13,504 |  11,282.65 |  1,956.35 |  0.01 |         0.98 |
> | **incredibly**   |   8,847 |  0.15 |  2.81 |  2,118.18 | 1,472,036 |   9,016 |   7,532.91 |  1,314.09 |  0.01 |         0.98 |
> | **just**         |  27,625 |  0.14 |  2.60 |  5,785.97 | 1,472,036 |  28,371 |  23,704.10 |  3,920.90 |  0.02 |         0.97 |
> | **surprisingly** |   1,439 |  0.16 |  2.50 |    427.85 | 1,472,036 |   1,450 |   1,211.48 |    227.52 |  0.00 |         0.99 |
> | **sometimes**    |   1,302 |  0.16 |  2.36 |    380.76 | 1,472,036 |   1,313 |   1,097.02 |    204.98 |  0.00 |         0.99 |
> | **even**         |  58,121 |  0.12 |  1.89 |  8,471.57 | 1,472,036 |  60,933 |  50,909.79 |  7,211.21 |  0.03 |         0.95 |
> | **very**         | 175,104 |  0.13 |  1.88 | 25,839.60 | 1,472,036 | 184,008 | 153,739.50 | 21,364.50 |  0.09 |         0.95 |
> | **strangely**    |     696 |  0.16 |  1.56 |    202.78 | 1,472,036 |     702 |     586.52 |    109.48 |  0.00 |         0.99 |



## Compile top NEG~adverb associations across both approximation methods

### Define the functions

In [191]:
def load_backup(
                adv_set:set,
    lower_floor: int = 100,
                loaded_path: Path = adv_am_paths['RBdirect'], 
                ) -> pd.DataFrame:
    located_paths = tuple(loaded_path.parent.glob(
        f'*ALL*min{round(SET_FLOOR//3, -2)}x*parq'))
    if any(located_paths):
        backup_df = pd.read_parquet(located_paths[0], columns=FOCUS, filters=[('l2', 'in', adv_set)])

        backup_df = backup_df.filter(like='NEG', axis=0).reset_index().set_index('l2')
        backup_df.index.name = 'adv'
        return backup_df
    else:
        return []


def uncat(df):
    cats = df.select_dtypes('category').columns
    df[cats] = df[cats].astype('string')
    # print(df.dtypes)
    return df, cats


def fill_empties(name_1, name_2, both, loaded_paths, adv_set):
    for name in (name_1, name_2):
        name = name.strip('_')
        path = loaded_paths['RBdirect'] if name == 'SET' else loaded_paths['mirror']
        if any(both[f'f_{name}'].isna()):

            floor = 100
            neg_backup = load_backup(lower_floor=floor, loaded_path=path, adv_set=adv_set)
            if not any(neg_backup):
                print('Error. Backup data not found. [in fill_empties()]')

            neg_backup.columns = (pd.Series(adjust_assoc_columns(neg_backup.columns)
                                            ) + f'_{name}').to_list()
            both, cats = uncat(both)
            neg_backup, __ = uncat(neg_backup)

            undefined_adv = both.loc[
                both[f'f_{name}'].isna(), :].index.to_list()

            both.loc[undefined_adv,
                     neg_backup.columns] = neg_backup.filter(items=undefined_adv, axis=0)

            both[cats] = both[cats].astype('category')

    return both


def combine_top(df_1: pd.DataFrame,
                name_1: str,
                df_2: pd.DataFrame,
                name_2: str,
                env_filter: str = 'NEG',
                filter_items: list = FOCUS,
                k: int = 10) -> pd.DataFrame:
    print(f'### `{TAG}` Most Negative Adverb Selections')
    top_dfs = [
        (get_top_vals(adv_df,  k=k,
                      index_like=env_filter,
                      metric_filter=['am_p1_given2',
                                     'conservative_log_ratio'])
         .sort_values('conservative_log_ratio', ascending=False))
        for adv_df in [df_1, df_2]
    ]
    for i, name in enumerate([name_1, name_2]):

        print_iter(
            [f'_{w}_' for w in top_dfs[i].l2], bullet='1.',
            header=(f'`{name}`: union of top {k} adverbs ranked by '
                    r'$LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$'))
    top_adv_lists = [dx.l2.to_list() for dx in top_dfs]
    top_adv = pd.Series(top_adv_lists[0] + top_adv_lists[1]).drop_duplicates()
    # top_adv = pd.concat((top_dfs[0].l2, top_dfs[1].l2)).drop_duplicates()

    print_iter(
        [f'_{w}_' for w in top_adv], bullet='1.',
        header=f'Union of top adverbs for `{name_1}` and `{name_2}`. (Novel `{name_2}` adverbs listed last)')
    print(f'\n### `{name_1}` Adverb Associations (in initially loaded table)\n')
    df_1 = narrow_selection(df_1, top_adv, env_filter, filter_items)
    print(f'\n### `{name_2}` Adverb Associations (in initially loaded table)\n')
    df_2 = narrow_selection(df_2, top_adv, env_filter, filter_items)

    name_1, name_2 = [f"_{n.strip('_')}" for n in [name_1, name_2]]
    both = df_1.join(df_2, how="outer", lsuffix=name_1, rsuffix=name_2)

    # ! Empty cells need to be filled _before_ calculating mean
    both = fill_empties(name_1, name_2, both, adv_am_paths, adv_set=set(top_adv))
    both = force_ints(both)
    both = add_means(both)
    both = add_f_ratio(both, name_2, name_1)
    return both.sort_values('mean_dP1', ascending=False)


def add_f_ratio(df, subset_name, superset_name):
    counts = df.filter(regex=r'^[Nf][12]?').columns.str.split(
        '_').str.get(0).drop_duplicates()
    for count in counts:
        ratio_col = f'ratio_{count}{subset_name}'
        df[ratio_col] = (df[f'{count}{subset_name}']
                         / df[f'{count}{superset_name}'])
        # print(df.filter(like=count))
    return df


def add_means(both):
    for metric in (both.select_dtypes(include='number').columns.to_series()
                   .str.replace(r'_(MIR|SET)$', '', regex=True).unique()):
        both[f'mean_{snake_to_camel(metric)}'] = both.filter(
            regex=f"^{metric}").agg('mean', axis='columns')
    return both


def narrow_selection(df: pd.DataFrame,
                     top_adv: list,
                     env_filter: str = 'NEG',
                     filter_items: list = FOCUS):
    df = adjust_assoc_columns(
        df.filter(items=filter_items)
        .filter(like=env_filter, axis=0)
        .reset_index().set_index('l2')
        .filter(top_adv, axis=0)).sort_values(['LRC', 'dP1'], ascending=False)
    df.index.name = 'adv'
    nb_show_table(df.drop(['N', 'key', 'l1'], axis=1).round(
        2).sort_values(['LRC', 'dP1', ], ascending=False))

    return df

### Run it 🏃‍♀️

In [192]:
C = combine_top(setdiff_adv, 'SET',
                mirror_adv, 'MIR', k=K)

### `ALL` Most Negative Adverb Selections

`SET`: union of top 6 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _before_

`MIR`: union of top 6 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _ever_
1. _any_
1. _necessarily_
1. _that_
1. _remotely_
1. _exactly_

Union of top adverbs for `SET` and `MIR`. (Novel `MIR` adverbs listed last)
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _before_
1. _ever_
1. _remotely_

### `SET` Adverb Associations (in initially loaded table)


|                 |        `f` |   `dP1` |   `dP1_simple` |   `LRC` |       `G2` |   `odds_r_disc` |    `t` |         `f1` |       `f2` |   `exp_f` |   `unexp_f` |
|:----------------|-----------:|--------:|---------------:|--------:|-----------:|----------------:|-------:|-------------:|-----------:|----------:|------------:|
| **necessarily** |  42,595.00 |    0.83 |     

### `ALL` Most Negative Adverb Selections

- `SET`: union of top 6 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
  1. _necessarily_
  1. _that_
  1. _exactly_
  1. _immediately_
  1. _yet_
  1. _any_
  1. _before_

  _**Were**_:
  > 1. _necessarily_
  > 1. _exactly_
  > 1. _that_
  > 1. _immediately_
  > 1. _yet_
  > 1. _terribly_

- `MIR`: union of top 6 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
  1. _ever_
  1. _any_
  1. _necessarily_
  1. _that_
  1. _remotely_
  1. _exactly_
  
  _**Were**_:
  > 1. _ever_
  > 1. _any_
  > 1. _longer_
  > 1. _necessarily_
  > 1. _that_
  > 1. _remotely_
  > 1. _before_

- Union of top adverbs for `SET` and `MIR`. (Novel `MIR` adverbs listed last)
  1. _necessarily_
  1. _that_
  1. _exactly_
  1. _immediately_
  1. _yet_
  1. _any_
  1. _before_
  1. _ever_
  1. _remotely_

  _**Were**_:
  > 1. _necessarily_
  > 1. _exactly_
  > 1. _that_
  > 1. _immediately_
  > 1. _yet_
  > 1. _terribly_
  > 1. _ever_
  > 1. _any_
  > 1. _longer_
  > 1. _remotely_
  > 1. _before_



### `SET` Adverb Associations (in initially loaded table)


|                 |        `f` |   `dP1` |   `dP1_simple` |   `LRC` |       `G2` |   `odds_r_disc` |    `t` |         `f1` |       `f2` |   `exp_f` |   `unexp_f` |
|:----------------|-----------:|--------:|---------------:|--------:|-----------:|----------------:|-------:|-------------:|-----------:|----------:|------------:|
| **necessarily** |  42,595.00 |    0.83 |           0.87 |    7.10 | 230,257.34 |            2.17 | 196.05 | 3,173,660.00 |  48,947.00 |  2,132.65 |   40,462.35 |
| **that**        | 164,768.00 |    0.75 |           0.79 |    6.34 | 831,137.25 |            1.94 | 383.56 | 3,173,660.00 | 208,262.00 |  9,074.09 |  155,693.91 |
| **exactly**     |  43,813.00 |    0.70 |           0.75 |    5.94 | 210,126.60 |            1.82 | 197.11 | 3,173,660.00 |  58,643.00 |  2,555.11 |   41,257.89 |
| **immediately** |  56,099.00 |    0.54 |           0.58 |    4.86 | 224,059.55 |            1.49 | 219.01 | 3,173,660.00 |  96,973.00 |  4,225.17 |   51,873.83 |
| **yet**         |  51,867.00 |    0.50 |           0.54 |    4.65 | 197,610.98 |            1.42 | 209.42 | 3,173,660.00 |  95,763.00 |  4,172.45 |   47,694.55 |
| **any**         |  15,384.00 |    0.40 |           0.45 |    4.07 |  50,880.96 |            1.25 | 111.95 | 3,173,660.00 |  34,382.00 |  1,498.04 |   13,885.96 |
| **before**      |     308.00 |    0.41 |           0.45 |    3.56 |   1,025.56 |            1.26 |  15.86 | 3,173,660.00 |     681.00 |     29.67 |      278.33 |
| **remotely**    |   5,661.00 |    0.30 |           0.34 |    3.40 |  15,284.49 |            1.06 |  65.73 | 3,173,660.00 |  16,426.00 |    715.69 |    4,945.31 |
| **ever**        |   5,932.00 |    0.01 |           0.05 |    0.16 |     183.92 |            0.08 |  12.49 | 3,173,660.00 | 114,075.00 |  4,970.31 |      961.69 |

#### Previous `SET` AM:
 
> |                 |        `f` | `dP1` | `LRC` |       `G2` |         `f1` |       `f2` |  `exp_f` |  `unexp_f` |
> |:----------------|-----------:|------:|------:|-----------:|-------------:|-----------:|---------:|-----------:|
> | **necessarily** |  42,708.00 |  0.72 |  6.23 | 219,003.46 | 3,226,213.00 |  56,694.00 | 2,118.68 |  40,589.32 |
> | **exactly**     |  43,635.00 |  0.67 |  5.90 | 214,404.20 | 3,226,213.00 |  61,599.00 | 2,301.98 |  41,333.02 |
> | **that**        | 165,411.00 |  0.63 |  5.62 | 781,016.11 | 3,226,213.00 | 250,392.00 | 9,357.24 | 156,053.76 |
> | **immediately** |  57,319.00 |  0.52 |  4.96 | 239,462.58 | 3,226,213.00 | 103,177.00 | 3,855.76 |  53,463.24 |
> | **yet**         |  52,546.00 |  0.48 |  4.74 | 209,055.78 | 3,226,213.00 | 101,707.00 | 3,800.83 |  48,745.17 |
> | **terribly**    |  18,054.00 |  0.22 |  3.09 |  42,704.93 | 3,226,213.00 |  70,174.00 | 2,622.43 |  15,431.57 |
> | **remotely**    |   5,679.00 |  0.22 |  3.03 |  13,354.33 | 3,226,213.00 |  22,194.00 |   829.40 |   4,849.60 |
> | **any**         |  15,492.00 |  0.13 |  2.28 |  23,683.00 | 3,226,213.00 |  94,152.00 | 3,518.50 |  11,973.50 |
> | **ever**        |   5,967.00 |  0.01 |  0.28 |     353.58 | 3,226,213.00 | 124,592.00 | 4,656.05 |   1,310.95 |
> | **longer**      |   1,448.00 | -0.03 | -1.87 |  -4,977.41 | 3,226,213.00 | 157,984.00 | 5,903.92 |  -4,455.92 |


### `MIR` Adverb Associations (in initially loaded table)

|                 |      `f` |   `dP1` |   `dP1_simple` |   `LRC` |      `G2` |   `odds_r_disc` |   `t` |       `f1` |     `f2` |   `exp_f` |   `unexp_f` |
|:----------------|---------:|--------:|---------------:|--------:|----------:|----------------:|------:|-----------:|---------:|----------:|------------:|
| **ever**        | 4,709.00 |    0.76 |           0.93 |    5.63 | 14,253.57 |            1.82 | 55.98 | 291,732.00 | 5,060.00 |    867.35 |    3,841.65 |
| **any**         | 1,066.00 |    0.72 |           0.89 |    4.65 |  2,985.75 |            1.59 | 26.37 | 291,732.00 | 1,197.00 |    205.18 |      860.82 |
| **necessarily** |   963.00 |    0.70 |           0.87 |    4.39 |  2,597.68 |            1.51 | 24.92 | 291,732.00 | 1,107.00 |    189.75 |      773.25 |
| **that**        | 4,308.00 |    0.61 |           0.78 |    3.90 |  9,957.37 |            1.25 | 51.29 | 291,732.00 | 5,494.00 |    941.74 |    3,366.26 |
| **remotely**    | 1,840.00 |    0.62 |           0.79 |    3.79 |  4,256.34 |            1.25 | 33.54 | 291,732.00 | 2,341.00 |    401.28 |    1,438.72 |
| **exactly**     |   813.00 |    0.61 |           0.78 |    3.57 |  1,860.72 |            1.24 | 22.25 | 291,732.00 | 1,041.00 |    178.44 |      634.56 |
| **yet**         |   320.00 |    0.22 |           0.39 |    1.11 |    223.08 |            0.50 | 10.08 | 291,732.00 |   815.00 |    139.70 |      180.30 |
| **immediately** |   403.00 |    0.17 |           0.34 |    0.84 |    191.88 |            0.39 |  9.87 | 291,732.00 | 1,195.00 |    204.84 |      198.16 |


#### Previous `MIR` AM

> |                 |      `f` | `dP1` | `LRC` |      `G2` |       `f1` |     `f2` | `exp_f` | `unexp_f` |
> |:----------------|---------:|------:|------:|----------:|-----------:|---------:|--------:|----------:|
> | **ever**        | 4,688.00 |  0.77 |  5.73 | 14,624.92 | 289,770.00 | 5,027.00 |  826.79 |  3,861.21 |
> | **any**         | 1,066.00 |  0.74 |  4.88 |  3,151.64 | 289,770.00 | 1,178.00 |  193.74 |    872.26 |
> | **longer**      |   802.00 |  0.74 |  4.71 |  2,350.18 | 289,770.00 |   891.00 |  146.54 |    655.46 |
> | **necessarily** |   960.00 |  0.71 |  4.47 |  2,679.92 | 289,770.00 | 1,100.00 |  180.92 |    779.08 |
> | **that**        | 4,293.00 |  0.62 |  3.95 | 10,223.36 | 289,770.00 | 5,488.00 |  902.61 |  3,390.39 |
> | **remotely**    | 1,841.00 |  0.62 |  3.87 |  4,419.89 | 289,770.00 | 2,336.00 |  384.20 |  1,456.80 |
> | **exactly**     |   811.00 |  0.62 |  3.66 |  1,931.41 | 289,770.00 | 1,034.00 |  170.06 |    640.94 |
> | **before**      |   288.00 |  0.84 |  1.31 |  1,039.94 | 289,770.00 |   288.00 |   47.37 |    240.63 |
> | **yet**         |   319.00 |  0.23 |  1.20 |    242.11 | 289,770.00 |   810.00 |  133.22 |    185.78 |
> | **terribly**    | 1,571.00 |  0.18 |  1.18 |    857.86 | 289,770.00 | 4,596.00 |  755.90 |    815.10 |
> | **immediately** |   403.00 |  0.17 |  0.93 |    212.93 | 289,770.00 | 1,193.00 |  196.21 |    206.79 |

### $\textit{Old}$ Frequency Comparisons between Polarity Approximations: All Data vs. Mirror Subset
The following values indicate the percentage of the negated frequency (`f`) and the marginal frequency (`f2`) accounted for by the `mirror` subset for each adverb. 
That is, `ratio_f_MIR` indicates the percentage of negated tokens with the specific triggers covered by `NEGmirror`, 
and `ratio_f2_MIR` the percentage of all adverb tokens which were captured by _either_ mirror pattern, `POSmirror` or `NEGmirror`. 
The third column then indicates the discrepancy between these percentages: 
For example, 

- [ ] 🚩 **finish this discussion!**

Note that _before_ and _ever_ have a much higher proportions of their negated tokens representated in the mirror subset. 
However, the discrepancy indicated by the `difference` column, which illuminates the 

#### Percentage Comparision

|                   |  joint % MIR |  adverb % MIR | % MIR $\Delta$ |
|:------------------|-------------:|--------------:|---------------:|
| **ever**          |         79.1 |           4.2 |           74.9 |
| **before**        |         93.2 |          39.3 |           53.9 |
| **inherently**    |         41.9 |          10.3 |           31.7 |
| **intrinsically** |         40.3 |           9.9 |           30.4 |
| **remotely**      |         32.5 |          12.2 |           20.3 |
| **particularly**  |         16.6 |           2.6 |           14.0 |
| **overtly**       |         18.1 |           5.9 |           12.2 |
| **any**           |          7.0 |           1.6 |            5.4 |
| **terribly**      |          8.7 |           7.4 |            1.3 |
| **exactly**       |          1.9 |           1.8 |            0.1 |
| **entirely**      |          3.8 |           3.9 |           -0.1 |
| **yet**           |          0.6 |           0.9 |           -0.3 |
| **that**          |          2.6 |           3.0 |           -0.4 |
| **necessarily**   |          2.3 |           3.0 |           -0.7 |
| **immediately**   |          0.7 |           1.4 |           -0.7 |
| **only**          |          0.2 |           1.1 |           -1.0 |
| **altogether**    |          2.4 |           8.8 |           -6.3 |

In [193]:
nb_show_table(C.filter(regex=r'^ratio_f2?_')
              .assign(f_minus_f2=C.ratio_f_MIR - C.ratio_f2_MIR)
              .multiply(100).round(1)
              .sort_values(['f_minus_f2', 'ratio_f_MIR'], ascending=False),
              n_dec=1, adjust_columns=False)



|                 |   `ratio_f_MIR` |   `ratio_f2_MIR` |   `f_minus_f2` |
|:----------------|----------------:|-----------------:|---------------:|
| **ever**        |            79.4 |              4.4 |           74.9 |
| **before**      |            93.8 |             42.6 |           51.2 |
| **remotely**    |            32.5 |             14.3 |           18.3 |
| **any**         |             6.9 |              3.5 |            3.4 |
| **exactly**     |             1.9 |              1.8 |            0.1 |
| **that**        |             2.6 |              2.6 |           -0.0 |
| **necessarily** |             2.3 |              2.3 |           -0.0 |
| **yet**         |             0.6 |              0.9 |           -0.2 |
| **immediately** |             0.7 |              1.2 |           -0.5 |



#### Joint (_Negated_) Frequency Comparison

|                   |   `total negations` |   `mirror subset negations` |   `negations not in mirror subset` |
|:------------------|--------------------:|----------------------------:|-----------------------------------:|
| **that**          |             165,411 |                       4,338 |                            161,073 |
| **only**          |             114,070 |                         173 |                            113,897 |
| **entirely**      |              63,708 |                       2,429 |                             61,279 |
| **immediately**   |              57,319 |                         407 |                             56,912 |
| **yet**           |              52,546 |                         320 |                             52,226 |
| **particularly**  |              55,799 |                       9,278 |                             46,521 |
| **exactly**       |              43,635 |                         813 |                             42,822 |
| **necessarily**   |              42,708 |                         971 |                             41,737 |
| **terribly**      |              18,054 |                       1,579 |                             16,475 |
| **any**           |              15,492 |                       1,082 |                             14,410 |
| **altogether**    |               4,575 |                         112 |                              4,463 |
| **inherently**    |               6,847 |                       2,872 |                              3,975 |
| **remotely**      |               5,679 |                       1,846 |                              3,833 |
| **overtly**       |               2,169 |                         392 |                              1,777 |
| **ever**          |               5,967 |                       4,718 |                              1,249 |
| **intrinsically** |               1,071 |                         432 |                                639 |
| **before**        |                 311 |                         290 |                                 21 |

In [194]:
nb_show_table(
    C
    # .assign(f_percent_MIR=C.ratio_f_MIR * 100)
    .filter(regex=r'^f_.*[MS]').sort_index(axis=1, ascending=False)
    .assign(
        f_diff=C.f_SET-C.f_MIR).sort_values('f_diff', ascending=False)
    .rename(columns={'f_SET':'total negations', 
                     'f_MIR':'mirror subset negations', 
                     'f_diff': 'negations not in mirror subset'}), n_dec=0)


|                 |   `total negations` |   `mirror subset negations` |   `negations not in mirror subset` |
|:----------------|--------------------:|----------------------------:|-----------------------------------:|
| **that**        |             164,768 |                       4,308 |                            160,460 |
| **immediately** |              56,099 |                         403 |                             55,696 |
| **yet**         |              51,867 |                         320 |                             51,547 |
| **exactly**     |              43,813 |                         813 |                             43,000 |
| **necessarily** |              42,595 |                         963 |                             41,632 |
| **any**         |              15,384 |                       1,066 |                             14,318 |
| **remotely**    |               5,661 |                       1,840 |                              3,821 |
| **ever**        

#### Marginal (_Adverb Total_) Frequency Comparison

|                   |   `total adverb tokens` |   `mirror subset adverb tokens` |   `adverb tokens not in mirror subset` |
|:------------------|------------------------:|--------------------------------:|---------------------------------------:|
| **particularly**  |                 575,960 |                          14,954 |                                561,006 |
| **only**          |                 464,168 |                           5,169 |                                458,999 |
| **entirely**      |                 303,833 |                          11,803 |                                292,030 |
| **that**          |                 250,392 |                           7,472 |                                242,920 |
| **ever**          |                 124,592 |                           5,179 |                                119,413 |
| **immediately**   |                 103,177 |                           1,442 |                                101,735 |
| **yet**           |                 101,707 |                             909 |                                100,798 |
| **any**           |                  94,152 |                           1,514 |                                 92,638 |
| **terribly**      |                  70,174 |                           5,218 |                                 64,956 |
| **exactly**       |                  61,599 |                           1,114 |                                 60,485 |
| **necessarily**   |                  56,694 |                           1,681 |                                 55,013 |
| **inherently**    |                  55,088 |                           5,649 |                                 49,439 |
| **remotely**      |                  22,194 |                           2,717 |                                 19,477 |
| **altogether**    |                  20,636 |                           1,808 |                                 18,828 |
| **overtly**       |                  15,219 |                             898 |                                 14,321 |
| **intrinsically** |                  10,001 |                             991 |                                  9,010 |
| **before**        |                     748 |                             294 |                                    454 |

In [195]:
nb_show_table(
    C
    # .assign(f2_percent_MIR=C.ratio_f2_MIR * 100)
    .filter(regex=r'^f2_.*[MS]').sort_index(axis=1, ascending=False)
    .assign(
        f2_diff=C.f2_SET-C.f2_MIR).sort_values('f2_diff', ascending=False)
    .rename(columns={'f2_SET':'total adverb tokens', 
                     'f2_MIR':'mirror subset adverb tokens', 
                     'f2_diff': 'adverb tokens not in mirror subset'}), n_dec=0)


|                 |   `total adverb tokens` |   `mirror subset adverb tokens` |   `adverb tokens not in mirror subset` |
|:----------------|------------------------:|--------------------------------:|---------------------------------------:|
| **that**        |                 208,262 |                           5,494 |                                202,768 |
| **ever**        |                 114,075 |                           5,060 |                                109,015 |
| **immediately** |                  96,973 |                           1,195 |                                 95,778 |
| **yet**         |                  95,763 |                             815 |                                 94,948 |
| **exactly**     |                  58,643 |                           1,041 |                                 57,602 |
| **necessarily** |                  48,947 |                           1,107 |                                 47,840 |
| **any**         |            

In [196]:
full_C = C.copy()
main_cols_ordered = pd.concat((*[C.filter(like=m).columns.to_series() for m in ('LRC', 'P1', 'G2')],
                               *[C.filter(regex=f'^{f}_').columns.to_series() for f in ['f', 'f1', 'f2'] ]) 
                              ).to_list()
# print_iter([f'`{c}`' for c in main_cols_ordered], bullet='1.', header='Main Columns')
main_C = C[[c for c in main_cols_ordered if c in C.columns]]
nb_show_table(main_C.sort_values('mean_dP1', ascending=False), return_df=True)


|                 |   `LRC_SET` |   `LRC_MIR` |   `mean_LRC` |   `dP1_SET` |   `dP1_simple_SET` |   `dP1_MIR` |   `dP1_simple_MIR` |   `mean_dP1` |   `mean_dP1Simple` |   `G2_SET` |   `G2_MIR` |   `mean_G2` |    `f_SET` |   `f_MIR` |     `f1_SET` |   `f1_MIR` |   `f2_SET` |   `f2_MIR` |
|:----------------|------------:|------------:|-------------:|------------:|-------------------:|------------:|-------------------:|-------------:|-------------------:|-----------:|-----------:|------------:|-----------:|----------:|-------------:|-----------:|-----------:|-----------:|
| **necessarily** |        7.10 |        4.39 |         5.74 |        0.83 |               0.87 |        0.70 |               0.87 |         0.82 |               0.87 | 230,257.34 |   2,597.68 |  116,427.51 |  42,595.00 |    963.00 | 3,173,660.00 | 291,732.00 |  48,947.00 |   1,107.00 |
| **that**        |        6.34 |        3.90 |         5.12 |        0.75 |               0.79 |        0.61 |               0.78 |   

Unnamed: 0,`LRC_SET`,`LRC_MIR`,`mean_LRC`,`dP1_SET`,`dP1_simple_SET`,`dP1_MIR`,...,`f_SET`,`f_MIR`,`f1_SET`,`f1_MIR`,`f2_SET`,`f2_MIR`
**necessarily**,7.1,4.39,5.74,0.83,0.87,0.7,...,42595,963,3173660,291732,48947,1107
**that**,6.34,3.9,5.12,0.75,0.79,0.61,...,164768,4308,3173660,291732,208262,5494
**exactly**,5.94,3.57,4.76,0.7,0.75,0.61,...,43813,813,3173660,291732,58643,1041
**before**,3.56,6.21,4.89,0.41,0.45,0.83,...,308,289,3173660,291732,681,290
**any**,4.07,4.65,4.36,0.4,0.45,0.72,...,15384,1066,3173660,291732,34382,1197
**remotely**,3.4,3.79,3.59,0.3,0.34,0.62,...,5661,1840,3173660,291732,16426,2341
**ever**,0.16,5.63,2.9,0.01,0.05,0.76,...,5932,4709,3173660,291732,114075,5060
**yet**,4.65,1.11,2.88,0.5,0.54,0.22,...,51867,320,3173660,291732,95763,815
**immediately**,4.86,0.84,2.85,0.54,0.58,0.17,...,56099,403,3173660,291732,96973,1195


### $\textit{Old}$  Combined Table of AM values for "most negative" adverbs, by descending `mean_dP1`

|                 | `LRC_SET` | `LRC_MIR` | `mean_LRC` | `dP1_SET` | `dP1_MIR` | `mean_dP1` |   `G2_SET` |  `G2_MIR` |  `mean_G2` | `f_SET` | `f_MIR` |  `f1_SET` | `f1_MIR` | `f2_SET` | `f2_MIR` |
|:----------------|----------:|----------:|-----------:|----------:|----------:|-----------:|-----------:|----------:|-----------:|--------:|--------:|----------:|---------:|---------:|---------:|
| **exactly**     |      5.90 |      3.51 |       4.71 |      0.67 |      0.59 |       0.63 | 214,404.20 |  1,939.47 | 108,171.83 |  43,635 |     813 | 3,226,213 |  293,963 |   61,599 |    1,114 |
| **before**      |      3.65 |      5.11 |       4.38 |      0.38 |      0.84 |       0.61 |   1,062.13 |  1,080.52 |   1,071.32 |     311 |     290 | 3,226,213 |  293,963 |      748 |      294 |
| **necessarily** |      6.23 |      2.66 |       4.44 |      0.72 |      0.43 |       0.57 | 219,003.46 |  1,688.91 | 110,346.18 |  42,708 |     971 | 3,226,213 |  293,963 |   56,694 |    1,681 |
| **that**        |      5.62 |      2.86 |       4.24 |      0.63 |      0.44 |       0.53 | 781,016.11 |  7,632.21 | 394,324.16 | 165,411 |   4,338 | 3,226,213 |  293,963 |  250,392 |    7,472 |
| **ever**        |      0.28 |      5.57 |       2.92 |      0.01 |      0.77 |       0.39 |     353.58 | 15,340.34 |   7,846.96 |   5,967 |   4,718 | 3,226,213 |  293,963 |  124,592 |    5,179 |
| **remotely**    |      3.03 |      3.35 |       3.19 |      0.22 |      0.54 |       0.38 |  13,354.33 |  4,009.84 |   8,682.08 |   5,679 |   1,846 | 3,226,213 |  293,963 |   22,194 |    2,717 |
| **any**         |      2.28 |      3.48 |       2.88 |      0.13 |      0.57 |       0.35 |  23,683.00 |  2,511.26 |  13,097.13 |  15,492 |   1,082 | 3,226,213 |  293,963 |   94,152 |    1,514 |
| **yet**         |      4.74 |      1.18 |       2.96 |      0.48 |      0.21 |       0.34 | 209,055.78 |    242.23 | 104,649.01 |  52,546 |     320 | 3,226,213 |  293,963 |  101,707 |      909 |
| **immediately** |      4.96 |      0.79 |       2.88 |      0.52 |      0.14 |       0.33 | 239,462.58 |    181.20 | 119,821.89 |  57,319 |     407 | 3,226,213 |  293,963 |  103,177 |    1,442 |

## Save full adverb selection as `.csv`

In [197]:
combined_top_csv_output = TOP_AM_TAG_DIR / f'{TAG}-Top{K}_NEG-ADV_combined.{timestamp_today()}.csv'
print(f'Saving Combined "Most Negative Adverbs" AM table as csv:  \n> `{combined_top_csv_output}`')

Saving Combined "Most Negative Adverbs" AM table as csv:  
> `/share/compling/projects/sanpi/results/top_AM/ALL/ALL-Top6_NEG-ADV_combined.2024-07-25.csv`


In [198]:
C.to_csv(combined_top_csv_output, float_format='{:.2f}'.format)

In [199]:
C

Unnamed: 0_level_0,key_SET,f_SET,dP1_SET,dP1_simple_SET,LRC_SET,G2_SET,...,mean_expF,mean_unexpF,ratio_f_MIR,ratio_N_MIR,ratio_f1_MIR,ratio_f2_MIR
adv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
necessarily,NEGany~necessarily,42595,0.83,0.87,7.1,230257.34,...,1161.2,20617.8,0.02,0.02,0.09,0.02
that,NEGany~that,164768,0.75,0.79,6.34,831137.25,...,5007.91,79530.09,0.03,0.02,0.09,0.03
exactly,NEGany~exactly,43813,0.7,0.75,5.94,210126.6,...,1366.77,20946.23,0.02,0.02,0.09,0.02
before,NEGany~before,308,0.41,0.45,3.56,1025.56,...,39.69,258.81,0.94,0.02,0.09,0.43
any,NEGany~any,15384,0.4,0.45,4.07,50880.96,...,851.61,7373.39,0.07,0.02,0.09,0.03
remotely,NEGany~remotely,5661,0.3,0.34,3.4,15284.49,...,558.48,3192.02,0.33,0.02,0.09,0.14
ever,NEGany~ever,5932,0.01,0.05,0.16,183.92,...,2918.83,2401.67,0.79,0.02,0.09,0.04
yet,NEGany~yet,51867,0.5,0.54,4.65,197610.98,...,2156.07,23937.43,0.01,0.02,0.09,0.01
immediately,NEGany~immediately,56099,0.54,0.58,4.86,224059.55,...,2215.0,26036.0,0.01,0.02,0.09,0.01


Save `all-columns`, `means`, and `MAIN` as markdown formatted tables

In [200]:
C.to_markdown(floatfmt=',.2f', intfmt=',', buf=TOP_AM_DIR / f'Top{K}_NEG-ADV_combined_all-columns.35f-7c_{timestamp_today()}.md')
C.filter(like='mean_').to_markdown(floatfmt=',.2f', intfmt=',', buf=TOP_AM_DIR / f'Top{K}_NEG-ADV_combined_means.35f-7c_{timestamp_today()}.md')
C[main_cols_ordered].to_markdown(floatfmt=',.2f', intfmt=',', buf=TOP_AM_DIR / f'Top{K}_NEG-ADV_combined_MAIN.35f-7c_{timestamp_today()}.md')