# `NEQ`: Identifying Adverbs with Strongest Negative Environment Associations

In [1]:
import pandas as pd
from am_notebooks import *

from source.utils.associate import TOP_AM_DIR, adjust_assoc_columns
from source.utils.general import confirm_dir

# --- Run configuration ------------------------------------------------------
TAG = 'NEQ'  # data tag passed to the path lookup and used in output names

# Frequency floors: SET_FLOOR is handed to the path lookup as `superset_floor`.
SET_FLOOR = 5000
# Mirror floor = SET_FLOOR floor-divided by 15, rounded to the nearest hundred,
# and capped at 1,000.
MIR_FLOOR = min(1000, round(SET_FLOOR // 15, -2))
K = 8  # number of top adverbs to keep per ranking

# --- Output locations -------------------------------------------------------
data_top = f'{TAG}-Top{K}'
TOP_AM_TAG_DIR = TOP_AM_DIR / TAG
OUT_DIR = TOP_AM_TAG_DIR / data_top

# Parent directory first, then the per-run subdirectory.
confirm_dir(TOP_AM_TAG_DIR)
confirm_dir(OUT_DIR)

Set columns and display settings

In [2]:
# Extend the shared FOCUS column list (imported via `am_notebooks`) with the
# `am_p2_given1*` columns when working with the NEQ data.
# Only names that are not already present are appended, so re-running this
# cell does not accumulate duplicate column names in FOCUS.
if TAG == 'NEQ':
    FOCUS.extend([col for col in ('am_p2_given1_simple', 'am_p2_given1')
                  if col not in FOCUS])

# pandas display options: keep wide association tables compact and show
# numbers with 2 decimals and thousands separators.
pd.set_option('display.max_colwidth', 20)
pd.set_option('display.max_columns', 12)
pd.set_option('display.width', 90)
pd.set_option("display.precision", 2)
pd.set_option("styler.format.precision", 2)
pd.set_option("styler.format.thousands", ",")
pd.set_option("display.float_format", '{:,.2f}'.format)

In [3]:
# Echo the current FOCUS column list as this cell's output.
FOCUS

['f',
 'am_p1_given2',
 'am_p1_given2_simple',
 'conservative_log_ratio',
 'am_log_likelihood',
 'mutual_information',
 'am_odds_ratio_disc',
 't_score',
 'N',
 'f1',
 'f2',
 'E11',
 'unexpected_f',
 'l1',
 'l2',
 'adv',
 'adv_total',
 'adj',
 'adj_total',
 'am_p2_given1_simple',
 'am_p2_given1']

## Set paths and load adverb association tables

In [4]:
# Locate the association tables for the configured floors. If the lookup
# fails for the computed mirror floor, retry once with a floor of 100.
# The failure is reported (rather than swallowed silently) because the
# fallback changes MIR_FLOOR, which later cells print and rely on.
try:
    adv_am_paths = locate_polar_am_paths(superset_floor=SET_FLOOR,
                                         mirror_floor=MIR_FLOOR, data_tag=TAG)
except Exception as err:
    print(f'locate_polar_am_paths failed with mirror_floor={MIR_FLOOR:,}: {err!r}',
          '-> retrying with mirror_floor=100', sep='\n')
    MIR_FLOOR = 100
    adv_am_paths = locate_polar_am_paths(superset_floor=SET_FLOOR,
                                         mirror_floor=MIR_FLOOR, data_tag=TAG)

{'RBdirect': PosixPath('/share/compling/projects/sanpi/results/assoc_df/polar/RBdirect/adv/extra/polarized-adv_NEQ-direct_min5000x_extra.parq'),
 'mirror': PosixPath('/share/compling/projects/sanpi/results/assoc_df/polar/mirror/adv/extra/polarized-adv_NEQ-mirror_min300x_extra.parq')}


In [5]:
# Load the superset (`RBdirect`) and `mirror` adverb tables with the same
# loader, in that order.
setdiff_adv, mirror_adv = (
    filter_load_adx_am(adv_am_paths[path_key])
    for path_key in ('RBdirect', 'mirror')
)

In [6]:
# Print a markdown header, then show a small sample of the superset table.
# The second header line is a *raw* f-string: `\g` in `\geq` is not a valid
# escape sequence, so a plain f-string triggers an invalid-escape warning.
print(r'### Sample of Superset `RBdirect` $*E\sim\texttt{adv}$ AMs',
      rf'With $f\geq{SET_FLOOR:,}$ (i.e. `adv` occurs at least {SET_FLOOR:,} times)',
      sep='\n\n', end='\n\n')
# Fixed `random_state` so the sampled rows are the same on every
# Restart & Run All; the sample is at most 6 rows, ordered by `f2`.
nb_show_table(
    setdiff_adv
    .sample(min(6, K), random_state=0)
    .sort_values('f2', ascending=False)
)

### Sample of Superset `RBdirect` $*E\sim\texttt{adv}$ AMs

With $f\geq5,000$ (i.e. `adv` occurs at least 5,000 times)


|                         |    `f` |   `dP1` |   `P1` |   `LRC` |      `G2` |   `MI` |   `odds_r_disc` |    `t` |       `N` |      `f1` |   `f2` |   `exp_f` |   `unexp_f` | `l1`       | `l2`         |   `P2` |   `dP2` |
|:------------------------|-------:|--------:|-------:|--------:|----------:|-------:|----------------:|-------:|----------:|----------:|-------:|----------:|------------:|:-----------|:-------------|-------:|--------:|
| **NEGany~particularly** | 55,527 |    0.23 |   0.73 |    1.37 | 16,791.84 |   0.16 |            0.43 |  74.04 | 6,347,364 | 3,173,660 | 76,162 | 38,080.74 |   17,446.26 | NEGATED    | particularly |   0.02 |    0.01 |
| **NEGany~exactly**      | 43,813 |    0.49 |   0.98 |    5.71 | 54,870.72 |   0.29 |            1.81 | 103.01 | 6,347,364 | 3,173,660 | 44,503 | 22,251.35 |   21,561.65 | NEGATED    | exactly      |   0.01 |    0.01 |

### Sample of Superset `RBdirect` $*E\sim\texttt{adv}$ AMs

With $f\geq5,000$ (i.e. `adv` occurs at least 5,000 times)


|                      |     `f` |   `dP1` |   `P1` |   `LRC` |      `G2` |   `MI` |   `odds_r_disc` |    `t` |       `N` |      `f1` |    `f2` |    `exp_f` |   `unexp_f` | `l1`       | `l2`      |
|:---------------------|--------:|--------:|-------:|--------:|----------:|-------:|----------------:|-------:|----------:|----------:|--------:|-----------:|------------:|:-----------|:----------|
| **NEGany~too**       | 309,669 |    0.23 |   0.72 |    1.31 | 89,102.93 |   0.16 |            0.43 | 167.70 | 6,347,364 | 3,173,660 | 432,697 | 216,347.00 |   93,322.00 | NEGATED    | too       |
| **NEGany~quite**     |  78,500 |    0.16 |   0.65 |    0.87 | 11,770.32 |   0.12 |            0.28 |  65.92 | 6,347,364 | 3,173,660 | 120,062 |  60,030.58 |   18,469.42 | NEGATED    | quite     |
| **COM~highly**       |  33,581 |    0.44 |   0.93 |    3.65 | 32,384.35 |   0.27 |            1.15 |  85.07 | 6,347,364 | 3,173,552 |  35,986 |  17,992.26 |   15,588.74 | COMPLEMENT | highly    |
| **NEGany~currently** |   7,077 |    0.08 |   0.58 |    0.32 |    298.75 |   0.06 |            0.14 |  11.33 | 6,347,364 | 3,173,660 |  12,247 |   6,123.46 |      953.54 | NEGATED    | currently |
| **COM~generally**    |   5,584 |    0.17 |   0.67 |    0.85 |    991.37 |   0.13 |            0.31 |  19.02 | 6,347,364 | 3,173,552 |   8,326 |   4,162.83 |    1,421.17 | COMPLEMENT | generally |
| **COM~probably**     |   5,724 |    0.46 |   0.96 |    4.14 |  6,291.06 |   0.28 |            1.39 |  36.28 | 6,347,364 | 3,173,552 |   5,958 |   2,978.88 |    2,745.12 | COMPLEMENT | probably  |



In [7]:
# Print a markdown header, then show a small sample of the mirror table.
# The second header line is a *raw* f-string: `\g` in `\geq` is not a valid
# escape sequence, so a plain f-string triggers an invalid-escape warning.
print(r'### Sample of Subset `mirror` $@E\sim\texttt{adv}$ AMs',
      rf'With $f\geq{MIR_FLOOR:,}$ (i.e. `adv` occurs at least {MIR_FLOOR:,} times)',
      sep='\n\n', end='\n\n')
# Fixed `random_state` so the sampled rows are the same on every
# Restart & Run All; the sample is at most 6 rows, ordered by `f2`.
nb_show_table(
    mirror_adv
    .sample(min(6, K), random_state=0)
    .sort_values('f2', ascending=False)
)

### Sample of Subset `mirror` $@E\sim\texttt{adv}$ AMs

With $f\geq300$ (i.e. `adv` occurs at least 300 times)


|                          |    `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |     `N` |    `f1` |    `f2` |   `exp_f` |   `unexp_f` | `l1`   | `l2`          |   `P2` |   `dP2` |
|:-------------------------|-------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|--------:|--------:|--------:|----------:|------------:|:-------|:--------------|-------:|--------:|
| **POS~more**             | 43,645 |   -0.18 |   0.36 |   -0.80 | -12,605.38 |  -0.15 |           -0.32 | -83.00 | 583,470 | 291,729 | 121,971 | 60,984.25 |  -17,339.25 | POSMIR | more          |   0.15 |   -0.12 |
| **POS~rather**           |  1,753 |    0.48 |   0.98 |    4.64 |   2,158.90 |   0.29 |            1.73 |  20.55 | 583,470 | 291,729 |   1,785 |    892.48 |      860.52 | POSMIR | rather        |   0.01 |    0.01 |
| **POS~physically**   

### Sample of Subset `mirror` $@E\sim\texttt{adv}$ AMs

With $f\geq300$ (i.e. `adv` occurs at least 300 times)


|                          |   `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |     `t` |     `N` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` | `l1`   | `l2`          |
|:-------------------------|------:|--------:|-------:|--------:|-----------:|-------:|----------------:|--------:|--------:|--------:|-------:|----------:|------------:|:-------|:--------------|
| **NEGmir~very**          | 8,914 |   -0.33 |   0.20 |   -1.95 | -19,315.97 |  -0.41 |           -0.66 | -145.70 | 583,470 | 291,732 | 45,341 | 22,670.27 |  -13,756.27 | NEGMIR | very          |
| **NEGmir~extremely**     |   306 |   -0.42 |   0.08 |   -3.14 |  -3,256.72 |  -0.80 |           -1.07 |  -93.44 | 583,470 | 291,732 |  3,881 |  1,940.48 |   -1,634.48 | NEGMIR | extremely     |
| **POS~fully**            |   624 |   -0.23 |   0.27 |   -1.08 |    -492.34 |  -0.26 |           -0.43 |  -20.82 | 583,470 | 291,729 |  2,288 |  1,143.98 |     -519.98 | POSMIR | fully         |
| **POS~actually**         |   749 |   -0.09 |   0.41 |   -0.18 |     -57.44 |  -0.08 |           -0.16 |   -5.88 | 583,470 | 291,729 |  1,820 |    909.98 |     -160.98 | POSMIR | actually      |
| **POS~equally**          | 1,538 |    0.46 |   0.95 |    3.60 |   1,642.68 |   0.28 |            1.32 |   18.68 | 583,470 | 291,729 |  1,611 |    805.48 |      732.52 | POSMIR | equally       |
| **NEGmir~fundamentally** |   322 |   -0.05 |   0.45 |    0.00 |      -5.97 |  -0.04 |           -0.08 |   -1.81 | 583,470 | 291,732 |    709 |    354.50 |      -32.50 | NEGMIR | fundamentally |



## Calculate "Most Negative" Adverbs for each Polarity Approximation

In [8]:
# Top-15 selection for each polarity approximation (superset first, then mirror).
setdiff_top15 = get_top_vals(setdiff_adv, k=15)
mirror_top15 = get_top_vals(mirror_adv, k=15)


In [9]:
# Superset top-15 table: expose `l2` as `adv`, keep `adv` plus the FOCUS
# columns, turn the index into a column, then drop every column whose name
# starts with "k" or "l".
nb_show_table(
    setdiff_top15
    .assign(adv=lambda top: top.l2)
    .filter(items=['adv', *FOCUS])
    .reset_index()
    .filter(regex=r'^[^kl]')
)


|        | `adv`       |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |       `N` |      `f1` |    `f2` |    `exp_f` |   `unexp_f` |   `P2` |   `dP2` |
|:-------|:------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|----------:|----------:|--------:|-----------:|------------:|-------:|--------:|
| **1**  | that        | 164,768 |    0.50 |   0.99 |    6.26 | 214,504.57 |   0.30 |            1.96 | 200.61 | 6,347,364 | 3,173,660 | 166,676 |  83,337.42 |   81,430.58 |   0.05 |    0.05 |
| **2**  | necessarily |  42,595 |    0.50 |   0.99 |    6.77 |  56,251.14 |   0.30 |            2.17 | 102.49 | 6,347,364 | 3,173,660 |  42,886 |  21,442.85 |   21,152.15 |   0.01 |    0.01 |
| **3**  | exactly     |  43,813 |    0.49 |   0.98 |    5.71 |  54,870.72 |   0.29 |            1.81 | 103.01 | 6,347,364 | 3,173,660 |  44,503 |  22,251.35 |   21,561.65 |   0.01 |    0.01 |
| **4**  | immediately |  56,099 |

### 15 Most Negatively Associated Adverbs for full dataset 

_Absent Negative_ approximation  
as ranked by $\Delta P(1|2)$ (`dP1`) and $LRC$

|        | `adv`       |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |       `N` |      `f1` |    `f2` |    `exp_f` |   `unexp_f` |
|:-------|:------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|----------:|----------:|--------:|-----------:|------------:|
| **1**  | ___that___        | 164,768 |    0.50 |   0.99 |    6.26 | 214,504.57 |   0.30 |            1.96 | 200.61 | 6,347,364 | 3,173,660 | 166,676 |  83,337.42 |   81,430.58 |
| **2**  | ___necessarily___ |  42,595 |    0.50 |   0.99 |    6.77 |  56,251.14 |   0.30 |            2.17 | 102.49 | 6,347,364 | 3,173,660 |  42,886 |  21,442.85 |   21,152.15 |
| **3**  | ___exactly___     |  43,813 |    0.49 |   0.98 |    5.71 |  54,870.72 |   0.29 |            1.81 | 103.01 | 6,347,364 | 3,173,660 |  44,503 |  22,251.35 |   21,561.65 |
| **4**  | _immediately_ |  56,099 |    0.47 |   0.97 |    4.68 |  63,920.54 |   0.29 |            1.47 | 114.33 | 6,347,364 | 3,173,660 |  58,040 |  29,019.80 |   27,079.20 |
| **5**  | _yet_         |  51,867 |    0.47 |   0.96 |    4.52 |  57,900.12 |   0.28 |            1.42 | 109.45 | 6,347,364 | 3,173,660 |  53,881 |  26,940.31 |   24,926.69 |
| **6**  | _any_         |  15,384 |    0.45 |   0.95 |    3.91 |  15,851.55 |   0.28 |            1.26 |  58.57 | 6,347,364 | 3,173,660 |  16,238 |   8,118.94 |    7,265.06 |
| **7**  | _remotely_    |   5,661 |    0.42 |   0.92 |    3.16 |   5,075.57 |   0.26 |            1.05 |  34.30 | 6,347,364 | 3,173,660 |   6,161 |   3,080.48 |    2,580.52 |
| **8**  | _terribly_    |  17,949 |    0.41 |   0.91 |    3.10 |  15,186.21 |   0.26 |            0.99 |  60.07 | 6,347,364 | 3,173,660 |  19,802 |   9,900.93 |    8,048.07 |
| **9**  | _only_        | 113,502 |    0.39 |   0.89 |    2.89 |  88,060.81 |   0.25 |            0.90 | 146.68 | 6,347,364 | 3,173,660 | 128,174 |  64,086.56 |   49,415.44 |
| **10** | _overly_      |  24,613 |    0.38 |   0.87 |    2.67 |  17,861.62 |   0.24 |            0.85 |  67.23 | 6,347,364 | 3,173,660 |  28,132 |  14,065.90 |   10,547.10 |
| **11** | _entirely_    |  63,321 |    0.37 |   0.87 |    2.66 |  45,040.32 |   0.24 |            0.83 | 106.96 | 6,347,364 | 3,173,660 |  72,811 |  36,405.25 |   26,915.75 |
| **12** | _as_          | 531,731 |    0.35 |   0.81 |    2.07 | 301,508.90 |   0.21 |            0.69 | 279.31 | 6,347,364 | 3,173,660 | 656,123 | 328,059.23 |  203,671.77 |
| **13** | _merely_      |   5,918 |    0.34 |   0.84 |    2.19 |   3,642.62 |   0.23 |            0.73 |  31.33 | 6,347,364 | 3,173,660 |   7,016 |   3,507.98 |    2,410.02 |
| **14** | _always_      | 103,883 |    0.32 |   0.82 |    2.12 |  56,744.90 |   0.21 |            0.67 | 125.40 | 6,347,364 | 3,173,660 | 126,929 |  63,464.06 |   40,418.94 |
| **15** | _directly_    |   8,197 |    0.31 |   0.81 |    1.89 |   4,162.98 |   0.21 |            0.63 |  34.56 | 6,347,364 | 3,173,660 |  10,136 |   5,067.96 |    3,129.04 |


In [10]:
# Mirror top-15 table: expose `l2` as `adv`, keep `adv` plus the FOCUS
# columns, turn the index into a column, then drop every column whose name
# starts with "k" or "l".
nb_show_table(
    mirror_top15
    .assign(adv=lambda top: top.l2)
    .filter(items=['adv', *FOCUS])
    .reset_index()
    .filter(regex=r'^[^kl]')
)


|        | `adv`         |   `f` |   `dP1` |   `P1` |   `LRC` |     `G2` |   `MI` |   `odds_r_disc` |   `t` |     `N` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |   `P2` |   `dP2` |
|:-------|:--------------|------:|--------:|-------:|--------:|---------:|-------:|----------------:|------:|--------:|--------:|-------:|----------:|------------:|-------:|--------:|
| **1**  | ever          | 4,709 |    0.49 |   0.98 |    5.17 | 5,883.26 |   0.29 |            1.79 | 33.75 | 583,470 | 291,732 |  4,786 |  2,392.98 |    2,316.02 |   0.02 |    0.02 |
| **2**  | any           | 1,066 |    0.47 |   0.97 |    4.00 | 1,252.02 |   0.29 |            1.56 | 15.88 | 583,470 | 291,732 |  1,095 |    547.49 |      518.51 |   0.00 |    0.00 |
| **3**  | necessarily   |   963 |    0.47 |   0.97 |    3.86 | 1,114.70 |   0.29 |            1.52 | 15.05 | 583,470 | 291,732 |    992 |    495.99 |      467.01 |   0.00 |    0.00 |
| **4**  | that          | 4,308 |    0.45 |   0.94 |    3.66 | 4,405.21 |   0.

### 15 Most Negatively Associated Adverbs for `mirror` subset 

_Present Positive_ approximation  
as ranked by $\Delta P(1|2)$ (`dP1`) and $LRC$

|        | `adv`         |   `f` |   `dP1` |   `P1` |   `LRC` |     `G2` |   `MI` |   `odds_r_disc` |   `t` |     `N` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |
|:-------|:--------------|------:|--------:|-------:|--------:|---------:|-------:|----------------:|------:|--------:|--------:|-------:|----------:|------------:|
| **1**  | ever          | 4,709 |    0.49 |   0.98 |    5.17 | 5,883.26 |   0.29 |            1.79 | 33.75 | 583,470 | 291,732 |  4,786 |  2,392.98 |    2,316.02 |
| **2**  | any           | 1,066 |    0.47 |   0.97 |    4.00 | 1,252.02 |   0.29 |            1.56 | 15.88 | 583,470 | 291,732 |  1,095 |    547.49 |      518.51 |
| **3**  | necessarily   |   963 |    0.47 |   0.97 |    3.86 | 1,114.70 |   0.29 |            1.52 | 15.05 | 583,470 | 291,732 |    992 |    495.99 |      467.01 |
| **4**  | that          | 4,308 |    0.45 |   0.94 |    3.66 | 4,405.21 |   0.28 |            1.24 | 30.91 | 583,470 | 291,732 |  4,559 |  2,279.48 |    2,028.52 |
| **5**  | remotely      | 1,840 |    0.44 |   0.94 |    3.37 | 1,849.23 |   0.28 |            1.21 | 20.13 | 583,470 | 291,732 |  1,953 |    976.49 |      863.51 |
| **6**  | exactly       |   813 |    0.44 |   0.94 |    2.95 |   790.27 |   0.27 |            1.16 | 13.27 | 583,470 | 291,732 |    869 |    434.50 |      378.50 |
| **7**  | particularly  | 9,243 |    0.43 |   0.92 |    3.30 | 8,516.58 |   0.27 |            1.08 | 43.98 | 583,470 | 291,732 | 10,029 |  5,014.45 |    4,228.55 |
| **8**  | inherently    | 2,864 |    0.36 |   0.86 |    2.24 | 1,899.59 |   0.23 |            0.78 | 22.29 | 583,470 | 291,732 |  3,342 |  1,670.98 |    1,193.02 |
| **9**  | intrinsically |   433 |    0.34 |   0.84 |    1.58 |   262.63 |   0.23 |            0.72 |  8.43 | 583,470 | 291,732 |    515 |    257.50 |      175.50 |
| **10** | overtly       |   391 |    0.33 |   0.83 |    1.43 |   219.61 |   0.22 |            0.68 |  7.81 | 583,470 | 291,732 |    473 |    236.50 |      154.50 |
| **11** | especially    | 1,569 |    0.28 |   0.78 |    1.43 |   658.82 |   0.19 |            0.54 | 14.13 | 583,470 | 291,732 |  2,019 |  1,009.49 |      559.51 |
| **12** | fully         | 1,664 |    0.23 |   0.73 |    1.08 |   492.40 |   0.16 |            0.43 | 12.75 | 583,470 | 291,732 |  2,288 |  1,143.99 |      520.01 |



📌 _Note that the "top 15" adverbs for the `NEGmirror` data are actually **all** adverbs s.t. $f \geq 300$ and $\texttt{LRC} > 1$._

| l1     |   total adverbs $LRC > 1$ |
|:-------|--------------------------:|
| POSMIR |                        96 |
| NEGMIR |                        15 |



In [11]:
# Count, per `l1` value, the mirror-table rows whose conservative log ratio
# exceeds 1, and print the counts as a markdown table.
print(
    mirror_adv
    .loc[lambda adv_df: adv_df.conservative_log_ratio > 1]
    .value_counts('l1')
    .to_frame('total adverbs $LRC > 1$')
    .to_markdown(intfmt=',')
)

| l1     |   total adverbs $LRC > 1$ |
|:-------|--------------------------:|
| POSMIR |                        40 |
| NEGMIR |                        13 |


### Or here, the least "negative"/most "non-negative"

In [12]:
# All data (superset table): per the rendered header below, this lists the
# adverbs in the top 15 for LRC, G2 and dP1 with respect to the *complement*
# environment, so the table can hold more than 15 rows.
show_top_positive(setdiff_adv, k=15)

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Complement* Environments (`set_diff`, $*\complement_{N^+}$)

Total Tokens in dataset: $N = 6,347,364$

|                  |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |      `f1` |    `f2` |    `exp_f` |   `unexp_f` |   `P2` |   `dP2` |
|:-----------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|----------:|--------:|-----------:|------------:|-------:|--------:|
| **increasingly** |  17,147 |    0.50 |   1.00 |    7.07 |  22,976.10 |   0.30 |            2.37 |  65.20 | 3,173,552 |  17,220 |   8,609.65 |    8,537.35 |   0.01 |    0.01 |
| **relatively**   |  26,303 |    0.49 |   0.99 |    5.97 |  33,565.49 |   0.30 |            1.92 |  80.11 | 3,173,552 |  26,621 |  13,309.95 |   12,993.05 |   0.01 |    0.01 |
| **almost**       |  19,843 |    0.48 |   0.98 |    5.28 |  24,212.21 |   0.2

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Complement* Environments (`set_diff`, $*\complement_{N^+}$)

Total Tokens in dataset: $N = 6,347,364$

|                  |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |      `f1` |    `f2` |    `exp_f` |   `unexp_f` |
|:-----------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|----------:|--------:|-----------:|------------:|
| **increasingly** |  17,147 |    0.50 |   1.00 |    7.07 |  22,976.10 |   0.30 |            2.37 |  65.20 | 3,173,552 |  17,220 |   8,609.65 |    8,537.35 |
| **relatively**   |  26,303 |    0.49 |   0.99 |    5.97 |  33,565.49 |   0.30 |            1.92 |  80.11 | 3,173,552 |  26,621 |  13,309.95 |   12,993.05 |
| **almost**       |  19,843 |    0.48 |   0.98 |    5.28 |  24,212.21 |   0.29 |            1.70 |  69.03 | 3,173,552 |  20,240 |  10,119.59 |    9,723.41 |
| **mostly**       |   9,295 |    0.48 |   0.98 |    5.14 |  11,346.01 |   0.29 |            1.71 |  47.26 | 3,173,552 |   9,478 |   4,738.81 |    4,556.19 |
| **seemingly**    |   7,411 |    0.48 |   0.98 |    5.07 |   9,037.07 |   0.29 |            1.70 |  42.19 | 3,173,552 |   7,558 |   3,778.85 |    3,632.15 |
| **fairly**       |  17,040 |    0.48 |   0.98 |    5.00 |  20,307.10 |   0.29 |            1.61 |  63.67 | 3,173,552 |  17,457 |   8,728.14 |    8,311.86 |
| **pretty**       |  68,498 |    0.48 |   0.97 |    4.96 |  80,502.90 |   0.29 |            1.55 | 127.13 | 3,173,552 |  70,454 |  35,225.56 |   33,272.44 |
| **largely**      |   7,916 |    0.48 |   0.98 |    4.89 |   9,476.32 |   0.29 |            1.63 |  43.45 | 3,173,552 |   8,101 |   4,050.33 |    3,865.67 |
| **rather**       |  16,570 |    0.47 |   0.97 |    4.75 |  19,253.20 |   0.29 |            1.53 |  62.47 | 3,173,552 |  17,059 |   8,529.15 |    8,040.85 |
| **sometimes**    |   6,493 |    0.47 |   0.97 |    4.58 |   7,549.67 |   0.29 |            1.54 |  39.12 | 3,173,552 |   6,682 |   3,340.86 |    3,152.14 |
| **also**         |  48,143 |    0.46 |   0.96 |    4.44 |  53,221.81 |   0.28 |            1.40 | 105.23 | 3,173,552 |  50,109 |  25,053.47 |   23,089.53 |
| **now**          |  19,616 |    0.46 |   0.96 |    4.27 |  21,346.76 |   0.28 |            1.36 |  66.99 | 3,173,552 |  20,468 |  10,233.58 |    9,382.42 |
| **probably**     |   5,724 |    0.46 |   0.96 |    4.14 |   6,291.06 |   0.28 |            1.39 |  36.28 | 3,173,552 |   5,958 |   2,978.88 |    2,745.12 |
| **somewhat**     |  12,992 |    0.46 |   0.95 |    4.10 |  13,877.77 |   0.28 |            1.33 |  54.30 | 3,173,552 |  13,607 |   6,803.22 |    6,188.78 |
| **potentially**  |   8,708 |    0.46 |   0.96 |    4.07 |   9,340.83 |   0.28 |            1.34 |  44.50 | 3,173,552 |   9,111 |   4,555.31 |    4,152.69 |
| **most**         | 325,174 |    0.47 |   0.94 |    4.03 | 344,851.77 |   0.28 |            1.27 | 268.29 | 3,173,552 | 344,378 | 172,181.95 |  152,992.05 |
| **still**        |  35,199 |    0.45 |   0.95 |    3.99 |  36,320.19 |   0.28 |            1.26 |  88.57 | 3,173,552 |  37,164 |  18,581.24 |   16,617.76 |
| **highly**       |  33,581 |    0.44 |   0.93 |    3.65 |  32,384.35 |   0.27 |            1.15 |  85.07 | 3,173,552 |  35,986 |  17,992.26 |   15,588.74 |
| **extremely**    |  41,289 |    0.41 |   0.91 |    3.24 |  35,860.38 |   0.26 |            1.02 |  91.69 | 3,173,552 |  45,317 |  22,657.57 |   18,631.43 |
| **less**         |  52,587 |    0.30 |   0.80 |    1.90 |  25,036.58 |   0.20 |            0.60 |  85.34 | 3,173,552 |  66,037 |  33,017.15 |   19,569.85 |
| **more**         | 392,003 |    0.23 |   0.71 |    1.25 | 107,688.86 |   0.15 |            0.42 | 183.76 | 3,173,552 | 553,922 | 276,949.66 |  115,053.34 |
| **very**         | 412,871 |    0.20 |   0.69 |    1.10 |  93,225.19 |   0.14 |            0.37 | 173.58 | 3,173,552 | 602,694 | 301,334.66 |  111,536.34 |



In [13]:
# Mirror data ~ explicitly positive ~ positive trigger present.
# Same listing as the previous cell, but for the `mirror` subset: per the
# rendered header, association is measured with the *Posmir* environment.
show_top_positive(mirror_adv, k=15)

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Posmir* Environments (`mirror`, $@P$)

Total Tokens in dataset: $N = 583,470$

|                |    `f` |   `dP1` |   `P1` |   `LRC` |      `G2` |   `MI` |   `odds_r_disc` |   `t` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |   `P2` |   `dP2` |
|:---------------|-------:|--------:|-------:|--------:|----------:|-------:|----------------:|------:|--------:|-------:|----------:|------------:|-------:|--------:|
| **pretty**     |  5,049 |    0.48 |   0.98 |    4.71 |  6,024.97 |   0.29 |            1.61 | 34.64 | 291,729 |  5,176 |  2,587.95 |    2,461.05 |   0.02 |    0.02 |
| **rather**     |  1,753 |    0.48 |   0.98 |    4.64 |  2,158.90 |   0.29 |            1.73 | 20.55 | 291,729 |  1,785 |    892.48 |      860.52 |   0.01 |    0.01 |
| **plain**      |  1,001 |    0.48 |   0.98 |    4.38 |  1,240.10 |   0.29 |            1.76 | 15.55 | 291,729 |  1,018 |    508.99 |    

#### Adverbs in top 15 for $LRC$, $G^2$, and $\Delta P(\texttt{env}|\texttt{adv})$ measuring association with *Posmir* Environments (`mirror`, $@P$)

Total Tokens in dataset: $N = 583,470$

|                |    `f` |   `dP1` |   `P1` |   `LRC` |      `G2` |   `MI` |   `odds_r_disc` |   `t` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |
|:---------------|-------:|--------:|-------:|--------:|----------:|-------:|----------------:|------:|--------:|-------:|----------:|------------:|
| **pretty**     |  5,049 |    0.48 |   0.98 |    4.71 |  6,024.97 |   0.29 |            1.61 | 34.64 | 291,729 |  5,176 |  2,587.95 |    2,461.05 |
| **rather**     |  1,753 |    0.48 |   0.98 |    4.64 |  2,158.90 |   0.29 |            1.73 | 20.55 | 291,729 |  1,785 |    892.48 |      860.52 |
| **plain**      |  1,001 |    0.48 |   0.98 |    4.38 |  1,240.10 |   0.29 |            1.76 | 15.55 | 291,729 |  1,018 |    508.99 |      492.01 |
| **somewhat**   |    937 |    0.48 |   0.98 |    4.34 |  1,160.12 |   0.29 |            1.76 | 15.04 | 291,729 |    953 |    476.49 |      460.51 |
| **fairly**     |  1,163 |    0.48 |   0.98 |    4.31 |  1,413.04 |   0.29 |            1.68 | 16.70 | 291,729 |  1,187 |    593.49 |      569.51 |
| **otherwise**  |  1,426 |    0.47 |   0.97 |    4.07 |  1,657.11 |   0.29 |            1.53 | 18.33 | 291,729 |  1,468 |    733.98 |      692.02 |
| **maybe**      |    546 |    0.48 |   0.98 |    3.97 |    677.87 |   0.29 |            1.76 | 11.49 | 291,729 |    555 |    277.49 |      268.51 |
| **downright**  |    985 |    0.47 |   0.97 |    3.89 |  1,144.00 |   0.29 |            1.53 | 15.23 | 291,729 |  1,014 |    506.99 |      478.01 |
| **already**    |    860 |    0.47 |   0.97 |    3.76 |    989.13 |   0.29 |            1.50 | 14.20 | 291,729 |    887 |    443.49 |      416.51 |
| **relatively** |  1,073 |    0.47 |   0.97 |    3.75 |  1,210.84 |   0.29 |            1.45 | 15.80 | 291,729 |  1,111 |    555.49 |      517.51 |
| **almost**     |  1,065 |    0.46 |   0.96 |    3.67 |  1,184.44 |   0.28 |            1.41 | 15.69 | 291,729 |  1,106 |    552.99 |      512.01 |
| **equally**    |  1,538 |    0.46 |   0.95 |    3.60 |  1,642.68 |   0.28 |            1.32 | 18.68 | 291,729 |  1,611 |    805.48 |      732.52 |
| **perhaps**    |    732 |    0.47 |   0.96 |    3.52 |    819.90 |   0.29 |            1.43 | 13.03 | 291,729 |    759 |    379.49 |      352.51 |
| **highly**     |  1,848 |    0.44 |   0.93 |    3.21 |  1,788.17 |   0.27 |            1.15 | 19.98 | 291,729 |  1,978 |    988.98 |      859.02 |
| **slightly**   |  1,513 |    0.44 |   0.93 |    3.16 |  1,465.03 |   0.27 |            1.15 | 18.09 | 291,729 |  1,619 |    809.48 |      703.52 |
| **extremely**  |  3,575 |    0.42 |   0.92 |    3.14 |  3,256.93 |   0.27 |            1.07 | 27.34 | 291,729 |  3,881 |  1,940.46 |    1,634.54 |
| **also**       |  1,370 |    0.43 |   0.93 |    3.06 |  1,302.12 |   0.27 |            1.13 | 17.13 | 291,729 |  1,472 |    735.98 |      634.02 |
| **simply**     |  1,663 |    0.43 |   0.93 |    3.04 |  1,549.42 |   0.27 |            1.10 | 18.77 | 291,729 |  1,795 |    897.48 |      765.52 |
| **still**      |  2,706 |    0.41 |   0.91 |    2.93 |  2,356.33 |   0.26 |            1.02 | 23.50 | 291,729 |  2,967 |  1,483.47 |    1,222.53 |
| **incredibly** |  1,826 |    0.42 |   0.92 |    2.89 |  1,616.39 |   0.26 |            1.04 | 19.40 | 291,729 |  1,994 |    996.98 |      829.02 |
| **possibly**   |    339 |    0.46 |   0.96 |    2.88 |    366.75 |   0.28 |            1.34 |  8.80 | 291,729 |    354 |    177.00 |      162.00 |
| **just**       |  5,883 |    0.39 |   0.89 |    2.70 |  4,553.79 |   0.25 |            0.90 | 33.45 | 291,729 |  6,635 |  3,317.43 |    2,565.57 |
| **even**       | 12,382 |    0.32 |   0.81 |    1.98 |  6,616.74 |   0.21 |            0.65 | 42.89 | 291,729 | 15,220 |  7,609.84 |    4,772.16 |
| **very**       | 36,427 |    0.33 |   0.80 |    1.95 | 19,317.81 |   0.21 |            0.66 | 72.08 | 291,729 | 45,341 | 22,670.03 |   13,756.97 |



## Compile top NEG~adverb associations across both approximation methods

### Define the functions

[_moved to `./am_notebooks.py`_]

### Run it 🏃‍♀️

In [14]:
# Combine the top-K "most negative" adverbs from both approximations.
# 'SET' and 'MIR' label the superset and mirror tables in the printed report;
# presumably they also become column suffixes (e.g. `ratio_f_MIR`, used in the
# next cell) -- confirm in `am_notebooks.py`.
C = combine_top_adv(setdiff_adv, 'SET',
                mirror_adv, 'MIR',adv_am_paths=adv_am_paths,
                k=K, data_tag=TAG)

### `NEQ` Most Negative Adverb Selections

`SET`: union of top 8 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _remotely_
1. _terribly_

`MIR`: union of top 8 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _ever_
1. _any_
1. _necessarily_
1. _that_
1. _remotely_
1. _particularly_
1. _exactly_
1. _inherently_

Union of top adverbs for `SET` and `MIR`. (Novel `MIR` adverbs listed last)
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _remotely_
1. _terribly_
1. _ever_
1. _particularly_
1. _inherently_

### `SET` Adverb Associations (in initially loaded table)


|                  |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |      `f1` |    `f2` |   `exp_f` |   `unexp_f` |   `P2` |   `dP2` |
|:-----------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|-----

### `NEQ` Most Negative Adverb Selections

`SET`: union of top 8 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _remotely_
1. _terribly_

`MIR`: union of top 8 adverbs ranked by $LRC$ & $\Delta P(\texttt{env}|\texttt{adv})$
1. _ever_
1. _any_
1. _necessarily_
1. _that_
1. _remotely_
1. _particularly_
1. _exactly_
1. _inherently_

Union of top adverbs for `SET` and `MIR`. (Novel `MIR` adverbs listed last)
1. _necessarily_
1. _that_
1. _exactly_
1. _immediately_
1. _yet_
1. _any_
1. _remotely_
1. _terribly_
1. _ever_
1. _particularly_
1. _inherently_

### `SET` Adverb Associations (in initially loaded table)


|                  |     `f` |   `dP1` |   `P1` |   `LRC` |       `G2` |   `MI` |   `odds_r_disc` |    `t` |      `f1` |    `f2` |   `exp_f` |   `unexp_f` |
|:-----------------|--------:|--------:|-------:|--------:|-----------:|-------:|----------------:|-------:|----------:|--------:|----------:|------------:|
| **necessarily**  |  42,595 |    0.50 |   0.99 |    6.77 |  56,251.14 |   0.30 |            2.17 | 102.49 | 3,173,660 |  42,886 | 21,442.85 |   21,152.15 |
| **that**         | 164,768 |    0.50 |   0.99 |    6.26 | 214,504.57 |   0.30 |            1.96 | 200.61 | 3,173,660 | 166,676 | 83,337.42 |   81,430.58 |
| **exactly**      |  43,813 |    0.49 |   0.98 |    5.71 |  54,870.72 |   0.29 |            1.81 | 103.01 | 3,173,660 |  44,503 | 22,251.35 |   21,561.65 |
| **immediately**  |  56,099 |    0.47 |   0.97 |    4.68 |  63,920.54 |   0.29 |            1.47 | 114.33 | 3,173,660 |  58,040 | 29,019.80 |   27,079.20 |
| **yet**          |  51,867 |    0.47 |   0.96 |    4.52 |  57,900.12 |   0.28 |            1.42 | 109.45 | 3,173,660 |  53,881 | 26,940.31 |   24,926.69 |
| **any**          |  15,384 |    0.45 |   0.95 |    3.91 |  15,851.55 |   0.28 |            1.26 |  58.57 | 3,173,660 |  16,238 |  8,118.94 |    7,265.06 |
| **remotely**     |   5,661 |    0.42 |   0.92 |    3.16 |   5,075.57 |   0.26 |            1.05 |  34.30 | 3,173,660 |   6,161 |  3,080.48 |    2,580.52 |
| **terribly**     |  17,949 |    0.41 |   0.91 |    3.10 |  15,186.21 |   0.26 |            0.99 |  60.07 | 3,173,660 |  19,802 |  9,900.93 |    8,048.07 |
| **inherently**   |   6,743 |    0.28 |   0.78 |    1.66 |   2,929.13 |   0.19 |            0.56 |  29.67 | 3,173,660 |   8,614 |  4,306.97 |    2,436.03 |
| **particularly** |  55,527 |    0.23 |   0.73 |    1.37 |  16,791.84 |   0.16 |            0.43 |  74.04 | 3,173,660 |  76,162 | 38,080.74 |   17,446.26 |
| **ever**         |   5,932 |    0.05 |   0.55 |    0.12 |      91.19 |   0.04 |            0.08 |   6.45 | 3,173,660 |  10,870 |  5,434.96 |      497.04 |


### `MIR` Adverb Associations (in initially loaded table)


|                  |   `f` |   `dP1` |   `P1` |   `LRC` |     `G2` |   `MI` |   `odds_r_disc` |   `t` |    `f1` |   `f2` |   `exp_f` |   `unexp_f` |
|:-----------------|------:|--------:|-------:|--------:|---------:|-------:|----------------:|------:|--------:|-------:|----------:|------------:|
| **ever**         | 4,709 |    0.49 |   0.98 |    5.17 | 5,883.26 |   0.29 |            1.79 | 33.75 | 291,732 |  4,786 |  2,392.98 |    2,316.02 |
| **any**          | 1,066 |    0.47 |   0.97 |    4.00 | 1,252.02 |   0.29 |            1.56 | 15.88 | 291,732 |  1,095 |    547.49 |      518.51 |
| **necessarily**  |   963 |    0.47 |   0.97 |    3.86 | 1,114.70 |   0.29 |            1.52 | 15.05 | 291,732 |    992 |    495.99 |      467.01 |
| **that**         | 4,308 |    0.45 |   0.94 |    3.66 | 4,405.21 |   0.28 |            1.24 | 30.91 | 291,732 |  4,559 |  2,279.48 |    2,028.52 |
| **remotely**     | 1,840 |    0.44 |   0.94 |    3.37 | 1,849.23 |   0.28 |            1.21 | 20.13 | 291,732 |  1,953 |    976.49 |      863.51 |
| **particularly** | 9,243 |    0.43 |   0.92 |    3.30 | 8,516.58 |   0.27 |            1.08 | 43.98 | 291,732 | 10,029 |  5,014.45 |    4,228.55 |
| **exactly**      |   813 |    0.44 |   0.94 |    2.95 |   790.27 |   0.27 |            1.16 | 13.27 | 291,732 |    869 |    434.50 |      378.50 |
| **inherently**   | 2,864 |    0.36 |   0.86 |    2.24 | 1,899.59 |   0.23 |            0.78 | 22.29 | 291,732 |  3,342 |  1,670.98 |    1,193.02 |
| **terribly**     | 1,567 |    0.21 |   0.71 |    0.97 |   406.49 |   0.15 |            0.39 | 11.75 | 291,732 |  2,204 |  1,101.99 |      465.01 |
| **yet**          |   320 |    0.26 |   0.76 |    0.90 |   122.77 |   0.18 |            0.51 |  6.18 | 291,732 |    419 |    209.50 |      110.50 |
| **immediately**  |   403 |    0.21 |   0.71 |    0.67 |   107.39 |   0.16 |            0.40 |  6.03 | 291,732 |    564 |    282.00 |      121.00 |



In [15]:
# Show, as percentages, the mirror-subset ratio columns for joint (`f`) and
# marginal (`f2`) frequency, plus the gap between them, largest gap first.
nb_show_table(
    C.filter(regex=r'^ratio_f2?_')
    .assign(
        # both operands survive the regex filter above, so the difference
        # can be taken from the filtered frame itself
        f_minus_f2=lambda ratios: ratios['ratio_f_MIR'] - ratios['ratio_f2_MIR']
    )
    .mul(100)
    .round(1)
    .sort_values(by=['f_minus_f2', 'ratio_f_MIR'], ascending=False),
    n_dec=1,
    adjust_columns=False,
)



|                  |   `ratio_f_MIR` |   `ratio_f2_MIR` |   `f_minus_f2` |
|:-----------------|----------------:|-----------------:|---------------:|
| **ever**         |            79.4 |             44.0 |           35.4 |
| **inherently**   |            42.5 |             38.8 |            3.7 |
| **particularly** |            16.6 |             13.2 |            3.5 |
| **remotely**     |            32.5 |             31.7 |            0.8 |
| **any**          |             6.9 |              6.7 |            0.2 |
| **that**         |             2.6 |              2.7 |           -0.1 |
| **necessarily**  |             2.3 |              2.3 |           -0.1 |
| **exactly**      |             1.9 |              2.0 |           -0.1 |
| **yet**          |             0.6 |              0.8 |           -0.2 |
| **immediately**  |             0.7 |              1.0 |           -0.3 |
| **terribly**     |             8.7 |             11.1 |           -2.4 |




|                  |   `ratio_f_MIR` |   `ratio_f2_MIR` |   `f_minus_f2` |
|:-----------------|----------------:|-----------------:|---------------:|
| **ever**         |            79.4 |             44.0 |           35.4 |
| **inherently**   |            42.5 |             38.8 |            3.7 |
| **particularly** |            16.6 |             13.2 |            3.5 |
| **remotely**     |            32.5 |             31.7 |            0.8 |
| **any**          |             6.9 |              6.7 |            0.2 |
| **that**         |             2.6 |              2.7 |           -0.1 |
| **necessarily**  |             2.3 |              2.3 |           -0.1 |
| **exactly**      |             1.9 |              2.0 |           -0.1 |
| **yet**          |             0.6 |              0.8 |           -0.2 |
| **immediately**  |             0.7 |              1.0 |           -0.3 |
| **terribly**     |             8.7 |             11.1 |           -2.4 |



In [16]:
# Joint (negated) frequency per adverb: full set vs. mirror subset, ordered by
# how many negated tokens fall outside the mirror subset.
nb_show_table(
    C.filter(regex=r'^f_.*[MS]')
    .sort_index(axis=1, ascending=False)  # puts `f_SET` ahead of `f_MIR`
    .assign(f_diff=lambda joint: joint['f_SET'] - joint['f_MIR'])
    .sort_values(by='f_diff', ascending=False)
    .rename(
        columns={
            'f_SET': 'total negations',
            'f_MIR': 'mirror subset negations',
            'f_diff': 'negations not in mirror subset',
        }
    ),
    n_dec=0,
)


|                  |   `total negations` |   `mirror subset negations` |   `negations not in mirror subset` |
|:-----------------|--------------------:|----------------------------:|-----------------------------------:|
| **that**         |             164,768 |                       4,308 |                            160,460 |
| **immediately**  |              56,099 |                         403 |                             55,696 |
| **yet**          |              51,867 |                         320 |                             51,547 |
| **particularly** |              55,527 |                       9,243 |                             46,284 |
| **exactly**      |              43,813 |                         813 |                             43,000 |
| **necessarily**  |              42,595 |                         963 |                             41,632 |
| **terribly**     |              17,949 |                       1,567 |                             16,382 |
| **any**

#### Joint (_Negated_) Frequency Comparison

|                  |   `total negations` |   `mirror subset negations` |   `negations not in mirror subset` |
|:-----------------|--------------------:|----------------------------:|-----------------------------------:|
| **that**         |             164,768 |                       4,308 |                            160,460 |
| **immediately**  |              56,099 |                         403 |                             55,696 |
| **yet**          |              51,867 |                         320 |                             51,547 |
| **particularly** |              55,527 |                       9,243 |                             46,284 |
| **exactly**      |              43,813 |                         813 |                             43,000 |
| **necessarily**  |              42,595 |                         963 |                             41,632 |
| **terribly**     |              17,949 |                       1,567 |                             16,382 |
| **any**          |              15,384 |                       1,066 |                             14,318 |
| **inherently**   |               6,743 |                       2,864 |                              3,879 |
| **remotely**     |               5,661 |                       1,840 |                              3,821 |
| **ever**         |               5,932 |                       4,709 |                              1,223 |



In [17]:
# Marginal (adverb total) frequency per adverb: full set vs. mirror subset,
# ordered by how many adverb tokens fall outside the mirror subset.
nb_show_table(
    C.filter(regex=r'^f2_.*[MS]')
    .sort_index(axis=1, ascending=False)  # puts `f2_SET` ahead of `f2_MIR`
    .assign(f2_diff=lambda marginal: marginal['f2_SET'] - marginal['f2_MIR'])
    .sort_values(by='f2_diff', ascending=False)
    .rename(
        columns={
            'f2_SET': 'total adverb tokens',
            'f2_MIR': 'mirror subset adverb tokens',
            'f2_diff': 'adverb tokens not in mirror subset',
        }
    ),
    n_dec=0,
)


|                  |   `total adverb tokens` |   `mirror subset adverb tokens` |   `adverb tokens not in mirror subset` |
|:-----------------|------------------------:|--------------------------------:|---------------------------------------:|
| **that**         |                 166,676 |                           4,559 |                                162,117 |
| **particularly** |                  76,162 |                          10,029 |                                 66,133 |
| **immediately**  |                  58,040 |                             564 |                                 57,476 |
| **yet**          |                  53,881 |                             419 |                                 53,462 |
| **exactly**      |                  44,503 |                             869 |                                 43,634 |
| **necessarily**  |                  42,886 |                             992 |                                 41,894 |
| **terribly**     |   

#### Marginal (_Adverb Total_) Frequency Comparison

|                  |   `total adverb tokens` |   `mirror subset adverb tokens` |   `adverb tokens not in mirror subset` |
|:-----------------|------------------------:|--------------------------------:|---------------------------------------:|
| **that**         |                 166,676 |                           4,559 |                                162,117 |
| **particularly** |                  76,162 |                          10,029 |                                 66,133 |
| **immediately**  |                  58,040 |                             564 |                                 57,476 |
| **yet**          |                  53,881 |                             419 |                                 53,462 |
| **exactly**      |                  44,503 |                             869 |                                 43,634 |
| **necessarily**  |                  42,886 |                             992 |                                 41,894 |
| **terribly**     |                  19,802 |                           2,204 |                                 17,598 |
| **any**          |                  16,238 |                           1,095 |                                 15,143 |
| **ever**         |                  10,870 |                           4,786 |                                  6,084 |
| **inherently**   |                   8,614 |                           3,342 |                                  5,272 |
| **remotely**     |                   6,161 |                           1,953 |                                  4,208 |



In [18]:
# Keep an untouched copy of the complete combined table.
full_C = C.copy()

# "Main" columns in presentation order: association metrics first
# (substring match on LRC, P1, G2), then the frequency columns
# (prefix match on f_, f1_, f2_).
main_cols_ordered = [
    col_name
    for selector in (
        {'like': 'LRC'},
        {'like': 'P1'},
        {'like': 'G2'},
        {'regex': '^f_'},
        {'regex': '^f1_'},
        {'regex': '^f2_'},
    )
    for col_name in C.filter(**selector).columns
]

# Guard against names that are not (or no longer) present in `C`.
main_C = C[[col_name for col_name in main_cols_ordered if col_name in C.columns]]

nb_show_table(
    main_C.sort_values(by='mean_dP1', ascending=False),
    return_df=True,
)


|                  |   `LRC_SET` |   `LRC_MIR` |   `mean_LRC` |   `dP1_SET` |   `P1_SET` |   `dP1_MIR` |   `P1_MIR` |   `mean_dP1` |   `mean_P1` |   `G2_SET` |   `G2_MIR` |   `mean_G2` |   `f_SET` |   `f_MIR` |   `f1_SET` |   `f1_MIR` |   `f2_SET` |   `f2_MIR` |
|:-----------------|------------:|------------:|-------------:|------------:|-----------:|------------:|-----------:|-------------:|------------:|-----------:|-----------:|------------:|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
| **necessarily**  |        6.77 |        3.86 |         5.31 |        0.50 |       0.99 |        0.47 |       0.97 |         0.48 |        0.98 |  56,251.14 |   1,114.70 |   28,682.92 |    42,595 |       963 |  3,173,660 |    291,732 |     42,886 |        992 |
| **that**         |        6.26 |        3.66 |         4.96 |        0.50 |       0.99 |        0.45 |       0.94 |         0.48 |        0.97 | 214,504.57 |   4,405.21 |  109,454.89 |   164,768 |     4,308 |

Unnamed: 0,`LRC_SET`,`LRC_MIR`,`mean_LRC`,`dP1_SET`,`P1_SET`,`dP1_MIR`,...,`f_SET`,`f_MIR`,`f1_SET`,`f1_MIR`,`f2_SET`,`f2_MIR`
**necessarily**,6.77,3.86,5.31,0.5,0.99,0.47,...,42595,963,3173660,291732,42886,992
**that**,6.26,3.66,4.96,0.5,0.99,0.45,...,164768,4308,3173660,291732,166676,4559
**exactly**,5.71,2.95,4.33,0.49,0.98,0.44,...,43813,813,3173660,291732,44503,869
**any**,3.91,4.0,3.96,0.45,0.95,0.47,...,15384,1066,3173660,291732,16238,1095
**remotely**,3.16,3.37,3.27,0.42,0.92,0.44,...,5661,1840,3173660,291732,6161,1953
**yet**,4.52,0.9,2.71,0.47,0.96,0.26,...,51867,320,3173660,291732,53881,419
**immediately**,4.68,0.67,2.68,0.47,0.97,0.21,...,56099,403,3173660,291732,58040,564
**particularly**,1.37,3.3,2.33,0.23,0.73,0.43,...,55527,9243,3173660,291732,76162,10029
**inherently**,1.66,2.24,1.95,0.28,0.78,0.36,...,6743,2864,3173660,291732,8614,3342
**terribly**,3.1,0.97,2.03,0.41,0.91,0.21,...,17949,1567,3173660,291732,19802,2204



|                  |   `LRC_SET` |   `LRC_MIR` |   `mean_LRC` |   `dP1_SET` |   `P1_SET` |   `dP1_MIR` |   `P1_MIR` |   `mean_dP1` |   `mean_P1` |   `G2_SET` |   `G2_MIR` |   `mean_G2` |   `f_SET` |   `f_MIR` |   `f1_SET` |   `f1_MIR` |   `f2_SET` |   `f2_MIR` |
|:-----------------|------------:|------------:|-------------:|------------:|-----------:|------------:|-----------:|-------------:|------------:|-----------:|-----------:|------------:|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
| **necessarily**  |        6.77 |        3.86 |         5.31 |        0.50 |       0.99 |        0.47 |       0.97 |         0.48 |        0.98 |  56,251.14 |   1,114.70 |   28,682.92 |    42,595 |       963 |  3,173,660 |    291,732 |     42,886 |        992 |
| **that**         |        6.26 |        3.66 |         4.96 |        0.50 |       0.99 |        0.45 |       0.94 |         0.48 |        0.97 | 214,504.57 |   4,405.21 |  109,454.89 |   164,768 |     4,308 |  3,173,660 |    291,732 |    166,676 |      4,559 |
| **exactly**      |        5.71 |        2.95 |         4.33 |        0.49 |       0.98 |        0.44 |       0.94 |         0.46 |        0.96 |  54,870.72 |     790.27 |   27,830.50 |    43,813 |       813 |  3,173,660 |    291,732 |     44,503 |        869 |
| **any**          |        3.91 |        4.00 |         3.96 |        0.45 |       0.95 |        0.47 |       0.97 |         0.46 |        0.96 |  15,851.55 |   1,252.02 |    8,551.79 |    15,384 |     1,066 |  3,173,660 |    291,732 |     16,238 |      1,095 |
| **remotely**     |        3.16 |        3.37 |         3.27 |        0.42 |       0.92 |        0.44 |       0.94 |         0.43 |        0.93 |   5,075.57 |   1,849.23 |    3,462.40 |     5,661 |     1,840 |  3,173,660 |    291,732 |      6,161 |      1,953 |
| **yet**          |        4.52 |        0.90 |         2.71 |        0.47 |       0.96 |        0.26 |       0.76 |         0.37 |        0.86 |  57,900.12 |     122.77 |   29,011.45 |    51,867 |       320 |  3,173,660 |    291,732 |     53,881 |        419 |
| **immediately**  |        4.68 |        0.67 |         2.68 |        0.47 |       0.97 |        0.21 |       0.71 |         0.34 |        0.84 |  63,920.54 |     107.39 |   32,013.96 |    56,099 |       403 |  3,173,660 |    291,732 |     58,040 |        564 |
| **particularly** |        1.37 |        3.30 |         2.33 |        0.23 |       0.73 |        0.43 |       0.92 |         0.33 |        0.83 |  16,791.84 |   8,516.58 |   12,654.21 |    55,527 |     9,243 |  3,173,660 |    291,732 |     76,162 |     10,029 |
| **inherently**   |        1.66 |        2.24 |         1.95 |        0.28 |       0.78 |        0.36 |       0.86 |         0.32 |        0.82 |   2,929.13 |   1,899.59 |    2,414.36 |     6,743 |     2,864 |  3,173,660 |    291,732 |      8,614 |      3,342 |
| **terribly**     |        3.10 |        0.97 |         2.03 |        0.41 |       0.91 |        0.21 |       0.71 |         0.31 |        0.81 |  15,186.21 |     406.49 |    7,796.35 |    17,949 |     1,567 |  3,173,660 |    291,732 |     19,802 |      2,204 |
| **ever**         |        0.12 |        5.17 |         2.65 |        0.05 |       0.55 |        0.49 |       0.98 |         0.27 |        0.76 |      91.19 |   5,883.26 |    2,987.23 |     5,932 |     4,709 |  3,173,660 |    291,732 |     10,870 |      4,786 |



## Save full adverb selection as `.csv`

In [19]:
# Output file stem shared by the csv written here and any later exports
# that reuse `save_prefix`.
save_prefix = f'{data_top}_NEG-ADV_combined-{SET_FLOOR}'
combined_top_csv_output = OUT_DIR.joinpath(f'{save_prefix}.{timestamp_today()}.csv')

# Announce the destination before writing.
print('Saving Combined "Most Negative Adverbs" AM table as csv:  ',
      f'\n> `{combined_top_csv_output}`',
      sep='')

# Floats are written with four decimal places.
C.to_csv(path_or_buf=combined_top_csv_output, float_format='{:.4f}'.format)

# Display the saved table as the cell output.
C

Saving Combined "Most Negative Adverbs" AM table as csv:  
> `/share/compling/projects/sanpi/results/top_AM/NEQ/NEQ-Top8/NEQ-Top8_NEG-ADV_combined-5000.2024-07-29.csv`


Unnamed: 0_level_0,key_SET,f_SET,dP1_SET,P1_SET,LRC_SET,G2_SET,...,mean_P2,mean_dP2,ratio_f_MIR,ratio_N_MIR,ratio_f1_MIR,ratio_f2_MIR
adv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
necessarily,NEGany~necessarily,42595,0.5,0.99,6.77,56251.14,...,0.01,0.01,0.02,0.09,0.09,0.02
that,NEGany~that,164768,0.5,0.99,6.26,214504.57,...,0.03,0.03,0.03,0.09,0.09,0.03
exactly,NEGany~exactly,43813,0.49,0.98,5.71,54870.72,...,0.01,0.01,0.02,0.09,0.09,0.02
any,NEGany~any,15384,0.45,0.95,3.91,15851.55,...,0.0,0.0,0.07,0.09,0.09,0.07
remotely,NEGany~remotely,5661,0.42,0.92,3.16,5075.57,...,0.0,0.0,0.33,0.09,0.09,0.32
yet,NEGany~yet,51867,0.47,0.96,4.52,57900.12,...,0.01,0.01,0.01,0.09,0.09,0.01
immediately,NEGany~immediately,56099,0.47,0.97,4.68,63920.54,...,0.01,0.01,0.01,0.09,0.09,0.01
particularly,NEGany~particularly,55527,0.23,0.73,1.37,16791.84,...,0.02,0.02,0.17,0.09,0.09,0.13
inherently,NEGany~inherently,6743,0.28,0.78,1.66,2929.13,...,0.01,0.0,0.42,0.09,0.09,0.39
terribly,NEGany~terribly,17949,0.41,0.91,3.1,15186.21,...,0.01,0.0,0.09,0.09,0.09,0.11


Saving Combined "Most Negative Adverbs" AM table as csv:  
> `/share/compling/projects/sanpi/results/top_AM/NEQ/NEQ-Top8/NEQ-Top8_NEG-ADV_combined-5000.2024-07-28.csv`


Save the `all-columns`, `means`, and `MAIN` views as markdown-formatted tables

In [20]:
# Write three markdown views of the combined table: every column, only the
# `mean_*` columns, and the ordered "main" columns. Selectors are lazy so each
# view is built immediately before its own file is written.
for view_label, pick_view in (
    ('all-columns', lambda table: table),
    ('means', lambda table: table.filter(like='mean_')),
    ('MAIN', lambda table: table[main_cols_ordered]),
):
    pick_view(C).to_markdown(
        buf=OUT_DIR / f'{save_prefix}_{view_label}_{timestamp_today()}.md',
        floatfmt=',.2f',
        intfmt=',',
    )

In [21]:
# Render the ordered "main" columns of the combined table (unsorted).
nb_show_table(C.loc[:, main_cols_ordered])


|                  |   `LRC_SET` |   `LRC_MIR` |   `mean_LRC` |   `dP1_SET` |   `P1_SET` |   `dP1_MIR` |   `P1_MIR` |   `mean_dP1` |   `mean_P1` |   `G2_SET` |   `G2_MIR` |   `mean_G2` |   `f_SET` |   `f_MIR` |   `f1_SET` |   `f1_MIR` |   `f2_SET` |   `f2_MIR` |
|:-----------------|------------:|------------:|-------------:|------------:|-----------:|------------:|-----------:|-------------:|------------:|-----------:|-----------:|------------:|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
| **necessarily**  |        6.77 |        3.86 |         5.31 |        0.50 |       0.99 |        0.47 |       0.97 |         0.48 |        0.98 |  56,251.14 |   1,114.70 |   28,682.92 |    42,595 |       963 |  3,173,660 |    291,732 |     42,886 |        992 |
| **that**         |        6.26 |        3.66 |         4.96 |        0.50 |       0.99 |        0.45 |       0.94 |         0.48 |        0.97 | 214,504.57 |   4,405.21 |  109,454.89 |   164,768 |     4,308 |


|                  |   `LRC_SET` |   `LRC_MIR` |   `mean_LRC` |   `dP1_SET` |   `P1_SET` |   `dP1_MIR` |   `P1_MIR` |   `mean_dP1` |   `mean_P1` |   `G2_SET` |   `G2_MIR` |   `mean_G2` |   `f_SET` |   `f_MIR` |   `f1_SET` |   `f1_MIR` |   `f2_SET` |   `f2_MIR` |
|:-----------------|------------:|------------:|-------------:|------------:|-----------:|------------:|-----------:|-------------:|------------:|-----------:|-----------:|------------:|----------:|----------:|-----------:|-----------:|-----------:|-----------:|
| **necessarily**  |        6.77 |        3.86 |         5.31 |        0.50 |       0.99 |        0.47 |       0.97 |         0.48 |        0.98 |  56,251.14 |   1,114.70 |   28,682.92 |    42,595 |       963 |  3,173,660 |    291,732 |     42,886 |        992 |
| **that**         |        6.26 |        3.66 |         4.96 |        0.50 |       0.99 |        0.45 |       0.94 |         0.48 |        0.97 | 214,504.57 |   4,405.21 |  109,454.89 |   164,768 |     4,308 |  3,173,660 |    291,732 |    166,676 |      4,559 |
| **exactly**      |        5.71 |        2.95 |         4.33 |        0.49 |       0.98 |        0.44 |       0.94 |         0.46 |        0.96 |  54,870.72 |     790.27 |   27,830.50 |    43,813 |       813 |  3,173,660 |    291,732 |     44,503 |        869 |
| **any**          |        3.91 |        4.00 |         3.96 |        0.45 |       0.95 |        0.47 |       0.97 |         0.46 |        0.96 |  15,851.55 |   1,252.02 |    8,551.79 |    15,384 |     1,066 |  3,173,660 |    291,732 |     16,238 |      1,095 |
| **remotely**     |        3.16 |        3.37 |         3.27 |        0.42 |       0.92 |        0.44 |       0.94 |         0.43 |        0.93 |   5,075.57 |   1,849.23 |    3,462.40 |     5,661 |     1,840 |  3,173,660 |    291,732 |      6,161 |      1,953 |
| **yet**          |        4.52 |        0.90 |         2.71 |        0.47 |       0.96 |        0.26 |       0.76 |         0.37 |        0.86 |  57,900.12 |     122.77 |   29,011.45 |    51,867 |       320 |  3,173,660 |    291,732 |     53,881 |        419 |
| **immediately**  |        4.68 |        0.67 |         2.68 |        0.47 |       0.97 |        0.21 |       0.71 |         0.34 |        0.84 |  63,920.54 |     107.39 |   32,013.96 |    56,099 |       403 |  3,173,660 |    291,732 |     58,040 |        564 |
| **particularly** |        1.37 |        3.30 |         2.33 |        0.23 |       0.73 |        0.43 |       0.92 |         0.33 |        0.83 |  16,791.84 |   8,516.58 |   12,654.21 |    55,527 |     9,243 |  3,173,660 |    291,732 |     76,162 |     10,029 |
| **inherently**   |        1.66 |        2.24 |         1.95 |        0.28 |       0.78 |        0.36 |       0.86 |         0.32 |        0.82 |   2,929.13 |   1,899.59 |    2,414.36 |     6,743 |     2,864 |  3,173,660 |    291,732 |      8,614 |      3,342 |
| **terribly**     |        3.10 |        0.97 |         2.03 |        0.41 |       0.91 |        0.21 |       0.71 |         0.31 |        0.81 |  15,186.21 |     406.49 |    7,796.35 |    17,949 |     1,567 |  3,173,660 |    291,732 |     19,802 |      2,204 |
| **ever**         |        0.12 |        5.17 |         2.65 |        0.05 |       0.55 |        0.49 |       0.98 |         0.27 |        0.76 |      91.19 |   5,883.26 |    2,987.23 |     5,932 |     4,709 |  3,173,660 |    291,732 |     10,870 |      4,786 |



In [22]:
# Render only the `mean_*` columns, strongest mean dP1 first.
nb_show_table(
    C.filter(like='mean_').sort_values(by='mean_dP1', ascending=False)
)



|                  |   `mean_f` |   `mean_dP1` |   `mean_P1` |   `mean_LRC` |   `mean_G2` |   `mean_MI` |   `mean_oddsRDisc` |   `mean_t` |     `mean_N` |    `mean_f1` |   `mean_f2` |   `mean_expF` |   `mean_unexpF` |   `mean_P2` |   `mean_dP2` |
|:-----------------|-----------:|-------------:|------------:|-------------:|------------:|------------:|-------------------:|-----------:|-------------:|-------------:|------------:|--------------:|----------------:|------------:|-------------:|
| **necessarily**  | 592,138.00 |         0.48 |        0.98 |         5.31 |   28,682.92 |        0.29 |               1.84 |      58.77 | 3,465,417.00 | 1,732,696.00 |   21,939.00 |     10,969.42 |       10,809.58 |        0.01 |         0.01 |
| **that**         | 634,283.83 |         0.48 |        0.97 |         4.96 |  109,454.89 |        0.29 |               1.60 |     115.76 | 3,465,417.00 | 1,732,696.00 |   85,617.50 |     42,808.45 |       41,729.55 |        0.03 |         0.03 |
| **exactly


|                  |   `mean_f` |   `mean_dP1` |   `mean_P1` |   `mean_LRC` |   `mean_G2` |   `mean_MI` |   `mean_oddsRDisc` |   `mean_t` |     `mean_N` |    `mean_f1` |   `mean_f2` |   `mean_expF` |   `mean_unexpF` |   `mean_P2` |   `mean_dP2` |
|:-----------------|-----------:|-------------:|------------:|-------------:|------------:|------------:|-------------------:|-----------:|-------------:|-------------:|------------:|--------------:|----------------:|------------:|-------------:|
| **necessarily**  | 592,138.00 |         0.48 |        0.98 |         5.31 |   28,682.92 |        0.29 |               1.84 |      58.77 | 3,465,417.00 | 1,732,696.00 |   21,939.00 |     10,969.42 |       10,809.58 |        0.01 |         0.01 |
| **that**         | 634,283.83 |         0.48 |        0.97 |         4.96 |  109,454.89 |        0.29 |               1.60 |     115.76 | 3,465,417.00 | 1,732,696.00 |   85,617.50 |     42,808.45 |       41,729.55 |        0.03 |         0.03 |
| **exactly**      | 592,565.00 |         0.46 |        0.96 |         4.33 |   27,830.50 |        0.28 |               1.48 |      58.14 | 3,465,417.00 | 1,732,696.00 |   22,686.00 |     11,342.92 |       10,970.08 |        0.01 |         0.01 |
| **any**          | 583,195.83 |         0.46 |        0.96 |         3.96 |    8,551.79 |        0.28 |               1.41 |      37.23 | 3,465,417.00 | 1,732,696.00 |    8,666.50 |      4,333.22 |        3,891.78 |        0.00 |         0.00 |
| **remotely**     | 580,167.83 |         0.43 |        0.93 |         3.27 |    3,462.40 |        0.27 |               1.13 |      27.21 | 3,465,417.00 | 1,732,696.00 |    4,057.00 |      2,028.48 |        1,722.02 |        0.00 |         0.00 |
| **yet**          | 595,313.17 |         0.37 |        0.86 |         2.71 |   29,011.45 |        0.23 |               0.96 |      57.81 | 3,465,417.00 | 1,732,696.00 |   27,150.00 |     13,574.91 |       12,518.59 |        0.01 |         0.01 |
| **immediately**  | 596,749.67 |         0.34 |        0.84 |         2.68 |   32,013.96 |        0.22 |               0.93 |      60.18 | 3,465,417.00 | 1,732,696.00 |   29,302.00 |     14,650.90 |       13,600.10 |        0.01 |         0.01 |
| **particularly** | 602,725.50 |         0.33 |        0.83 |         2.33 |   12,654.21 |        0.21 |               0.76 |      59.01 | 3,465,417.00 | 1,732,696.00 |   43,095.50 |     21,547.59 |       10,837.41 |        0.02 |         0.02 |
| **inherently**   | 581,159.17 |         0.32 |        0.82 |         1.95 |    2,414.36 |        0.21 |               0.67 |      25.98 | 3,465,417.00 | 1,732,696.00 |    5,978.00 |      2,988.98 |        1,814.52 |        0.01 |         0.00 |
| **terribly**     | 584,485.67 |         0.31 |        0.81 |         2.03 |    7,796.35 |        0.21 |               0.69 |      35.91 | 3,465,417.00 | 1,732,696.00 |   11,003.00 |      5,501.46 |        4,256.54 |        0.01 |         0.00 |
| **ever**         | 581,948.17 |         0.27 |        0.76 |         2.65 |    2,987.23 |        0.17 |               0.94 |      20.10 | 3,465,417.00 | 1,732,696.00 |    7,828.00 |      3,913.97 |        1,406.53 |        0.01 |         0.01 |

