

 # Collect bigrams corresponding to top adverbs

In [1]:
from pathlib import Path
from pprint import pprint

import pandas as pd

from source.utils.associate import POLAR_DIR, TOP_AM_DIR, adjust_assoc_columns
from source.utils.dataframes import update_assoc_index as update_index
from source.utils.general import (confirm_dir, print_iter, snake_to_camel,
                                  timestamp_today)

SET_FLOOR = 300
MIR_FLOOR = 100
K = 6
DATA_DATE = '2024-07-25'
TAG = 'ALL'
TOP_AM_TAG_DIR = TOP_AM_DIR / TAG
confirm_dir(TOP_AM_TAG_DIR)

# for loading `polar/*/bigram/*` tables
bigram_floor = 100
mirror_floor = 50



 Set columns and diplay settings

In [2]:
# %%

FOCUS = ['f',
         'am_p1_given2', 'am_p1_given2_simple', 'conservative_log_ratio',
         'am_log_likelihood',
         #  'mutual_information',
         'am_odds_ratio_disc', 't_score',
         'N', 'f1', 'f2', 'E11', 'unexpected_f',
         'l1', 'l2']
pd.set_option('display.max_colwidth', 30)
pd.set_option('display.max_columns', 9)
pd.set_option('display.width', 120)
pd.set_option("display.precision", 2)
pd.set_option("styler.format.precision", 2)
pd.set_option("styler.format.thousands", ",")
pd.set_option("display.float_format", '{:,.2f}'.format)

In [3]:
# %%

def force_ints(_df):
    count_cols = _df.filter(regex=r'total|^[fN]').columns
    _df[count_cols] = _df[count_cols].astype('int')
    # _df[count_cols] = _df[:, count_cols].astype('int64')
    # print(_df.dtypes.to_frame('dtypes'))
    return _df


def nb_show_table(df, n_dec: int = 2,
                  adjust_columns: bool = True,
                  outpath: Path = None,
                  return_df: bool = False) -> None:
    _df = df.copy()
    try:
        start_0 = _df.index.start == 0
    except AttributeError:
        pass
    else:
        _df.index.name = 'rank'
        if start_0:
            _df.index = _df.index + 1
    if adjust_columns:
        _df = adjust_assoc_columns(_df)
    _df.columns = [f'`{c}`' for c in _df.columns]
    _df.index = [f'**{r}**' for r in _df.index]
    table = _df.to_markdown(floatfmt=f',.{n_dec}f', intfmt=',')
    if outpath:
        outpath.write_text(table)

    print(f'\n{table}\n')
    return (_df if return_df else None)

In [4]:
# %%

bigram_dfs = {d.name:
              update_index(pd.read_parquet(
                  tuple(d.joinpath('bigram/extra')
                        .glob(f'*min{mirror_floor if d.name == "mirror" else bigram_floor}x*.parq')
                        )[0]))
              for d in POLAR_DIR.iterdir()}

In [5]:
# %%

def show_adv_bigrams(sample_size, C, 
                     bigram_dfs, 
                     selector: str = 'dP1', 
                     column_list: list = None) -> dict:
    def _force_ints(_df):
        count_cols = _df.filter(regex=r'total$|^[fN]').columns
        _df.loc[:, count_cols] = _df.loc[:, count_cols].apply(
            pd.to_numeric, downcast='unsigned')
        return _df

    def get_top_bigrams(bdf, adv, bigram_k):
        bdf = _force_ints(bdf.loc[bdf.adv == adv, :])
        top_by_metric = [bdf.nlargest(bigram_k * 2, m) for m in ['dP1', 'LRC']]
        half_k = bigram_k // 2
        adv_pat_bigrams = pd.concat(
            [top_bigrams.head(half_k) for top_bigrams in top_by_metric]).drop_duplicates()
        x = 0
        while len(adv_pat_bigrams) < min(bigram_k, len(bdf)):
            x += 1
            next_ix = half_k + x
            
            try:
                next_entries = [top_by_metric[0].iloc[[next_ix], :], 
                            top_by_metric[1].iloc[[next_ix], :]]
            except IndexError:
                print(f'All bigrams for {adv} retrieved.')
                break
            else:
                adv_pat_bigrams = pd.concat((adv_pat_bigrams, 
                                         *next_entries)).drop_duplicates()
        return adv_pat_bigrams.head(bigram_k)

    bigram_k = max(sample_size + 2, 10)
    print(
        f'## Top {bigram_k} "most negative" bigrams corresponding to top {sample_size} adverbs\n')
    print(timestamp_today())
    patterns = list(bigram_dfs.keys())
    top_adverbs = C.index
    bigram_samples = {adv: dict.fromkeys(
        patterns + ['both', 'adj']) for adv in top_adverbs}
    bigrams, adj = [], []

    for rank, adv in enumerate(top_adverbs, start=1):
        print(f'\n### {rank}. _{adv}_\n')
        adj_for_adv = []
        adv_top = None

        for pat, bdf in bigram_dfs.items():
            bdf = adjust_assoc_columns(
                bdf[[c for c in FOCUS + ['adj', 'adj_total', 'adv', 'adv_total'] if c in bdf.columns]])
            bdf = bdf.loc[bdf.LRC >= 1, :]
            adv_pat_bigrams = get_top_bigrams(bdf, adv, bigram_k)

            if adv_pat_bigrams.empty:
                print(f'No bigrams found in loaded `{pat}` AM table.')
            else:
                print(
                    f'\n## Top {len(adv_pat_bigrams)} `{pat}` "{adv}_*" bigrams (sorted by `{selector}`; `LRC > 1`)\n')
                column_list = column_list if column_list is not None else bdf.columns
                nb_show_table(adv_pat_bigrams.filter(column_list), n_dec=2)

            adj_for_adv.extend(adv_pat_bigrams.adj.drop_duplicates().to_list())
            bigram_samples[adv][pat] = adv_pat_bigrams
            adv_top = adv_pat_bigrams if adv_top is None else pd.concat(
                [adv_top, adv_pat_bigrams])

        bigram_samples[adv]['adj'] = set(adj_for_adv)
        bigrams.extend(adv_top.l2.drop_duplicates().to_list())
        adj.extend(adj_for_adv)
        bigram_samples[adv]['both'] = adv_top

    bigram_samples['bigrams'] = set(bigrams)
    bigram_samples['adj'] = set(adj)
    return bigram_samples, bigram_k

In [6]:
# %%

combined_am_csv = (TOP_AM_TAG_DIR /
                   f'{TAG}-Top{K}_NEG-ADV_combined.{DATA_DATE or timestamp_today()}.csv')
if not combined_am_csv.is_file():
    combined_am_csv = tuple(TOP_AM_TAG_DIR.glob(
        f'{TAG}-Top{K}_NEG-ADV_combined*.csv'))[0]

C = adjust_assoc_columns(pd.read_csv(combined_am_csv, index_col='adv'))

main_cols_ordered = pd.concat((*[C.filter(like=m).columns.to_series() for m in ('LRC', 'P1', 'G2')],
                               *[C.filter(regex=f'^{f}_').columns.to_series() for f in ['f', 'f1', 'f2']])
                              ).to_list()
C

Unnamed: 0_level_0,key_SET,f_SET,dP1_SET,dP1_simple_SET,...,r_f_MIR,r_N_MIR,r_f1_MIR,r_f2_MIR
adv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
necessarily,NEGany~necessarily,42595,0.83,0.87,...,0.02,0.02,0.09,0.02
that,NEGany~that,164768,0.75,0.79,...,0.03,0.02,0.09,0.03
exactly,NEGany~exactly,43813,0.7,0.75,...,0.02,0.02,0.09,0.02
before,NEGany~before,308,0.41,0.45,...,0.94,0.02,0.09,0.43
any,NEGany~any,15384,0.4,0.45,...,0.07,0.02,0.09,0.03
remotely,NEGany~remotely,5661,0.3,0.34,...,0.33,0.02,0.09,0.14
ever,NEGany~ever,5932,0.01,0.05,...,0.79,0.02,0.09,0.04
yet,NEGany~yet,51867,0.5,0.54,...,0.01,0.02,0.09,0.01
immediately,NEGany~immediately,56099,0.54,0.58,...,0.01,0.02,0.09,0.01


In [7]:
# %%

samples_dict, bigram_k = show_adv_bigrams(
    K, C, bigram_dfs,
    column_list=[
        'adj', 'adj_total',
        *pd.Series(main_cols_ordered).str.replace(
            r'mean_|_SET|_MIR', '', regex=True)
        .drop_duplicates().to_list(),
        # 't', 'MI'
    ]
)

## Top 10 "most negative" bigrams corresponding to top 6 adverbs

2024-07-25

### 1. _necessarily_


## Top 10 `RBdirect` "necessarily_*" bigrams (sorted by `dP1`; `LRC > 1`)


|                                   | `adj`      |   `adj_total` |   `LRC` |   `dP1` |   `dP1_simple` |     `G2` |   `f` |      `f1` |   `f2` |
|:----------------------------------|:-----------|--------------:|--------:|--------:|---------------:|---------:|------:|----------:|-------:|
| **NEGany~necessarily_indicative** | indicative |      2,313.00 |    6.29 |    0.50 |           1.00 | 1,925.89 | 1,389 | 3,173,660 |  1,389 |
| **NEGany~necessarily_easy**       | easy       |    108,923.00 |    5.67 |    0.50 |           1.00 | 1,260.28 |   909 | 3,173,660 |    909 |
| **NEGany~necessarily_new**        | new        |     21,538.00 |    4.74 |    0.50 |           1.00 |   668.24 |   482 | 3,173,660 |    482 |
| **NEGany~necessarily_surprising** | surprising |     18,776.00 |    4.23 |    0.50 |           1.00 |

  adv_top = adv_pat_bigrams if adv_top is None else pd.concat(
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat(
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat(


### `2024-07-25` Top 10 "most negative" bigrams corresponding to top 6 adverbs



#### 1. _necessarily_


 Top 10 `RBdirect` "necessarily_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                   | `adj`      | `adj_total` | `LRC` | `dP1` | `dP1_simple` |     `G2` |   `f` |      `f1` |  `f2` |
 |:----------------------------------|:-----------|------------:|------:|------:|-------------:|---------:|------:|----------:|------:|
 | **NEGany~necessarily_indicative** | indicative |    2,313.00 |  6.29 |  0.50 |         1.00 | 1,925.89 | 1,389 | 3,173,660 | 1,389 |
 | **NEGany~necessarily_easy**       | easy       |  108,923.00 |  5.67 |  0.50 |         1.00 | 1,260.28 |   909 | 3,173,660 |   909 |
 | **NEGany~necessarily_new**        | new        |   21,538.00 |  4.74 |  0.50 |         1.00 |   668.24 |   482 | 3,173,660 |   482 |
 | **NEGany~necessarily_surprising** | surprising |   18,776.00 |  4.23 |  0.50 |         1.00 |   471.36 |   340 | 3,173,660 |   340 |
 | **NEGany~necessarily_enough**     | enough     |   27,603.00 |  3.93 |  0.50 |         1.00 |   386.79 |   279 | 3,173,660 |   279 |
 | **NEGany~necessarily_bad**        | bad        |  119,509.00 |  6.31 |  0.50 |         1.00 | 2,814.04 | 2,059 | 3,173,660 | 2,062 |
 | **NEGany~necessarily_true**       | true       |   34,967.00 |  6.16 |  0.50 |         1.00 | 4,330.74 | 3,232 | 3,173,660 | 3,245 |
 | **NEGany~necessarily_better**     | better     |   50,827.00 |  6.07 |  0.50 |         1.00 | 2,564.81 | 1,887 | 3,173,660 | 1,891 |
 | **NEGany~necessarily_aware**      | aware      |   28,973.00 |  3.48 |  0.50 |         1.00 |   285.59 |   206 | 3,173,660 |   206 |
 | **NEGany~necessarily_related**    | related    |   14,260.00 |  5.14 |  0.50 |         1.00 | 1,013.51 |   741 | 3,173,660 |   742 |


 Top 3 `mirror` "necessarily_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                              | `adj` | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |    `f1` | `f2` |
 |:-----------------------------|:------|------------:|------:|------:|-------------:|-------:|----:|--------:|-----:|
 | **NEGmir~necessarily_wrong** | wrong |   20,880.00 |  5.04 |  0.81 |         0.98 | 698.74 | 211 | 291,732 |  216 |
 | **NEGmir~necessarily_bad**   | bad   |   10,261.00 |  2.95 |  0.77 |         0.94 | 154.45 |  50 | 291,732 |   53 |
 | **NEGmir~necessarily_true**  | true  |    6,191.00 |  2.69 |  0.73 |         0.90 | 150.42 |  53 | 291,732 |   59 |


#### 2. _that_


 Top 10 `RBdirect` "that_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                            | `adj`      | `adj_total` | `LRC` | `dP1` | `dP1_simple` |      `G2` |    `f` |      `f1` |   `f2` |
 |:---------------------------|:-----------|------------:|------:|------:|-------------:|----------:|-------:|----------:|-------:|
 | **NEGany~that_surprising** | surprising |   18,776.00 |  5.99 |  0.50 |         1.00 |  1,570.89 |  1,133 | 3,173,660 |  1,133 |
 | **NEGany~that_unusual**    | unusual    |    7,412.00 |  5.77 |  0.50 |         1.00 |  1,354.57 |    977 | 3,173,660 |    977 |
 | **NEGany~that_exciting**   | exciting   |   20,233.00 |  5.49 |  0.50 |         1.00 |  1,116.08 |    805 | 3,173,660 |    805 |
 | **NEGany~that_uncommon**   | uncommon   |    3,165.00 |  5.49 |  0.50 |         1.00 |  1,111.92 |    802 | 3,173,660 |    802 |
 | **NEGany~that_impressed**  | impressed  |   12,138.00 |  5.25 |  0.50 |         1.00 |    944.15 |    681 | 3,173,660 |    681 |
 | **NEGany~that_hard**       | hard       |   45,061.00 |  7.68 |  0.50 |         1.00 | 13,602.42 |  9,948 | 3,173,660 |  9,963 |
 | **NEGany~that_different**  | different  |   80,643.00 |  7.18 |  0.50 |         1.00 |  8,895.12 |  6,534 | 3,173,660 |  6,547 |
 | **NEGany~that_great**      | great      |   45,359.00 |  7.18 |  0.50 |         1.00 | 14,908.90 | 11,032 | 3,173,660 | 11,065 |
 | **NEGany~that_difficult**  | difficult  |   61,490.00 |  7.06 |  0.50 |         1.00 |  7,569.00 |  5,560 | 3,173,660 |  5,571 |
 | **NEGany~that_big**        | big        |   42,912.00 |  6.47 |  0.50 |         1.00 |  8,332.69 |  6,244 | 3,173,660 |  6,273 |


 Top 10 `mirror` "that_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                            | `adj`      | `adj_total` | `LRC` | `dP1` | `dP1_simple` |     `G2` | `f` |    `f1` | `f2` |
 |:---------------------------|:-----------|------------:|------:|------:|-------------:|---------:|----:|--------:|-----:|
 | **NEGmir~that_popular**    | popular    |    5,668.00 |  3.15 |  0.76 |         0.93 |   195.15 |  65 | 291,732 |   70 |
 | **NEGmir~that_difficult**  | difficult  |   15,956.00 |  2.88 |  0.76 |         0.93 |   156.11 |  52 | 291,732 |   56 |
 | **NEGmir~that_interested** | interested |    8,255.00 |  3.07 |  0.75 |         0.93 |   185.02 |  62 | 291,732 |   67 |
 | **NEGmir~that_close**      | close      |   13,874.00 |  2.91 |  0.74 |         0.91 |   173.70 |  60 | 291,732 |   66 |
 | **NEGmir~that_hard**       | hard       |    7,135.00 |  2.88 |  0.74 |         0.91 |   170.36 |  59 | 291,732 |   65 |
 | **NEGmir~that_simple**     | simple     |   25,408.00 |  4.36 |  0.72 |         0.90 | 1,340.19 | 474 | 291,732 |  529 |
 | **NEGmir~that_easy**       | easy       |   18,610.00 |  4.23 |  0.71 |         0.89 | 1,248.84 | 450 | 291,732 |  508 |
 | **NEGmir~that_great**      | great      |    5,568.00 |  3.57 |  0.66 |         0.84 |   725.16 | 286 | 291,732 |  342 |
 | **NEGmir~that_big**        | big        |    8,177.00 |  3.17 |  0.69 |         0.86 |   300.54 | 113 | 291,732 |  131 |
 | **NEGmir~that_good**       | good       |   31,585.00 |  3.01 |  0.56 |         0.73 |   921.36 | 447 | 291,732 |  614 |


#### 3. _exactly_


 Top 10 `RBdirect` "exactly_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                               | `adj`      | `adj_total` | `LRC` | `dP1` | `dP1_simple` |      `G2` |   `f` |      `f1` |  `f2` |
 |:------------------------------|:-----------|------------:|------:|------:|-------------:|----------:|------:|----------:|------:|
 | **NEGany~exactly_cheap**      | cheap      |    6,591.00 |  5.27 |  0.50 |         1.00 |    958.01 |   691 | 3,173,660 |   691 |
 | **NEGany~exactly_surprising** | surprising |   18,776.00 |  4.61 |  0.50 |         1.00 |    610.01 |   440 | 3,173,660 |   440 |
 | **NEGany~exactly_subtle**     | subtle     |    5,299.00 |  3.84 |  0.50 |         1.00 |    364.61 |   263 | 3,173,660 |   263 |
 | **NEGany~exactly_fair**       | fair       |    6,964.00 |  3.83 |  0.50 |         1.00 |    360.45 |   260 | 3,173,660 |   260 |
 | **NEGany~exactly_fun**        | fun        |   19,661.00 |  3.60 |  0.50 |         1.00 |    310.54 |   224 | 3,173,660 |   224 |
 | **NEGany~exactly_sure**       | sure       |  134,139.00 |  7.46 |  0.50 |         1.00 | 11,991.61 | 8,794 | 3,173,660 | 8,810 |
 | **NEGany~exactly_clear**      | clear      |   84,227.00 |  6.38 |  0.50 |         1.00 |  2,405.43 | 1,746 | 3,173,660 | 1,747 |
 | **NEGany~exactly_new**        | new        |   21,538.00 |  6.03 |  0.50 |         1.00 |  1,885.86 | 1,371 | 3,173,660 | 1,372 |
 | **NEGany~exactly_easy**       | easy       |  108,923.00 |  5.67 |  0.50 |         1.00 |  1,463.43 | 1,066 | 3,173,660 | 1,067 |
 | **NEGany~exactly_hard**       | hard       |   45,061.00 |  3.46 |  0.50 |         1.00 |    281.43 |   203 | 3,173,660 |   203 |


 Top 2 `mirror` "exactly_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj` | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |    `f1` | `f2` |
 |:-------------------------|:------|------------:|------:|------:|-------------:|-------:|----:|--------:|-----:|
 | **NEGmir~exactly_sure**  | sure  |    6,761.00 |  5.31 |  0.83 |         1.00 | 522.11 | 148 | 291,732 |  148 |
 | **NEGmir~exactly_clear** | clear |    6,722.00 |  3.38 |  0.81 |         0.98 | 173.89 |  52 | 291,732 |   53 |


#### 4. _before_


 Top 1 `RBdirect` "before_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                             | `adj`     | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |      `f1` | `f2` |
 |:----------------------------|:----------|------------:|------:|------:|-------------:|-------:|----:|----------:|-----:|
 | **NEGany~before_available** | available |   82,956.00 |  3.25 |  0.50 |         1.00 | 245.38 | 177 | 3,173,660 |  177 |


 Top 1 `mirror` "before_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                             | `adj`     | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |    `f1` | `f2` |
 |:----------------------------|:----------|------------:|------:|------:|-------------:|-------:|----:|--------:|-----:|
 | **NEGmir~before_available** | available |   10,284.00 |  5.58 |  0.83 |         1.00 | 620.90 | 176 | 291,732 |  176 |


 #### 5. _any_


 Top 10 `RBdirect` "any_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj`     | `adj_total` | `LRC` | `dP1` | `dP1_simple` |     `G2` |   `f` |      `f1` |  `f2` |
 |:-------------------------|:----------|------------:|------:|------:|-------------:|---------:|------:|----------:|------:|
 | **NEGany~any_younger**   | younger   |    1,784.00 |  3.80 |  0.50 |         1.00 |   353.52 |   255 | 3,173,660 |   255 |
 | **NEGany~any_happier**   | happier   |    2,004.00 |  4.66 |  0.49 |         0.99 | 1,085.12 |   828 | 3,173,660 |   834 |
 | **NEGany~any_simpler**   | simpler   |    1,446.00 |  3.06 |  0.49 |         0.99 |   285.50 |   226 | 3,173,660 |   229 |
 | **NEGany~any_easier**    | easier    |   12,877.00 |  4.42 |  0.48 |         0.98 | 1,946.26 | 1,594 | 3,173,660 | 1,625 |
 | **NEGany~any_cheaper**   | cheaper   |    3,206.00 |  2.20 |  0.48 |         0.98 |   154.36 |   129 | 3,173,660 |   132 |
 | **NEGany~any_worse**     | worse     |   12,116.00 |  3.62 |  0.46 |         0.96 | 1,816.60 | 1,686 | 3,173,660 | 1,762 |
 | **NEGany~any_better**    | better    |   50,827.00 |  3.59 |  0.44 |         0.94 | 4,753.39 | 4,719 | 3,173,660 | 5,004 |
 | **NEGany~any_different** | different |   80,643.00 |  3.03 |  0.44 |         0.94 |   905.82 |   902 | 3,173,660 |   957 |
 | **NEGany~any_clearer**   | clearer   |      972.00 |  2.76 |  0.46 |         0.96 |   382.44 |   355 | 3,173,660 |   371 |
 | **NEGany~any_harder**    | harder    |    4,395.00 |  2.36 |  0.45 |         0.95 |   240.83 |   227 | 3,173,660 |   238 |


 Top 4 `mirror` "any_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                       | `adj`  | `adj_total` | `LRC` | `dP1` | `dP1_simple` |     `G2` | `f` |    `f1` | `f2` |
 |:----------------------|:-------|------------:|------:|------:|-------------:|---------:|----:|--------:|-----:|
 | **NEGmir~any_easier** | easier |    2,386.00 |  3.04 |  0.75 |         0.92 |   181.65 |  61 | 291,732 |   66 |
 | **NEGmir~any_better** | better |   14,013.00 |  4.38 |  0.74 |         0.91 | 1,096.01 | 380 | 291,732 |  419 |
 | **NEGmir~any_worse**  | worse  |    8,790.00 |  2.73 |  0.66 |         0.83 |   217.46 |  87 | 291,732 |  105 |
 | **NEGmir~any_closer** | closer |      993.00 |  2.33 |  0.65 |         0.83 |   141.82 |  57 | 291,732 |   69 |


 #### 6. _remotely_

 All bigrams for remotely retrieved.

 Top 7 `RBdirect` "remotely_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                | `adj`      | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |      `f1` | `f2` |
 |:-------------------------------|:-----------|------------:|------:|------:|-------------:|-------:|----:|----------:|-----:|
 | **NEGany~remotely_true**       | true       |   34,967.00 |  3.53 |  0.50 |         1.00 | 334.93 | 250 | 3,173,660 |  251 |
 | **NEGany~remotely_funny**      | funny      |   14,992.00 |  2.16 |  0.47 |         0.97 | 159.09 | 137 | 3,173,660 |  141 |
 | **NEGany~remotely_close**      | close      |   46,485.00 |  2.98 |  0.45 |         0.95 | 711.52 | 694 | 3,173,660 |  733 |
 | **NEGany~remotely_comparable** | comparable |    2,401.00 |  1.62 |  0.44 |         0.94 | 119.34 | 118 | 3,173,660 |  125 |
 | **NEGany~remotely_interested** | interested |   34,543.00 |  1.99 |  0.41 |         0.91 | 278.69 | 330 | 3,173,660 |  364 |
 | **NEGany~remotely_related**    | related    |   14,260.00 |  1.33 |  0.40 |         0.90 | 116.95 | 146 | 3,173,660 |  163 |
 | **NEGany~remotely_possible**   | possible   |   30,446.00 |  1.11 |  0.36 |         0.86 | 109.15 | 164 | 3,173,660 |  191 |


 Top 4 `mirror` "remotely_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                 | `adj`       | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |    `f1` | `f2` |
 |:--------------------------------|:------------|------------:|------:|------:|-------------:|-------:|----:|--------:|-----:|
 | **NEGmir~remotely_true**        | true        |    6,191.00 |  3.04 |  0.75 |         0.92 | 181.65 |  61 | 291,732 |   66 |
 | **NEGmir~remotely_close**       | close       |   13,874.00 |  3.28 |  0.65 |         0.82 | 532.96 | 218 | 291,732 |  267 |
 | **NEGmir~remotely_interesting** | interesting |   12,447.00 |  1.80 |  0.56 |         0.73 | 115.20 |  56 | 291,732 |   77 |
 | **NEGmir~remotely_similar**     | similar     |    7,011.00 |  1.70 |  0.49 |         0.66 | 127.32 |  71 | 291,732 |  107 |


 #### 7. _ever_


 Top 10 `RBdirect` "ever_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                         | `adj`   | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |      `f1` | `f2` |
 |:------------------------|:--------|------------:|------:|------:|-------------:|-------:|----:|----------:|-----:|
 | **COM~ever_larger**     | larger  |    7,453.00 |  2.88 |  0.50 |         1.00 | 192.71 | 139 | 3,173,552 |  139 |
 | **NEGany~ever_simple**  | simple  |   46,867.00 |  3.28 |  0.50 |         1.00 | 281.20 | 211 | 3,173,660 |  212 |
 | **COM~ever_greater**    | greater |    6,949.00 |  3.09 |  0.49 |         0.99 | 246.80 | 186 | 3,173,552 |  187 |
 | **COM~ever_closer**     | closer  |    3,686.00 |  3.52 |  0.49 |         0.99 | 365.82 | 279 | 3,173,552 |  281 |
 | **COM~ever_higher**     | higher  |   12,992.00 |  2.52 |  0.49 |         0.99 | 168.50 | 129 | 3,173,552 |  130 |
 | **NEGany~ever_easy**    | easy    |  108,923.00 |  3.53 |  0.48 |         0.98 | 525.98 | 429 | 3,173,660 |  437 |
 | **NEGany~ever_certain** | certain |   11,334.00 |  2.40 |  0.48 |         0.98 | 178.54 | 147 | 3,173,660 |  150 |
 | **COM~ever_popular**    | popular |   51,120.00 |  2.59 |  0.47 |         0.97 | 250.90 | 219 | 3,173,552 |  226 |
 | **NEGany~ever_good**    | good    |  201,244.00 |  2.52 |  0.45 |         0.95 | 337.56 | 331 | 3,173,660 |  350 |
 | **NEGany~ever_enough**  | enough  |   27,603.00 |  2.32 |  0.47 |         0.97 | 195.61 | 173 | 3,173,660 |  179 |


 Top 10 `mirror` "ever_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                         | `adj`   | `adj_total` | `LRC` | `dP1` | `dP1_simple` |     `G2` | `f` |    `f1` | `f2` |
 |:------------------------|:--------|------------:|------:|------:|-------------:|---------:|----:|--------:|-----:|
 | **NEGmir~ever_simple**  | simple  |   25,408.00 |  5.82 |  0.83 |         1.00 |   726.76 | 206 | 291,732 |  206 |
 | **NEGmir~ever_enough**  | enough  |    2,596.00 |  5.30 |  0.83 |         1.00 |   518.58 | 147 | 291,732 |  147 |
 | **NEGmir~ever_certain** | certain |    1,800.00 |  5.26 |  0.83 |         1.00 |   504.47 | 143 | 291,732 |  143 |
 | **NEGmir~ever_boring**  | boring  |    1,961.00 |  3.80 |  0.83 |         1.00 |   201.07 |  57 | 291,732 |   57 |
 | **NEGmir~ever_black**   | black   |    1,412.00 |  3.77 |  0.83 |         1.00 |   197.54 |  56 | 291,732 |   56 |
 | **NEGmir~ever_easy**    | easy    |   18,610.00 |  6.44 |  0.83 |         1.00 | 1,285.01 | 368 | 291,732 |  369 |
 | **NEGmir~ever_good**    | good    |   31,585.00 |  5.68 |  0.82 |         0.99 | 1,013.87 | 299 | 291,732 |  303 |
 | **NEGmir~ever_perfect** | perfect |    3,134.00 |  5.57 |  0.82 |         1.00 |   714.47 | 206 | 291,732 |  207 |
 | **NEGmir~ever_able**    | able    |    3,704.00 |  4.17 |  0.78 |         0.95 |   426.52 | 136 | 291,732 |  143 |
 | **NEGmir~ever_wrong**   | wrong   |   20,880.00 |  4.15 |  0.80 |         0.97 |   333.70 | 102 | 291,732 |  105 |


 #### 8. _yet_


 Top 10 `RBdirect` "yet_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj`     | `adj_total` | `LRC` | `dP1` | `dP1_simple` |      `G2` |    `f` |      `f1` |   `f2` |
 |:-------------------------|:----------|------------:|------:|------:|-------------:|----------:|-------:|----------:|-------:|
 | **NEGany~yet_clear**     | clear     |   84,227.00 |  8.66 |  0.50 |         1.00 | 14,392.25 | 10,406 | 3,173,660 | 10,409 |
 | **NEGany~yet_certain**   | certain   |   11,334.00 |  5.60 |  0.50 |         1.00 |  1,200.66 |    866 | 3,173,660 |    866 |
 | **NEGany~yet_ready**     | ready     |   29,583.00 |  8.06 |  0.50 |         1.00 | 10,344.81 |  7,501 | 3,173,660 |  7,505 |
 | **NEGany~yet_final**     | final     |    1,213.00 |  5.16 |  0.50 |         1.00 |    887.30 |    640 | 3,173,660 |    640 |
 | **NEGany~yet_public**    | public    |    2,656.00 |  4.69 |  0.50 |         1.00 |    647.44 |    467 | 3,173,660 |    467 |
 | **NEGany~yet_complete**  | complete  |    8,415.00 |  6.70 |  0.50 |         1.00 |  2,998.60 |  2,174 | 3,173,660 |  2,175 |
 | **NEGany~yet_available** | available |   82,956.00 |  6.66 |  0.50 |         1.00 |  9,950.03 |  7,430 | 3,173,660 |  7,461 |
 | **NEGany~yet_sure**      | sure      |  134,139.00 |  6.13 |  0.50 |         1.00 |  2,689.26 |  1,977 | 3,173,660 |  1,981 |
 | **NEGany~yet_dead**      | dead      |    6,348.00 |  4.47 |  0.50 |         1.00 |    555.93 |    401 | 3,173,660 |    401 |
 | **NEGany~yet_able**      | able      |   23,355.00 |  5.44 |  0.50 |         1.00 |  1,764.46 |  1,315 | 3,173,660 |  1,320 |

 No bigrams found in loaded `mirror` AM table.

 #### 9. _immediately_


 Top 10 `RBdirect` "immediately_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                    | `adj`       | `adj_total` | `LRC` | `dP1` | `dP1_simple` |      `G2` |    `f` |      `f1` |   `f2` |
 |:-----------------------------------|:------------|------------:|------:|------:|-------------:|----------:|-------:|----------:|-------:|
 | **NEGany~immediately_sure**        | sure        |  134,139.00 |  2.87 |  0.50 |         1.00 |    191.31 |    138 | 3,173,660 |    138 |
 | **NEGany~immediately_reachable**   | reachable   |      350.00 |  2.50 |  0.50 |         1.00 |    151.11 |    109 | 3,173,660 |    109 |
 | **NEGany~immediately_clear**       | clear       |   84,227.00 |  7.55 |  0.50 |         1.00 | 33,058.44 | 24,416 | 3,173,660 | 24,488 |
 | **NEGany~immediately_possible**    | possible    |   30,446.00 |  5.40 |  0.50 |         1.00 |  1,360.38 |  1,000 | 3,173,660 |  1,002 |
 | **NEGany~immediately_available**   | available   |   82,956.00 |  5.34 |  0.48 |         0.98 | 25,870.14 | 21,078 | 3,173,660 | 21,477 |
 | **NEGany~immediately_obvious**     | obvious     |   22,651.00 |  3.88 |  0.46 |         0.96 |  2,481.50 |  2,238 | 3,173,660 |  2,325 |
 | **NEGany~immediately_able**        | able        |   23,355.00 |  3.66 |  0.48 |         0.98 |    746.39 |    626 | 3,173,660 |    641 |
 | **NEGany~immediately_successful**  | successful  |   31,460.00 |  2.87 |  0.47 |         0.97 |    333.73 |    290 | 3,173,660 |    299 |
 | **NEGany~immediately_apparent**    | apparent    |    9,798.00 |  3.30 |  0.44 |         0.94 |  2,001.83 |  2,015 | 3,173,660 |  2,143 |
 | **NEGany~immediately_forthcoming** | forthcoming |    2,249.00 |  1.66 |  0.44 |         0.94 |    129.79 |    133 | 3,173,660 |    142 |


 Top 1 `mirror` "immediately_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                  | `adj`     | `adj_total` | `LRC` | `dP1` | `dP1_simple` |   `G2` | `f` |    `f1` | `f2` |
 |:---------------------------------|:----------|------------:|------:|------:|-------------:|-------:|----:|--------:|-----:|
 | **NEGmir~immediately_available** | available |   10,284.00 |  1.85 |  0.42 |         0.59 | 241.53 | 162 | 291,732 |  275 |



 ### `2024-05-23` Top 10 "most negative" bigrams corresponding to top 5 adverbs



 ## #### 1. _necessarily_


 ## Top 10 `RBdirect` "necessarily_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                       | `adj`          |   `adj_total` |   `LRC` |   `dP1` |      `G2` |   `f` |      `f1` |   `f2` |
 |:--------------------------------------|:---------------|--------------:|--------:|--------:|----------:|------:|----------:|-------:|
 | **NEGany~necessarily_sure**           | sure           |    844,981.00 |    5.91 |    0.95 |  1,436.68 |   222 | 3,226,213 |    224 |
 | **NEGany~necessarily_surprising**     | surprising     |    150,067.00 |    7.22 |    0.93 |  2,150.86 |   343 | 3,226,213 |    355 |
 | **NEGany~necessarily_indicative**     | indicative     |     12,760.00 |    8.37 |    0.93 |  8,811.69 | 1,406 | 3,226,213 |  1,456 |
 | **NEGany~necessarily_representative** | representative |     25,187.00 |    7.31 |    0.91 |  3,044.27 |   496 | 3,226,213 |    524 |
 | **NEGany~necessarily_available**      | available      |    866,272.00 |    6.36 |    0.89 |  1,280.24 |   213 | 3,226,213 |    230 |
 | **NEGany~necessarily_easy**           | easy           |    771,307.00 |    7.26 |    0.88 |  5,448.34 |   914 | 3,226,213 |    996 |
 | **NEGany~necessarily_true**           | true           |    348,994.00 |    6.89 |    0.82 | 18,199.76 | 3,238 | 3,226,213 |  3,786 |
 | **NEGany~necessarily_illegal**        | illegal        |     44,028.00 |    6.48 |    0.87 |  1,659.90 |   280 | 3,226,213 |    307 |
 | **NEGany~necessarily_related**        | related        |    137,661.00 |    6.74 |    0.84 |  4,271.76 |   742 | 3,226,213 |    842 |
 | **NEGany~necessarily_interested**     | interested     |    364,497.00 |    6.77 |    0.87 |  2,500.26 |   422 | 3,226,213 |    463 |


 ## Top 3 `NEGmirror` "necessarily_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                              | `adj`   |   `adj_total` |   `LRC` |   `dP1` |   `G2` |   `f` |    `f1` |   `f2` |
 |:-----------------------------|:--------|--------------:|--------:|--------:|-------:|------:|--------:|-------:|
 | **NEGmir~necessarily_wrong** | wrong   |     20,866.00 |    4.27 |    0.81 | 708.98 |   209 | 289,770 |    214 |
 | **NEGmir~necessarily_bad**   | bad     |     10,783.00 |    2.02 |    0.76 | 153.43 |    50 | 289,770 |     54 |
 | **NEGmir~necessarily_true**  | true    |      7,402.00 |    2.18 |    0.75 | 159.07 |    53 | 289,770 |     58 |


 ## #### 2. _exactly_


 ## Top 10 `RBdirect` "exactly_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                               | `adj`      |   `adj_total` |   `LRC` |   `dP1` |      `G2` |   `f` |      `f1` |   `f2` |
 |:------------------------------|:-----------|--------------:|--------:|--------:|----------:|------:|----------:|-------:|
 | **NEGany~exactly_surprising** | surprising |    150,067.00 |    7.34 |    0.96 |  2,863.35 |   441 | 3,226,213 |    444 |
 | **NEGany~exactly_cheap**      | cheap      |     83,765.00 |    8.28 |    0.95 |  4,443.27 |   693 | 3,226,213 |    704 |
 | **NEGany~exactly_subtle**     | subtle     |     56,845.00 |    6.92 |    0.94 |  1,671.02 |   264 | 3,226,213 |    271 |
 | **NEGany~exactly_fun**        | fun        |    224,457.00 |    6.67 |    0.94 |  1,423.92 |   225 | 3,226,213 |    231 |
 | **NEGany~exactly_conducive**  | conducive  |     16,405.00 |    6.56 |    0.93 |  1,313.09 |   208 | 3,226,213 |    214 |
 | **NEGany~exactly_sure**       | sure       |    844,981.00 |    8.63 |    0.92 | 54,750.58 | 8,860 | 3,226,213 |  9,301 |
 | **NEGany~exactly_new**        | new        |    321,311.00 |    8.54 |    0.93 |  8,697.93 | 1,378 | 3,226,213 |  1,418 |
 | **NEGany~exactly_easy**       | easy       |    771,307.00 |    8.37 |    0.93 |  6,747.64 | 1,069 | 3,226,213 |  1,100 |
 | **NEGany~exactly_clear**      | clear      |    491,108.00 |    8.30 |    0.92 | 10,937.16 | 1,759 | 3,226,213 |  1,835 |
 | **NEGany~exactly_happy**      | happy      |    528,511.00 |    7.16 |    0.90 |  2,694.69 |   441 | 3,226,213 |    468 |

 No bigrams found in loaded `NEGmirror` AM table.

 ## #### 3. _that_


 ## Top 10 `RBdirect` "that_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                             | `adj`       |   `adj_total` |   `LRC` |   `dP1` |      `G2` |   `f` |      `f1` |   `f2` |
 |:----------------------------|:------------|--------------:|--------:|--------:|----------:|------:|----------:|-------:|
 | **NEGany~that_uncommon**    | uncommon    |     61,767.00 |    8.39 |    0.94 |  5,136.91 |   804 | 3,226,213 |    819 |
 | **NEGany~that_fond**        | fond        |     39,809.00 |    7.27 |    0.94 |  2,127.94 |   334 | 3,226,213 |    341 |
 | **NEGany~that_surprising**  | surprising  |    150,067.00 |    8.14 |    0.92 |  7,115.30 | 1,141 | 3,226,213 |  1,187 |
 | **NEGany~that_common**      | common      |    556,435.00 |    8.12 |    0.92 |  7,564.08 | 1,216 | 3,226,213 |  1,268 |
 | **NEGany~that_dissimilar**  | dissimilar  |      8,816.00 |    7.00 |    0.92 |  1,904.15 |   307 | 3,226,213 |    321 |
 | **NEGany~that_hard**        | hard        |    430,990.00 |    7.96 |    0.88 | 59,642.82 | 9,966 | 3,226,213 | 10,818 |
 | **NEGany~that_complicated** | complicated |    180,071.00 |    7.95 |    0.91 |  7,450.89 | 1,208 | 3,226,213 |  1,270 |
 | **NEGany~that_impressed**   | impressed   |    113,281.00 |    7.57 |    0.91 |  4,207.58 |   684 | 3,226,213 |    721 |
 | **NEGany~that_noticeable**  | noticeable  |     40,372.00 |    6.78 |    0.91 |  1,632.07 |   265 | 3,226,213 |    279 |
 | **NEGany~that_exciting**    | exciting    |    236,396.00 |    7.48 |    0.90 |  4,892.83 |   805 | 3,226,213 |    859 |


 ## Top 10 `NEGmirror` "that_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                            | `adj`      |   `adj_total` |   `LRC` |   `dP1` |     `G2` |   `f` |    `f1` |   `f2` |
 |:---------------------------|:-----------|--------------:|--------:|--------:|---------:|------:|--------:|-------:|
 | **NEGmir~that_popular**    | popular    |      5,787.00 |    2.50 |    0.76 |   200.44 |    65 | 289,770 |     70 |
 | **NEGmir~that_interested** | interested |      9,258.00 |    2.42 |    0.76 |   190.06 |    62 | 289,770 |     67 |
 | **NEGmir~that_difficult**  | difficult  |     16,043.00 |    2.15 |    0.75 |   155.64 |    52 | 289,770 |     57 |
 | **NEGmir~that_hard**       | hard       |      7,311.00 |    2.31 |    0.74 |   168.31 |    57 | 289,770 |     63 |
 | **NEGmir~that_close**      | close      |     13,962.00 |    2.39 |    0.73 |   174.26 |    60 | 289,770 |     67 |
 | **NEGmir~that_simple**     | simple     |     25,382.00 |    4.34 |    0.73 | 1,370.94 |   473 | 289,770 |    529 |
 | **NEGmir~that_easy**       | easy       |     20,050.00 |    4.21 |    0.72 | 1,258.15 |   442 | 289,770 |    500 |
 | **NEGmir~that_great**      | great      |      5,819.00 |    3.52 |    0.67 |   728.46 |   282 | 289,770 |    340 |
 | **NEGmir~that_good**       | good       |     33,540.00 |    3.07 |    0.56 |   953.31 |   447 | 289,770 |    615 |
 | **NEGmir~that_big**        | big        |      7,859.00 |    3.06 |    0.70 |   309.58 |   113 | 289,770 |    131 |


 ## #### 4. _before_

 No bigrams found in loaded `RBdirect` AM table.
 No bigrams found in loaded `NEGmirror` AM table.

 ## #### 5. _any_


 ## Top 10 `RBdirect` "any_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj`     |   `adj_total` |   `LRC` |   `dP1` |     `G2` |   `f` |      `f1` |   `f2` |
 |:-------------------------|:----------|--------------:|--------:|--------:|---------:|------:|----------:|-------:|
 | **NEGany~any_happier**   | happier   |     19,501.00 |    4.65 |    0.53 | 3,488.76 |   830 | 3,226,213 |  1,472 |
 | **NEGany~any_simpler**   | simpler   |     26,094.00 |    3.09 |    0.30 |   671.74 |   228 | 3,226,213 |    672 |
 | **NEGany~any_clearer**   | clearer   |     13,369.00 |    3.21 |    0.30 | 1,051.22 |   357 | 3,226,213 |  1,053 |
 | **NEGany~any_different** | different |    909,864.00 |    2.98 |    0.24 | 2,270.24 |   910 | 3,226,213 |  3,313 |
 | **NEGany~any_younger**   | younger   |     29,805.00 |    2.37 |    0.19 |   544.17 |   256 | 3,226,213 |  1,121 |
 | **NEGany~any_worse**     | worse     |    214,166.00 |    2.47 |    0.16 | 3,165.88 | 1,693 | 3,226,213 |  8,487 |
 | **NEGany~any_bigger**    | bigger    |    130,470.00 |    2.27 |    0.17 |   688.06 |   357 | 3,226,213 |  1,735 |
 | **NEGany~any_harder**    | harder    |     99,332.00 |    1.98 |    0.15 |   395.22 |   227 | 3,226,213 |  1,221 |
 | **NEGany~any_safer**     | safer     |     26,779.00 |    1.73 |    0.12 |   346.68 |   235 | 3,226,213 |  1,471 |
 | **NEGany~any_easier**    | easier    |    237,680.00 |    1.95 |    0.11 | 2,164.75 | 1,607 | 3,226,213 | 10,860 |


 ## Top 4 `NEGmirror` "any_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                       | `adj`   |   `adj_total` |   `LRC` |   `dP1` |     `G2` |   `f` |    `f1` |   `f2` |
 |:----------------------|:--------|--------------:|--------:|--------:|---------:|------:|--------:|-------:|
 | **NEGmir~any_better** | better  |     14,076.00 |    4.44 |    0.75 | 1,148.18 |   381 | 289,770 |    416 |
 | **NEGmir~any_easier** | easier  |      2,409.00 |    2.42 |    0.75 |   181.98 |    61 | 289,770 |     67 |
 | **NEGmir~any_worse**  | worse   |      8,490.00 |    2.87 |    0.72 |   248.63 |    88 | 289,770 |    100 |
 | **NEGmir~any_closer** | closer  |        986.00 |    2.21 |    0.68 |   149.62 |    56 | 289,770 |     66 |


 ## #### 6. _ever_


 ## Top 5 `RBdirect` "ever_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                         | `adj`   |   `adj_total` |   `LRC` |   `dP1` |     `G2` |   `f` |      `f1` |   `f2` |
 |:------------------------|:--------|--------------:|--------:|--------:|---------:|------:|----------:|-------:|
 | **NEGany~ever_simple**  | simple  |    427,167.00 |    5.54 |    0.77 | 1,142.04 |   212 | 3,226,213 |    262 |
 | **NEGany~ever_easy**    | easy    |    771,307.00 |    5.06 |    0.63 | 2,030.58 |   430 | 3,226,213 |    641 |
 | **NEGany~ever_good**    | good    |  2,037,285.00 |    3.76 |    0.40 | 1,178.00 |   332 | 3,226,213 |    756 |
 | **NEGany~ever_perfect** | perfect |    164,519.00 |    3.48 |    0.37 |   736.05 |   217 | 3,226,213 |    527 |
 | **NEGany~ever_able**    | able    |    428,268.00 |    1.81 |    0.13 |   363.95 |   234 | 3,226,213 |  1,398 |


 ## Top 6 `NEGmirror` "ever_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                         | `adj`   |   `adj_total` |   `LRC` |   `dP1` |     `G2` |   `f` |    `f1` |   `f2` |
 |:------------------------|:--------|--------------:|--------:|--------:|---------:|------:|--------:|-------:|
 | **NEGmir~ever_easy**    | easy    |     20,050.00 |    3.21 |    0.83 | 1,311.83 |   367 | 289,770 |    368 |
 | **NEGmir~ever_perfect** | perfect |      3,708.00 |    2.38 |    0.83 |   735.10 |   207 | 289,770 |    208 |
 | **NEGmir~ever_good**    | good    |     33,540.00 |    4.72 |    0.82 | 1,034.95 |   298 | 289,770 |    302 |
 | **NEGmir~ever_wrong**   | wrong   |     20,866.00 |    2.56 |    0.82 |   349.21 |   102 | 289,770 |    104 |
 | **NEGmir~ever_free**    | free    |      5,043.00 |    1.97 |    0.81 |   231.61 |    69 | 289,770 |     71 |
 | **NEGmir~ever_able**    | able    |      6,448.00 |    3.66 |    0.79 |   437.65 |   136 | 289,770 |    143 |


 ## #### 7. _yet_


 ## Top 10 `RBdirect` "yet_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj`     |   `adj_total` |   `LRC` |   `dP1` |      `G2` |    `f` |      `f1` |   `f2` |
 |:-------------------------|:----------|--------------:|--------:|--------:|----------:|-------:|----------:|-------:|
 | **NEGany~yet_clear**     | clear     |    491,108.00 |   10.26 |    0.95 | 67,924.56 | 10,553 | 3,226,213 | 10,693 |
 | **NEGany~yet_eligible**  | eligible  |     49,578.00 |    7.72 |    0.94 |  2,929.15 |    459 | 3,226,213 |    468 |
 | **NEGany~yet_official**  | official  |      9,778.00 |    7.33 |    0.94 |  2,236.98 |    353 | 3,226,213 |    362 |
 | **NEGany~yet_ready**     | ready     |    240,297.00 |    9.23 |    0.93 | 48,012.06 |  7,611 | 3,226,213 |  7,838 |
 | **NEGany~yet_certain**   | certain   |    104,544.00 |    8.12 |    0.93 |  5,491.41 |    874 | 3,226,213 |    903 |
 | **NEGany~yet_complete**  | complete  |    107,018.00 |    8.42 |    0.92 | 13,815.99 |  2,220 | 3,226,213 |  2,314 |
 | **NEGany~yet_sure**      | sure      |    844,981.00 |    8.37 |    0.92 | 12,379.79 |  1,990 | 3,226,213 |  2,075 |
 | **NEGany~yet_available** | available |    866,272.00 |    7.69 |    0.87 | 44,196.15 |  7,481 | 3,226,213 |  8,238 |
 | **NEGany~yet_right**     | right     |    204,572.00 |    6.50 |    0.92 |  1,254.20 |    202 | 3,226,213 |    211 |
 | **NEGany~yet_final**     | final     |      9,657.00 |    7.45 |    0.91 |  4,028.75 |    659 | 3,226,213 |    699 |

 No bigrams found in loaded `NEGmirror` AM table.

 ## #### 8. _longer_


 ## Top 5 `RBdirect` "longer_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                          | `adj`     |   `adj_total` |   `LRC` |   `dP1` |   `G2` |    `f` |       `f1` |   `f2` |
 |:-------------------------|:----------|--------------:|--------:|--------:|-------:|-------:|-----------:|-------:|
 | **COM~longer_lasting**   | lasting   |     24,344.00 |    1.44 |    0.04 | 244.09 |  3,860 | 83,102,035 |  3,866 |
 | **COM~longer_enough**    | enough    |    453,790.00 |    1.41 |    0.03 | 216.98 |  3,952 | 83,102,035 |  3,964 |
 | **COM~longer_able**      | able      |    428,268.00 |    2.28 |    0.03 | 623.67 | 11,677 | 83,102,035 | 11,716 |
 | **COM~longer_available** | available |    866,272.00 |    2.45 |    0.03 | 974.55 | 18,865 | 83,102,035 | 18,935 |
 | **COM~longer_necessary** | necessary |    187,396.00 |    1.27 |    0.03 | 220.07 |  5,365 | 83,102,035 |  5,399 |

 No bigrams found in loaded `NEGmirror` AM table.

 ## #### 9. _immediately_


 ## Top 5 `RBdirect` "immediately_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                  | `adj`     |   `adj_total` |   `LRC` |   `dP1` |       `G2` |    `f` |      `f1` |   `f2` |
 |:---------------------------------|:----------|--------------:|--------:|--------:|-----------:|-------:|----------:|-------:|
 | **NEGany~immediately_possible**  | possible  |    364,265.00 |    7.68 |    0.90 |   6,269.26 |  1,027 | 3,226,213 |  1,091 |
 | **NEGany~immediately_clear**     | clear     |    491,108.00 |    8.32 |    0.90 | 153,302.22 | 25,276 | 3,226,213 | 27,066 |
 | **NEGany~immediately_available** | available |    866,272.00 |    5.77 |    0.66 | 102,962.94 | 21,297 | 3,226,213 | 30,725 |
 | **NEGany~immediately_able**      | able      |    428,268.00 |    4.87 |    0.58 |   2,851.84 |    639 | 3,226,213 |  1,036 |
 | **NEGany~immediately_obvious**   | obvious   |    193,498.00 |    4.59 |    0.49 |   9,043.23 |  2,258 | 3,226,213 |  4,305 |


 ## Top 1 `NEGmirror` "immediately_*" bigrams (sorted by `dP1`; `LRC > 1`)


 |                                  | `adj`     |   `adj_total` |   `LRC` |   `dP1` |   `G2` |   `f` |    `f1` |   `f2` |
 |:---------------------------------|:----------|--------------:|--------:|--------:|-------:|------:|--------:|-------:|
 | **NEGmir~immediately_available** | available |     12,636.00 |    1.94 |    0.43 | 254.47 |   162 | 289,770 |    274 |



In [8]:
# %%

bigram_dfs['RBdirect'].filter(like='~before_', axis=0)

Unnamed: 0_level_0,l2,f,E11,am_log_likelihood,...,adj,adv_total,adj_total,l1
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
NEGany~before_available,before_available,177,88.5,245.38,...,available,323.0,82956.0,NEGATED


In [9]:
# %%

for key, info in samples_dict.items():
    if key in ('bigrams', 'adj'):
        key = f'ALL {key.replace("adj", "adjectives")}'
    formatted_iter = [
        f'_{a.replace("_", " ")}_' for a
        in (info['adj'] if isinstance(info, dict)
            else info)]
    print_iter(formatted_iter,
               header=f'1. _{key}_ ({len(formatted_iter)} unique)',
               bullet='1.', indent=3)


1. _necessarily_ (11 unique)
   1. _aware_
   1. _better_
   1. _enough_
   1. _easy_
   1. _indicative_
   1. _true_
   1. _new_
   1. _related_
   1. _wrong_
   1. _surprising_
   1. _bad_

1. _that_ (16 unique)
   1. _unusual_
   1. _hard_
   1. _popular_
   1. _exciting_
   1. _close_
   1. _good_
   1. _easy_
   1. _difficult_
   1. _different_
   1. _big_
   1. _uncommon_
   1. _impressed_
   1. _simple_
   1. _surprising_
   1. _great_
   1. _interested_

1. _exactly_ (10 unique)
   1. _hard_
   1. _clear_
   1. _subtle_
   1. _easy_
   1. _new_
   1. _fun_
   1. _fair_
   1. _surprising_
   1. _cheap_
   1. _sure_

1. _before_ (1 unique)
   1. _available_

1. _any_ (11 unique)
   1. _harder_
   1. _better_
   1. _clearer_
   1. _simpler_
   1. _worse_
   1. _different_
   1. _cheaper_
   1. _happier_
   1. _easier_
   1. _closer_
   1. _younger_

1. _remotely_ (9 unique)
   1. _close_
   1. _interesting_
   1. _funny_
   1. _true_
   1. _related_
   1. _possible_
   1. _intere




 1. _necessarily_ (11 unique)
    1. _related_
    1. _easy_
    1. _wrong_
    1. _enough_
    1. _aware_
    1. _true_
    1. _surprising_
    1. _bad_
    1. _new_
    1. _better_
    1. _indicative_

 1. _that_ (16 unique)
    1. _simple_
    1. _exciting_
    1. _hard_
    1. _popular_
    1. _easy_
    1. _uncommon_
    1. _unusual_
    1. _different_
    1. _difficult_
    1. _great_
    1. _big_
    1. _surprising_
    1. _close_
    1. _impressed_
    1. _good_
    1. _interested_

 1. _exactly_ (10 unique)
    1. _new_
    1. _sure_
    1. _subtle_
    1. _easy_
    1. _hard_
    1. _cheap_
    1. _clear_
    1. _surprising_
    1. _fun_
    1. _fair_

 1. _before_ (1 unique)
    1. _available_

 1. _any_ (11 unique)
    1. _worse_
    1. _better_
    1. _younger_
    1. _cheaper_
    1. _clearer_
    1. _different_
    1. _simpler_
    1. _harder_
    1. _happier_
    1. _easier_
    1. _closer_

 1. _remotely_ (9 unique)
    1. _related_
    1. _comparable_
    1. _similar_
    1. _close_
    1. _true_
    1. _funny_
    1. _possible_
    1. _interesting_
    1. _interested_

 1. _ever_ (15 unique)
    1. _simple_
    1. _easy_
    1. _popular_
    1. _boring_
    1. _able_
    1. _wrong_
    1. _black_
    1. _perfect_
    1. _enough_
    1. _greater_
    1. _larger_
    1. _higher_
    1. _certain_
    1. _closer_
    1. _good_

 1. _yet_ (10 unique)
    1. _sure_
    1. _available_
    1. _able_
    1. _final_
    1. _public_
    1. _ready_
    1. _clear_
    1. _certain_
    1. _dead_
    1. _complete_

 1. _immediately_ (10 unique)
    1. _sure_
    1. _obvious_
    1. _available_
    1. _able_
    1. _apparent_
    1. _clear_
    1. _reachable_
    1. _forthcoming_
    1. _possible_
    1. _successful_

 1. _ALL bigrams_ (93 unique)
    1. _any worse_
    1. _any younger_
    1. _necessarily aware_
    1. _necessarily enough_
    1. _ever closer_
    1. _that difficult_
    1. _necessarily wrong_
    1. _ever simple_
    1. _remotely similar_
    1. _that hard_
    1. _any closer_
    1. _ever certain_
    1. _ever black_
    1. _ever boring_
    1. _that surprising_
    1. _that interested_
    1. _remotely interesting_
    1. _that big_
    1. _that exciting_
    1. _immediately reachable_
    1. _any clearer_
    1. _that simple_
    1. _ever higher_
    1. _exactly easy_
    1. _necessarily better_
    1. _yet dead_
    1. _remotely close_
    1. _remotely possible_
    1. _that great_
    1. _that impressed_
    1. _ever good_
    1. _immediately clear_
    1. _that uncommon_
    1. _immediately forthcoming_
    1. _exactly subtle_
    1. _ever larger_
    1. _immediately able_
    1. _that different_
    1. _any simpler_
    1. _exactly surprising_
    1. _any cheaper_
    1. _necessarily new_
    1. _that close_
    1. _necessarily easy_
    1. _exactly fair_
    1. _necessarily bad_
    1. _yet available_
    1. _yet complete_
    1. _yet ready_
    1. _ever wrong_
    1. _ever perfect_
    1. _ever popular_
    1. _yet able_
    1. _exactly fun_
    1. _remotely true_
    1. _remotely funny_
    1. _exactly hard_
    1. _ever enough_
    1. _that easy_
    1. _remotely related_
    1. _immediately obvious_
    1. _immediately successful_
    1. _yet certain_
    1. _immediately possible_
    1. _immediately available_
    1. _exactly new_
    1. _remotely comparable_
    1. _necessarily indicative_
    1. _any easier_
    1. _any harder_
    1. _remotely interested_
    1. _any different_
    1. _that good_
    1. _immediately apparent_
    1. _yet final_
    1. _yet sure_
    1. _necessarily true_
    1. _ever able_
    1. _immediately sure_
    1. _exactly sure_
    1. _necessarily surprising_
    1. _any better_
    1. _yet clear_
    1. _any happier_
    1. _that popular_
    1. _ever greater_
    1. _necessarily related_
    1. _exactly clear_
    1. _yet public_
    1. _exactly cheap_
    1. _ever easy_
    1. _that unusual_
    1. _before available_

 1. _ALL adjectives_ (64 unique)
    1. _exciting_
    1. _easy_
    1. _final_
    1. _uncommon_
    1. _comparable_
    1. _black_
    1. _different_
    1. _public_
    1. _larger_
    1. _funny_
    1. _certain_
    1. _new_
    1. _better_
    1. _harder_
    1. _fair_
    1. _closer_
    1. _simple_
    1. _worse_
    1. _cheaper_
    1. _clearer_
    1. _surprising_
    1. _bad_
    1. _higher_
    1. _impressed_
    1. _easier_
    1. _dead_
    1. _similar_
    1. _sure_
    1. _available_
    1. _hard_
    1. _able_
    1. _perfect_
    1. _difficult_
    1. _ready_
    1. _aware_
    1. _great_
    1. _clear_
    1. _forthcoming_
    1. _possible_
    1. _happier_
    1. _obvious_
    1. _interesting_
    1. _interested_
    1. _related_
    1. _subtle_
    1. _boring_
    1. _popular_
    1. _younger_
    1. _wrong_
    1. _apparent_
    1. _cheap_
    1. _enough_
    1. _unusual_
    1. _big_
    1. _reachable_
    1. _true_
    1. _close_
    1. _greater_
    1. _fun_
    1. _simpler_
    1. _successful_
    1. _indicative_
    1. _good_
    1. _complete_

In [10]:
# %%

NEG_bigrams_sample = pd.concat(
    (ad['both'] for ad in samples_dict.values() if isinstance(ad, dict))).sort_values('LRC', ascending=False)

In [11]:
# %%

top_NEGbigram_df_path = TOP_AM_TAG_DIR.joinpath(
    f'Top{K}_NEG-ADV_top-{bigram_k}-bigrams.{timestamp_today()}.csv')
print(top_NEGbigram_df_path)
NEG_bigrams_sample.to_csv(top_NEGbigram_df_path)
nb_show_table(NEG_bigrams_sample.sort_values('LRC', ascending=False),
              outpath=top_NEGbigram_df_path.with_suffix('.md'))

/share/compling/projects/sanpi/results/top_AM/ALL/Top6_NEG-ADV_top-10-bigrams.2024-07-25.csv

|                                    |    `f` |   `dP1` |   `dP1_simple` |   `LRC` |      `G2` |   `odds_r_disc` |   `t` |       `N` |      `f1` |   `f2` |   `exp_f` |   `unexp_f` | `l1`       | `l2`                    | `adj`       |   `adj_total` | `adv`       |   `adv_total` |
|:-----------------------------------|-------:|--------:|---------------:|--------:|----------:|----------------:|------:|----------:|----------:|-------:|----------:|------------:|:-----------|:------------------------|:------------|--------------:|:------------|--------------:|
| **NEGany~yet_clear**               | 10,406 |    0.50 |           1.00 |    8.66 | 14,392.25 |            3.47 | 50.99 | 6,347,364 | 3,173,660 | 10,409 |  5,204.46 |    5,201.54 | NEGATED    | yet_clear               | clear       |     84,227.00 | yet         |     53,881.00 |
| **NEGany~yet_ready**               |  7,501 |    0.50 |      

In [12]:
# %%

NEG_bigrams_sample.l1.value_counts()

l1
NEGATED       73
NEGMIR        35
COMPLEMENT     5
Name: count, dtype: int64

In [13]:
# %%

nb_show_table(NEG_bigrams_sample.filter(like='O', axis=0))


|                      |   `f` |   `dP1` |   `dP1_simple` |   `LRC` |   `G2` |   `odds_r_disc` |   `t` |       `N` |      `f1` |   `f2` |   `exp_f` |   `unexp_f` | `l1`       | `l2`         | `adj`   |   `adj_total` | `adv`   |   `adv_total` |
|:---------------------|------:|--------:|---------------:|--------:|-------:|----------------:|------:|----------:|----------:|-------:|----------:|------------:|:-----------|:-------------|:--------|--------------:|:--------|--------------:|
| **COM~ever_closer**  |   279 |    0.49 |           0.99 |    3.52 | 365.82 |            2.05 |  8.29 | 6,347,364 | 3,173,552 |    281 |    140.49 |      138.51 | COMPLEMENT | ever_closer  | closer  |      3,686.00 | ever    |     10,870.00 |
| **COM~ever_greater** |   186 |    0.49 |           0.99 |    3.09 | 246.80 |            2.09 |  6.78 | 6,347,364 | 3,173,552 |    187 |     93.50 |       92.50 | COMPLEMENT | ever_greater | greater |      6,949.00 | ever    |     10,870.00 |
| **COM~ever_larger**  