In [53]:
# coding=utf-8
from source.utils.general import print_iter
from pathlib import Path

import pandas as pd

from source.utils import PKL_SUFF, SAMPLE_ADV, print_iter, print_md_table, timestamp_today
from source.utils.associate import AM_DF_DIR, adjust_assoc_columns
from source.utils.sample import sample_pickle as sampkl


Set columns and diplay settings

In [54]:
FOCUS = ['f', 'E11', 'unexpected_f', 'unexpected_abs_sqrt',
         'am_p1_given2', 'conservative_log_ratio',
         'am_log_likelihood', 't_score',
         'mutual_information', 'am_odds_ratio_disc',
         'N', 'f1', 'f2', 'l1', 'l2']
pd.set_option('display.max_colwidth', 40)
pd.set_option('display.max_columns', 12)
pd.set_option('display.width', 250)
pd.set_option("display.precision", 2)
pd.set_option("styler.format.precision", 2)
pd.set_option("styler.format.thousands", ",")
pd.set_option("display.float_format", '{:,.2f}'.format)
pd.set_option("styler.render.repr", "html")

Set paths and load dataframes

In [55]:
POLAR_DIR = AM_DF_DIR.joinpath('polar')
SET_FLOOR = 2000
MIR_FLOOR = 100
polar_adv_dirs = []
# results/assoc_df/polar/RBdirect/adv/extra/polarized-adv_35f-7c_min5000x_extra.pkl.gz
adv_am_paths = {
    p.name: tuple(
        p.joinpath('adv/extra').glob(
            f'*35f-7c_min{SET_FLOOR if p.name == "RBdirect" else MIR_FLOOR}x*{PKL_SUFF}')
    )[0]
    for p in POLAR_DIR.iterdir()}

setdiff_adv = pd.read_pickle(adv_am_paths['RBdirect'])
# results/assoc_df/polar/NEGmirror/adv/extra/polarized-adv_MIRROR_polarized.35f-7c_min5000x_extra.pkl.gz
# mirror_adv = pd.read_pickle(
#     tuple(mirror_adv_dir.glob(f'*35f-7c_min{MIR_FLOOR}x*{PKL_SUFF}'))[0])
setdiff_adv.sample(3)

Unnamed: 0_level_0,l1,l2,f,E11,am_log_likelihood,am_odds_ratio_disc,...,conservative_log_ratio_05,conservative_log_ratio_nc,conservative_log_ratio_dv,f_sqrt,f1_sqrt,f2_sqrt
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
COM~violently,COMPLEMENT,violently,4325,4212.34,109.38,0.51,...,0.77,1.15,0.8,65.76,9116.03,66.15
COM~historically,COMPLEMENT,historically,55155,54168.42,576.89,0.28,...,0.74,0.82,0.74,234.85,9116.03,237.22
COM~medically,COMPLEMENT,medically,19420,20226.16,-668.35,-0.32,...,-0.9,-0.97,-0.91,139.36,9116.03,144.96


In [56]:
mirror_adv = pd.read_pickle(adv_am_paths['NEGmirror'])

mirror_adv.sample(3)

Unnamed: 0_level_0,l1,l2,f,E11,am_log_likelihood,am_odds_ratio_disc,...,conservative_log_ratio_05,conservative_log_ratio_nc,conservative_log_ratio_dv,f_sqrt,f1_sqrt,f2_sqrt
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
POS~impressively,POSMIR,impressively,142,124.88,23.18,0.73,...,0.0,0.53,0.0,11.92,1318.37,12.08
POS~actively,POSMIR,actively,337,320.75,6.25,0.17,...,0.0,0.0,0.0,18.36,1318.37,19.36
POS~remotely,POSMIR,remotely,871,2323.94,-4009.67,-1.1,...,-3.38,-3.48,-3.38,29.51,1318.37,52.12


In [57]:
def get_top_vals(df: pd.DataFrame,
                 index_like: str = 'NEG',
                 metric_filter: str | list = 'conservative_log_ratio',
                 k: int = 10,
                 val_col: str = None,
                 ignore_neg_adv: bool = True):
    env_df = df.copy().loc[df.conservative_log_ratio >=
                           1].filter(like=index_like, axis=0)
    if ignore_neg_adv:
        env_df = env_df.loc[~df.l2.isin(
            ("n't", 'not', 'barely', 'never', 'no', 'none')), :]
    if isinstance(metric_filter, str):
        metric_filter = [metric_filter]

    top = pd.concat([env_df.nlargest(k, m) for m in metric_filter]
                    ).drop_duplicates(keep='first')

    if val_col:
        top = top[[val_col] + metric_filter]

    return top.sort_values(metric_filter[0], ascending=False)


[top20_sd, top20_mr] = [get_top_vals(adv_df, k=20, metric_filter=['am_p1_given2', 'conservative_log_ratio'])
                        for adv_df in [setdiff_adv, mirror_adv]]
top20_sd.filter(items=FOCUS).reset_index()

Unnamed: 0,key,f,E11,unexpected_f,unexpected_abs_sqrt,am_p1_given2,...,am_odds_ratio_disc,N,f1,f2,l1,l2
0,NEG~necessarily,42708,2118.68,40589.32,201.47,0.72,...,1.9,86330752,3226213,56694,NEGATED,necessarily
1,NEG~exactly,43635,2301.98,41333.02,203.31,0.67,...,1.8,86330752,3226213,61599,NEGATED,exactly
2,NEG~that,165411,9357.24,156053.76,395.04,0.63,...,1.72,86330752,3226213,250392,NEGATED,that
3,NEG~immediately,57319,3855.76,53463.24,231.22,0.52,...,1.52,86330752,3226213,103177,NEGATED,immediately
4,NEG~yet,52546,3800.83,48745.17,220.78,0.48,...,1.45,86330752,3226213,101707,NEGATED,yet
5,NEG~terribly,18054,2622.43,15431.57,124.22,0.22,...,0.95,86330752,3226213,70174,NEGATED,terribly
6,NEG~remotely,5679,829.4,4849.6,69.64,0.22,...,0.95,86330752,3226213,22194,NEGATED,remotely
7,NEG~only,114070,17346.13,96723.87,311.0,0.21,...,0.94,86330752,3226213,464168,NEGATED,only
8,NEG~altogether,4575,771.17,3803.82,61.68,0.18,...,0.87,86330752,3226213,20636,NEGATED,altogether
9,NEG~entirely,63708,11354.35,52353.65,228.81,0.17,...,0.84,86330752,3226213,303833,NEGATED,entirely


In [58]:
top20_mr.filter(items=FOCUS).reset_index()

Unnamed: 0,key,f,E11,unexpected_f,unexpected_abs_sqrt,am_p1_given2,...,am_odds_ratio_disc,N,f1,f2,l1,l2
0,NEG~before,290,42.53,247.47,15.73,0.84,...,2.58,2032082,293963,294,NEGMIR,before
1,NEG~ever,4718,749.2,3968.8,63.0,0.77,...,1.79,2032082,293963,5179,NEGMIR,ever
2,NEG~exactly,813,161.15,651.85,25.53,0.59,...,1.2,2032082,293963,1114,NEGMIR,exactly
3,NEG~any,1082,219.02,862.98,29.38,0.57,...,1.17,2032082,293963,1514,NEGMIR,any
4,NEG~remotely,1846,393.04,1452.96,38.12,0.54,...,1.1,2032082,293963,2717,NEGMIR,remotely
5,NEG~particularly,9278,2163.26,7114.74,84.35,0.48,...,1.0,2032082,293963,14954,NEGMIR,particularly
6,NEG~that,4338,1080.91,3257.09,57.07,0.44,...,0.92,2032082,293963,7472,NEGMIR,that
7,NEG~necessarily,971,243.18,727.82,26.98,0.43,...,0.91,2032082,293963,1681,NEGMIR,necessarily
8,NEG~inherently,2872,817.19,2054.81,45.33,0.36,...,0.79,2032082,293963,5649,NEGMIR,inherently
9,NEG~again,182,54.83,127.17,11.28,0.34,...,0.74,2032082,293963,379,NEGMIR,again


In [59]:
def combine_top(df_1: pd.DataFrame,
                name_1: str,
                df_2: pd.DataFrame,
                name_2: str,
                env_filter: str = 'NEG',
                filter_items: list = FOCUS,
                k: int = 10) -> pd.DataFrame:

    top_dfs = [get_top_vals(adv_df, index_like=env_filter, k=k,
                            metric_filter=['am_p1_given2',
                                           'conservative_log_ratio']
                            )
               for adv_df in [df_1, df_2]]
    print(top_dfs[0].l2)
    print(top_dfs[1].l2)

    top_adv = top_dfs[0].l2.to_list()
    for adv in top_dfs[1].l2:
        if adv not in top_adv:
            top_adv.append(adv)

    df_1, df_2 = [d.filter(items=filter_items)
                  .filter(like=env_filter, axis=0)
                  .reset_index().set_index('l2')
                  for d in [df_1, df_2]]
    df_1 = adjust_assoc_columns(df_1)
    df_2 = adjust_assoc_columns(df_2)

    both = pd.DataFrame(index=top_adv)
    name_1, name_2 = [f"_{n.strip('_')}" for n in [name_1, name_2]]
    both = both.join(df_1).join(df_2, lsuffix=name_1,
                                rsuffix=name_2).sort_index(axis=1)
    # both['polar_approx'] = both.polar_approx.fillna('')(f'ADV_{name_1}')
    return both

## Compile top NEG~adverb associations across both approximation methods

In [60]:
sample_size = 8
C = combine_top(setdiff_adv.copy(), 'SET',
                mirror_adv.copy(), 'MIR', k=sample_size)


def load_backup(lower_floor: int = 100):
    backup_set_df = pd.read_pickle(tuple(adv_am_paths['RBdirect'].parent.glob(
        f'*35f-7c_min{lower_floor}x*{PKL_SUFF}'))[0])

    neg_set_backup = backup_set_df.filter(like='NEG', axis=0).filter(
        items=FOCUS).reset_index().set_index('l2')
    neg_set_backup.columns = pd.Series(
        adjust_assoc_columns(neg_set_backup.columns)) + '_SET'
    print(neg_set_backup.head())
    return neg_set_backup


if any(C.f_SET.isna()):
    neg_set_backup = load_backup()
    undefined = C.index[C.f_SET.isna()].to_list()
    print_iter(
        undefined, header=f'Adverbs with negated SET_DIFF tokens < {SET_FLOOR:,}', bullet='-')
    C.loc[undefined, neg_set_backup.columns] = neg_set_backup.loc[undefined, :]
    C.loc[undefined, :]

print_md_table(C.filter(regex=r'G2|P1|^f|LRC|unexpected_f'), n_dec=2)
C.index.name = 'adv'
C.to_csv(
    AM_DF_DIR / f'Top{sample_size}NEG-ADV_combined.35f-7c_{timestamp_today()}.csv')

key
NEG~necessarily    necessarily
NEG~exactly            exactly
NEG~that                  that
NEG~immediately    immediately
NEG~yet                    yet
NEG~terribly          terribly
NEG~remotely          remotely
NEG~only                  only
Name: l2, dtype: string
key
NEG~before                before
NEG~ever                    ever
NEG~exactly              exactly
NEG~any                      any
NEG~remotely            remotely
NEG~particularly    particularly
NEG~that                    that
NEG~necessarily      necessarily
Name: l2, dtype: string
                       key_SET   f_SET  exp_f_SET  unexp_f_SET  unexp_abs_sqrt_SET  dP1_SET  ...  MI_SET  odds_r_disc_SET     N_SET   f1_SET  f2_SET   l1_SET
l2                                                                                           ...                                                             
ornamentally  NEG~ornamentally     109       5.38       103.62               10.18     0.72  ...    1.31            

In [61]:
# results/assoc_df/polar/RBdirect/bigram/polarized-bigram_35f-7c_min1000x.pkl.gz
bigram_floor = 200
bigram_dfs = {d.name:
              pd.read_pickle(
                  tuple(d.joinpath('bigram/extra')
                        .glob(f'*35f-7c*min{bigram_floor//2 if d.name == "NEGmirror" else bigram_floor}x*.pkl.gz')
                        )[0])
              for d in POLAR_DIR.iterdir()}

In [62]:
def show_adv_bigrams(sample_size, C, bigram_dfs) -> dict:
    print('# Top bigrams corresponding to top adverbs\n')
    print(timestamp_today())
    patterns = list(bigram_dfs.keys())
    top_adverbs = C.LRC_SET.nlargest(sample_size).index
    bigram_samples = dict.fromkeys(top_adverbs)
    bigrams = []
    adj = []
    for adv in top_adverbs:
        print(f'\n## _{adv}_\n')
        adv_top = None
        bigram_samples[adv] = dict.fromkeys(patterns + ['both', 'adj'])
        adj_for_adv = []
        for pat, bdf in bigram_dfs.items():
            bdf = bdf[FOCUS+['adv', 'adj', 'adj_total']]
            bdf.columns = adjust_assoc_columns(bdf.columns)
            bdf = bdf.loc[bdf.LRC >= 1, ~bdf.columns.str.endswith('sqrt')]

            adv_pat_bigrams = bdf.loc[bdf.adv ==
                                      adv, :].nlargest(sample_size, 'LRC')
            # print(adv_top_bigrams)
            if adv_pat_bigrams.empty:
                print(f'No bigrams found in loaded `{pat}` AM table.')
            else:
                print_md_table(adv_pat_bigrams, n_dec=2,
                               title=f'### Top `{pat}` "{adv}_*" bigrams (sorted by LRC)')

            adj_for_adv.extend(adv_pat_bigrams.adj.drop_duplicates().to_list())

            bigram_samples[adv][pat] = adv_pat_bigrams

            adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams]) 

        bigram_samples[adv]['adj'] = set(adj_for_adv)
        bigrams.extend(adv_top.l2.drop_duplicates().to_list())
        adj.extend(adj_for_adv)
        bigram_samples[adv]['both'] = adv_top
    bigram_samples['bigrams'] = set(bigrams)
    bigram_samples['adj'] = set(adj)
    return bigram_samples


samples_dict = show_adv_bigrams(sample_size, C, bigram_dfs)

# Top bigrams corresponding to top adverbs

2024-05-11

## _necessarily_


### Top `NEGmirror` "necessarily_*" bigrams (sorted by LRC)

| key                   |   f |   exp_f |   unexp_f |   dP1 |   LRC |     G2 |     t |   MI |   odds_r_disc |         N |      f1 |   f2 | l1     | l2                | adv         | adj   |   adj_total |
|:----------------------|----:|--------:|----------:|------:|------:|-------:|------:|-----:|--------------:|----------:|--------:|-----:|:-------|:------------------|:------------|:------|------------:|
| NEG~necessarily_wrong | 213 |   33.71 |    179.29 |  0.77 |  4.19 | 693.55 | 12.29 | 0.80 |          1.79 | 2,032,082 | 293,963 |  233 | NEGMIR | necessarily_wrong | necessarily | wrong |   24,007.00 |


### Top `RBdirect` "necessarily_*" bigrams (sorted by LRC)

| key                            |     f |   exp_f |   unexp_f |   dP1 |   LRC |        G2 |     t |   MI |   odds_r_disc |          N |        f1 |    f2 | l1      | l2                     

  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])
  adv_top = adv_pat_bigrams if adv_top is None else pd.concat([adv_top, adv_pat_bigrams])


In [63]:
samples_dict['adj']

{'able',
 'apparent',
 'available',
 'bad',
 'beautiful',
 'big',
 'certain',
 'cheap',
 'clear',
 'common',
 'complete',
 'complicated',
 'concerned',
 'convenient',
 'delicious',
 'different',
 'easy',
 'eligible',
 'exciting',
 'final',
 'good',
 'great',
 'happy',
 'hard',
 'ideal',
 'illegal',
 'important',
 'impressed',
 'indicative',
 'ineffective',
 'interested',
 'interesting',
 'new',
 'obvious',
 'possible',
 'ready',
 'related',
 'representative',
 'simple',
 'stylish',
 'successful',
 'sure',
 'surprised',
 'surprising',
 'true',
 'uncommon',
 'unfair',
 'unnecessary',
 'unusual',
 'useful',
 'visible',
 'wrong'}

In [64]:
samples_dict['exactly']['adj']

{'cheap', 'clear', 'easy', 'happy', 'ideal', 'new', 'sure', 'surprising'}

In [65]:
samples_dict['necessarily']['adj']

{'easy',
 'illegal',
 'indicative',
 'interested',
 'related',
 'representative',
 'surprising',
 'true',
 'wrong'}

In [66]:
for key, info in samples_dict.items():
    if key in ('bigrams', 'adj'):
        key = f'ALL {key.replace("adj", "adjectives")}'
    formatted_iter = [
        f'_{a.replace("_", " ")}_' for a
        in (info['adj'] if isinstance(info, dict)
            else info)]
    print_iter(formatted_iter,
               header=f'+ _{key}_ ({len(formatted_iter)} unique)',
               bullet='+', indent=2)


+ _necessarily_ (9 unique)
  + _representative_
  + _illegal_
  + _indicative_
  + _related_
  + _wrong_
  + _interested_
  + _easy_
  + _surprising_
  + _true_

+ _exactly_ (8 unique)
  + _sure_
  + _new_
  + _clear_
  + _cheap_
  + _happy_
  + _ideal_
  + _easy_
  + _surprising_

+ _that_ (15 unique)
  + _common_
  + _important_
  + _complicated_
  + _impressed_
  + _great_
  + _unusual_
  + _hard_
  + _big_
  + _simple_
  + _bad_
  + _good_
  + _easy_
  + _surprising_
  + _exciting_
  + _uncommon_

+ _immediately_ (8 unique)
  + _possible_
  + _clear_
  + _obvious_
  + _apparent_
  + _successful_
  + _able_
  + _visible_
  + _available_

+ _yet_ (8 unique)
  + _sure_
  + _eligible_
  + _clear_
  + _final_
  + _complete_
  + _ready_
  + _certain_
  + _available_

+ _before_ (1 unique)
  + _available_

+ _terribly_ (9 unique)
  + _useful_
  + _interesting_
  + _impressed_
  + _surprised_
  + _different_
  + _wrong_
  + _interested_
  + _surprising_
  + _concerned_

+ _only_ (9 unique

In [67]:
all_top_adv_dfs = [ad['both']
                   for ad in samples_dict.values() if isinstance(ad, dict)]
NEG_bigrams_sample = pd.concat(all_top_adv_dfs).sort_values('LRC', ascending=False)
top_NEGbigram_df_path = AM_DF_DIR.joinpath(f'top{sample_size}_NEG-ADV_top-bigrams.{timestamp_today()}.csv')
print(str(top_NEGbigram_df_path))
NEG_bigrams_sample.to_csv(
    top_NEGbigram_df_path)
NEG_bigrams_sample

/share/compling/projects/sanpi/results/assoc_df/top8_NEG-ADV_top-bigrams.2024-05-11.csv


Unnamed: 0_level_0,f,exp_f,unexp_f,dP1,LRC,G2,...,f2,l1,l2,adv,adj,adj_total
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
NEG~yet_clear,10553,399.60,10153.40,0.95,10.26,67924.56,...,10693,NEGATED,yet_clear,yet,clear,491108.00
NEG~yet_ready,7611,292.91,7318.09,0.93,9.23,48012.06,...,7838,NEGATED,yet_ready,yet,ready,240297.00
NEG~exactly_sure,8860,347.58,8512.42,0.92,8.63,54750.58,...,9301,NEGATED,exactly_sure,exactly,sure,844981.00
NEG~exactly_new,1378,52.99,1325.01,0.93,8.54,8697.93,...,1418,NEGATED,exactly_new,exactly,new,321311.00
NEG~yet_complete,2220,86.48,2133.52,0.92,8.42,13815.99,...,2314,NEGATED,yet_complete,yet,complete,107018.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
NEG~immediately_available,164,43.98,120.02,0.39,1.91,258.42,...,304,NEGMIR,immediately_available,immediately,available,14919.00
POS~only_available,1030,891.26,138.74,0.13,1.64,237.39,...,1042,POSMIR,only_available,only,available,14919.00
NEG~that_important,115,34.43,80.57,0.34,1.47,153.47,...,238,NEGMIR,that_important,that,important,48905.00
NEG~that_bad,206,79.85,126.15,0.23,1.14,175.40,...,552,NEGMIR,that_bad,that,bad,12841.00


In [68]:
NEG_bigrams_sample.l1.unique()

<StringArray>
['NEGATED', 'NEGMIR', 'POSMIR']
Length: 3, dtype: string

## _exactly_

In [69]:
exactly_top = samples_dict['exactly']['both']
exactly_top

Unnamed: 0_level_0,f,exp_f,unexp_f,dP1,LRC,G2,...,f2,l1,l2,adv,adj,adj_total
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
NEG~exactly_sure,148,21.55,126.45,0.85,2.09,560.65,...,149,NEGMIR,exactly_sure,exactly,sure,11297.0
NEG~exactly_sure,8860,347.58,8512.42,0.92,8.63,54750.58,...,9301,NEGATED,exactly_sure,exactly,sure,844981.0
NEG~exactly_new,1378,52.99,1325.01,0.93,8.54,8697.93,...,1418,NEGATED,exactly_new,exactly,new,321311.0
NEG~exactly_easy,1069,41.11,1027.89,0.93,8.37,6747.64,...,1100,NEGATED,exactly_easy,exactly,easy,771307.0
NEG~exactly_clear,1759,68.57,1690.43,0.92,8.3,10937.16,...,1835,NEGATED,exactly_clear,exactly,clear,491108.0
NEG~exactly_cheap,693,26.31,666.69,0.95,8.28,4443.27,...,704,NEGATED,exactly_cheap,exactly,cheap,83765.0
NEG~exactly_surprising,441,16.59,424.41,0.96,7.34,2863.35,...,444,NEGATED,exactly_surprising,exactly,surprising,150067.0
NEG~exactly_happy,441,17.49,423.51,0.9,7.16,2694.69,...,468,NEGATED,exactly_happy,exactly,happy,528511.0
NEG~exactly_ideal,418,16.63,401.37,0.9,7.08,2546.29,...,445,NEGATED,exactly_ideal,exactly,ideal,42701.0


In [None]:


# topDPb_neg_adv = setdiff_adv.filter(
#     like='NEG', axis=0).nlargest(10, 'am_p1_given2').l2.to_list()
# topDPb_com_adv = setdiff_adv.filter(
#     like='COM', axis=0).nlargest(10, 'am_p1_given2').l2.to_list()

# # %%
# topLRC_neg_adv = setdiff_adv.filter(like='NEG', axis=0).nlargest(
#     10, 'conservative_log_ratio').l2.to_list()
# topLRC_com_adv = setdiff_adv.filter(like='COM', axis=0).nlargest(
#     10, 'conservative_log_ratio').l2.to_list()

# setdiff_adv.unexpected_f / setdiff_adv.E11
# setdiff_adv.unexpected_f / setdiff_adv.f
# setdiff_adv.unexpected_f / setdiff_adv.f2


# setdiff_adv.loc[setdiff_adv.l2.isin(topDPb_com_adv+)]

# setdiff_adv.filter(like='COM', axis=0).nlargest(10, 'am_p1_given2')