In [1]:
import pandas as pd 
import association_measures.binomial as bn
import association_measures.frequencies as fq
import association_measures.measures as am
from utils.dataframes import print_md_table
from utils.general import convert_ucs_to_csv as txt_to_csv, confirm_dir
import re

from pathlib import Path
from os import system
import utils as ut
import matplotlib.pyplot as plt
%matplotlib inline

SANPI_DIR = Path('/share/compling/projects/sanpi')
FRQ_THRESH=10
pd.set_option('display.max_colwidth', 20)
pd.set_option('display.max_columns', 10)
pd.set_option('display.width', 120)

In [2]:
def get_csv(unit):
    bare_input = (SANPI_DIR / 'results' / 'ucs_tables' / 'readable' /
                  f'polarized-{unit}_min{FRQ_THRESH}x.rsort-view')
    input_csv = bare_input.with_name(f'{bare_input.name}.csv')
    if input_csv.is_file():
        return input_csv

    input_txt = bare_input.with_name(f'{bare_input.name}.txt')
    if not input_txt.is_file():
        init_ucs = bare_input.parent.parent / \
            f'polarized-{unit}_min{FRQ_THRESH}x.ds.gz'
        if not init_ucs.is_file():
            initialize = f'python {SANPI_DIR}/script/polarize_tsv_for_ucs.py -w {unit} -m {FRQ_THRESH} -R'
            run_shell_command(initialize)

        transform = f'time bash {SANPI_DIR}/script/transform_ucs.sh \\\n  {init_ucs}'
        run_shell_command(transform)

    txt_to_csv(input_txt)

    return input_csv


def run_shell_command(command_str):
    print(command_str + '\n>>>')
    system(command_str)


def _pull_data():
    for unit in ('bigram', 'adv', 'adj'):

        yield unit, get_csv(unit)

In [3]:
csv_paths = pd.Series({u: p for u, p in _pull_data()})

print(csv_paths.to_frame('path to `ucs` scores').to_markdown())

|        | path to `ucs` scores                                                                              |
|:-------|:--------------------------------------------------------------------------------------------------|
| bigram | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-bigram_min10x.rsort-view.csv |
| adv    | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-adv_min10x.rsort-view.csv    |
| adj    | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-adj_min10x.rsort-view.csv    |


UCS Data Paths

|        | path to `ucs` scores                                                                              |
|:-------|:--------------------------------------------------------------------------------------------------|
| bigram | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-bigram_min10x.rsort-view.csv |
| adv    | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-adv_min10x.rsort-view.csv    |
| adj    | /share/compling/projects/sanpi/results/ucs_tables/readable/polarized-adj_min10x.rsort-view.csv    |


In [4]:
def load_from_ucs_csv(input_csv):
    df = pd.read_csv(input_csv)
    df['key'] = df.l1.apply(lambda x: x[:3]) + '-' + df.l2
    return df.reset_index().set_index('key')

In [5]:
dfs = {count_type:
       load_from_ucs_csv(csv_paths[count_type])
       for count_type in csv_paths.index}
adv_df = dfs['adv'].round(3)
adv_df

Unnamed: 0_level_0,index,l1,l2,f,E11,...,am_p2_given1_margin,am_expect_diff,f1,f2,N
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
NEG-necessarily,0,NEGATED,necessarily,41042,2046.636,...,0.012,38995.364,3151804,54081,83284343
NEG-exactly,1,NEGATED,exactly,42021,2250.728,...,0.013,39770.272,3151804,59474,83284343
NEG-that,2,NEGATED,that,165327,9442.847,...,0.049,155884.153,3151804,249521,83284343
NEG-immediately,3,NEGATED,immediately,57145,3828.402,...,0.017,53316.598,3151804,101163,83284343
NEG-yet,4,NEGATED,yet,51891,3670.593,...,0.015,48220.407,3151804,96993,83284343
...,...,...,...,...,...,...,...,...,...,...,...
COM-yet,1867,COMPLEMENT,yet,45102,93322.407,...,-0.001,-48220.407,80132539,96993,83284343
COM-immediately,1868,COMPLEMENT,immediately,44018,97334.598,...,-0.001,-53316.598,80132539,101163,83284343
COM-that,1869,COMPLEMENT,that,84194,240078.153,...,-0.002,-155884.153,80132539,249521,83284343
COM-exactly,1870,COMPLEMENT,exactly,17453,57223.272,...,-0.000,-39770.272,80132539,59474,83284343


## Top Values based on adjusted conditional probability 

> Probability of environment (`l1`) given bigram (`l2`): `p1.given2`

In [6]:
for count_type, df in dfs.items():
    print(f'\n### By {count_type}\n')
    n = 5 if count_type == 'bigram' else 8
    print(df.head(n).round(2).T.to_markdown(floatfmt=',.2f'))


### By bigram

|                      | NEG-exactly_shy   | NEG-exactly_forthcoming   | NEG-exactly_practical   | NEG-exactly_impressive   | NEG-exactly_straightforward   |
|:---------------------|:------------------|:--------------------------|:------------------------|:-------------------------|:------------------------------|
| index                | 0                 | 1                         | 2                       | 3                        | 4                             |
| l1                   | NEGATED           | NEGATED                   | NEGATED                 | NEGATED                  | NEGATED                       |
| l2                   | exactly_shy       | exactly_forthcoming       | exactly_practical       | exactly_impressive       | exactly_straightforward       |
| f                    | 124               | 107                       | 106                     | 100                      | 83                            |
| E11                  | 4.69       


### By bigram

|                      | NEG-exactly_shy | NEG-exactly_forthcoming | NEG-exactly_practical | NEG-exactly_impressive | NEG-exactly_straightforward |
|:---------------------|:----------------|:------------------------|:----------------------|:-----------------------|:----------------------------|
| index                | 0               | 1                       | 2                     | 3                      | 4                           |
| l1                   | NEGATED         | NEGATED                 | NEGATED               | NEGATED                | NEGATED                     |
| l2                   | exactly_shy     | exactly_forthcoming     | exactly_practical     | exactly_impressive     | exactly_straightforward     |
| f                    | 124             | 107                     | 106                   | 100                    | 83                          |
| E11                  | 4.69            | 4.05                    | 4.01                  | 3.78                   | 3.14                        |
| r_log_likelihood     | 1514            | 1728                    | 1744                  | 1839                   | 2164                        |
| r_log_likelihood_tt  | 2950            | 3340                    | 3370                  | 3548                   | 4147                        |
| r_odds_ratio_disc    | 1               | 2                       | 3                     | 4                      | 5                           |
| r_Dice               | 6166            | 7004                    | 7078                  | 7463                   | 8680                        |
| r_t_score            | 1727            | 1980                    | 2003                  | 2107                   | 2447                        |
| r_p1_given2          | 1               | 2                       | 3                     | 4                      | 5                           |
| r_p2_given1          | 4066            | 4630                    | 4675                  | 4910                   | 5729                        |
| r_p1_given2_simple   | 1               | 1                       | 1                     | 1                      | 1                           |
| r_p2_given1_simple   | 6166            | 7005                    | 7079                  | 7463                   | 8681                        |
| r_p1_given2_margin   | 1               | 1                       | 1                     | 1                      | 1                           |
| r_p2_given1_margin   | 2029            | 2305                    | 2326                  | 2435                   | 2837                        |
| r_expect_diff        | 4066            | 4630                    | 4675                  | 4910                   | 5729                        |
| am_log_likelihood    | 812.03          | 700.7                   | 694.15                | 654.86                 | 543.53                      |
| am_log_likelihood_tt | 812.03          | 700.7                   | 694.15                | 654.86                 | 543.53                      |
| am_odds_ratio_disc   | 3.8             | 3.74                    | 3.73                  | 3.71                   | 3.63                        |
| am_Dice              | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_t_score           | 10.71           | 9.95                    | 9.91                  | 9.62                   | 8.77                        |
| am_p1_given2         | 0.96            | 0.96                    | 0.96                  | 0.96                   | 0.96                        |
| am_p2_given1         | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_p1_given2_simple  | 1.0             | 1.0                     | 1.0                   | 1.0                    | 1.0                         |
| am_p2_given1_simple  | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_p1_given2_margin  | 0.96            | 0.96                    | 0.96                  | 0.96                   | 0.96                        |
| am_p2_given1_margin  | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_expect_diff       | 119.31          | 102.95                  | 101.99                | 96.22                  | 79.86                       |
| f1                   | 3151804         | 3151804                 | 3151804               | 3151804                | 3151804                     |
| f2                   | 124             | 107                     | 106                   | 100                    | 83                          |
| N                    | 83284343        | 83284343                | 83284343              | 83284343               | 83284343                    |

### By adv

|                      | NEG-necessarily | NEG-exactly | NEG-that  | NEG-immediately | NEG-yet   | NEG-terribly | NEG-remotely | NEG-only  |
|:---------------------|:----------------|:------------|:----------|:----------------|:----------|:-------------|:-------------|:----------|
| index                | 0               | 1           | 2         | 3               | 4         | 5            | 6            | 7         |
| l1                   | NEGATED         | NEGATED     | NEGATED   | NEGATED         | NEGATED   | NEGATED      | NEGATED      | NEGATED   |
| l2                   | necessarily     | exactly     | that      | immediately     | yet       | terribly     | remotely     | only      |
| f                    | 41042           | 42021       | 165327    | 57145           | 51891     | 17874        | 5513         | 110004    |
| E11                  | 2046.64         | 2250.73     | 9442.85   | 3828.4          | 3670.59   | 2578.68      | 810.16       | 16875.24  |
| r_log_likelihood     | 7               | 9           | 1         | 6               | 8         | 22           | 41           | 5         |
| r_log_likelihood_tt  | 13              | 17          | 1         | 11              | 15        | 43           | 81           | 9         |
| r_odds_ratio_disc    | 7               | 11          | 13        | 33              | 39        | 229          | 238          | 249       |
| r_Dice               | 32              | 30          | 7         | 21              | 22        | 46           | 98           | 12        |
| r_t_score            | 11              | 10          | 2         | 6               | 8         | 17           | 24           | 4         |
| r_p1_given2          | 1               | 2           | 3         | 4               | 5         | 6            | 7            | 8         |
| r_p2_given1          | 18              | 16          | 7         | 12              | 14        | 33           | 61           | 9         |
| r_p1_given2_simple   | 998             | 1002        | 1003      | 1004            | 1005      | 1010         | 1011         | 1012      |
| r_p2_given1_simple   | 32              | 31          | 11        | 23              | 26        | 47           | 102          | 14        |
| r_p1_given2_margin   | 1               | 2           | 3         | 4               | 5         | 6            | 7            | 8         |
| r_p2_given1_margin   | 11              | 10          | 3         | 7               | 9         | 18           | 30           | 5         |
| r_expect_diff        | 18              | 16          | 7         | 12              | 14        | 33           | 61           | 9         |
| am_log_likelihood    | 210533.42       | 205058.83   | 778276.84 | 240024.55       | 210078.03 | 42580.88     | 12911.7      | 250948.21 |
| am_log_likelihood_tt | 210533.42       | 205058.83   | 778276.84 | 240024.55       | 210078.03 | 42580.88     | 12911.7      | 250948.21 |
| am_odds_ratio_disc   | 1.91            | 1.79        | 1.72      | 1.53            | 1.47      | 0.96         | 0.95         | 0.93      |
| am_Dice              | 0.03            | 0.03        | 0.1       | 0.04            | 0.03      | 0.01         | 0.0          | 0.06      |
| am_t_score           | 192.49          | 194.01      | 383.38    | 223.04          | 211.68    | 114.41       | 63.34        | 280.79    |
| am_p1_given2         | 0.72            | 0.67        | 0.63      | 0.53            | 0.5       | 0.22         | 0.22         | 0.21      |
| am_p2_given1         | 0.01            | 0.01        | 0.05      | 0.02            | 0.02      | 0.01         | 0.0          | 0.03      |
| am_p1_given2_simple  | 0.76            | 0.71        | 0.66      | 0.56            | 0.53      | 0.26         | 0.26         | 0.25      |
| am_p2_given1_simple  | 0.01            | 0.01        | 0.05      | 0.02            | 0.02      | 0.01         | 0.0          | 0.03      |
| am_p1_given2_margin  | 0.72            | 0.67        | 0.62      | 0.53            | 0.5       | 0.22         | 0.22         | 0.21      |
| am_p2_given1_margin  | 0.01            | 0.01        | 0.05      | 0.02            | 0.02      | 0.0          | 0.0          | 0.03      |
| am_expect_diff       | 38995.36        | 39770.27    | 155884.15 | 53316.6         | 48220.41  | 15295.32     | 4702.84      | 93128.76  |
| f1                   | 3151804         | 3151804     | 3151804   | 3151804         | 3151804   | 3151804      | 3151804      | 3151804   |
| f2                   | 54081           | 59474       | 249521    | 101163          | 96993     | 68140        | 21408        | 445917    |
| N                    | 83284343        | 83284343    | 83284343  | 83284343        | 83284343  | 83284343     | 83284343     | 83284343  |

### By adj

|                      | NEG-shabby | NEG-farfetched | NEG-clear-cut | NEG-rosy | NEG-far-fetched | NEG-dissimilar | NEG-cut  | NEG-binding |
|:---------------------|:-----------|:---------------|:--------------|:---------|:----------------|:---------------|:---------|:------------|
| index                | 0          | 1              | 2             | 3        | 4               | 5              | 6        | 7           |
| l1                   | NEGATED    | NEGATED        | NEGATED       | NEGATED  | NEGATED         | NEGATED        | NEGATED  | NEGATED     |
| l2                   | shabby     | farfetched     | clear-cut     | rosy     | far-fetched     | dissimilar     | cut      | binding     |
| f                    | 5538       | 569            | 1368          | 2076     | 1969            | 2524           | 385      | 683         |
| E11                  | 301.35     | 66.98          | 166.02        | 252.23   | 258.44          | 332.08         | 58.58    | 106.23      |
| r_log_likelihood     | 10         | 151            | 81            | 54       | 61              | 47             | 234      | 164         |
| r_log_likelihood_tt  | 19         | 301            | 161           | 107      | 121             | 93             | 467      | 327         |
| r_odds_ratio_disc    | 12         | 110            | 116           | 118      | 136             | 137            | 186      | 194         |
| r_Dice               | 204        | 1414           | 734           | 523      | 543             | 430            | 1838     | 1245        |
| r_t_score            | 19         | 107            | 70            | 55       | 56              | 47             | 148      | 103         |
| r_p1_given2          | 1          | 2              | 3             | 4        | 5               | 6              | 7        | 8           |
| r_p2_given1          | 56         | 459            | 227           | 160      | 171             | 141            | 610      | 414         |
| r_p1_given2_simple   | 3891       | 3895           | 3896          | 3897     | 3899            | 3900           | 3901     | 3902        |
| r_p2_given1_simple   | 209        | 1415           | 737           | 526      | 548             | 437            | 1838     | 1245        |
| r_p1_given2_margin   | 1          | 2              | 3             | 4        | 5               | 6              | 7        | 8           |
| r_p2_given1_margin   | 33         | 170            | 101           | 74       | 79              | 67             | 216      | 157         |
| r_expect_diff        | 56         | 459            | 227           | 160      | 171             | 141            | 610      | 414         |
| am_log_likelihood    | 26673.12   | 1595.87        | 3747.09       | 5681.7   | 5066.42         | 6482.12        | 874.39   | 1521.59     |
| am_log_likelihood_tt | 26673.12   | 1595.87        | 3747.09       | 5681.7   | 5066.42         | 6482.12        | 874.39   | 1521.59     |
| am_odds_ratio_disc   | 1.76       | 1.08           | 1.06          | 1.06     | 1.01            | 1.01           | 0.93     | 0.91        |
| am_Dice              | 0.0        | 0.0            | 0.0           | 0.0      | 0.0             | 0.0            | 0.0      | 0.0         |
| am_t_score           | 70.37      | 21.05          | 32.5          | 40.03    | 38.55           | 43.63          | 16.64    | 22.07       |
| am_p1_given2         | 0.66       | 0.28           | 0.27          | 0.27     | 0.25            | 0.25           | 0.21     | 0.21        |
| am_p2_given1         | 0.0        | 0.0            | 0.0           | 0.0      | 0.0             | 0.0            | 0.0      | 0.0         |
| am_p1_given2_simple  | 0.7        | 0.32           | 0.31          | 0.31     | 0.29            | 0.29           | 0.25     | 0.24        |
| am_p2_given1_simple  | 0.0        | 0.0            | 0.0           | 0.0      | 0.0             | 0.0            | 0.0      | 0.0         |
| am_p1_given2_margin  | 0.66       | 0.28           | 0.27          | 0.27     | 0.25            | 0.25           | 0.21     | 0.21        |
| am_p2_given1_margin  | 0.0        | 0.0            | 0.0           | 0.0      | 0.0             | 0.0            | 0.0      | 0.0         |
| am_expect_diff       | 5236.65    | 502.02         | 1201.98       | 1823.77  | 1710.56         | 2191.92        | 326.42   | 576.77      |
| f1                   | 3151804    | 3151804        | 3151804       | 3151804  | 3151804         | 3151804        | 3151804  | 3151804     |
| f2                   | 7963       | 1770           | 4387          | 6665     | 6829            | 8775           | 1548     | 2807        |
| N                    | 83284343   | 83284343       | 83284343      | 83284343 | 83284343        | 83284343       | 83284343 | 83284343    |


## Top `NEGATED` values

In [7]:
for count_type, df in dfs.items():
    print(f'\n### Top `NEGATED`: By {count_type}\n')
    n = 5 if count_type == 'bigram' else 8
    print(df.filter(like='NEG', axis=0).filter(regex=r'^[^r]').head(
        n).round(3).T.to_markdown(floatfmt=',.3f'))


### Top `NEGATED`: By bigram

|                      | NEG-exactly_shy   | NEG-exactly_forthcoming   | NEG-exactly_practical   | NEG-exactly_impressive   | NEG-exactly_straightforward   |
|:---------------------|:------------------|:--------------------------|:------------------------|:-------------------------|:------------------------------|
| index                | 0                 | 1                         | 2                       | 3                        | 4                             |
| l1                   | NEGATED           | NEGATED                   | NEGATED                 | NEGATED                  | NEGATED                       |
| l2                   | exactly_shy       | exactly_forthcoming       | exactly_practical       | exactly_impressive       | exactly_straightforward       |
| f                    | 124               | 107                       | 106                     | 100                      | 83                            |
| E11                


### Top `NEGATED`: By bigram

|                      | NEG-exactly_shy | NEG-exactly_forthcoming | NEG-exactly_practical | NEG-exactly_impressive | NEG-exactly_straightforward |
|:---------------------|:----------------|:------------------------|:----------------------|:-----------------------|:----------------------------|
| index                | 0               | 1                       | 2                     | 3                      | 4                           |
| l1                   | NEGATED         | NEGATED                 | NEGATED               | NEGATED                | NEGATED                     |
| l2                   | exactly_shy     | exactly_forthcoming     | exactly_practical     | exactly_impressive     | exactly_straightforward     |
| f                    | 124             | 107                     | 106                   | 100                    | 83                          |
| E11                  | 4.693           | 4.049                   | 4.011                 | 3.784                  | 3.141                       |
| am_log_likelihood    | 812.028         | 700.701                 | 694.152               | 654.86                 | 543.534                     |
| am_log_likelihood_tt | 812.028         | 700.701                 | 694.152               | 654.86                 | 543.534                     |
| am_odds_ratio_disc   | 3.801           | 3.738                   | 3.734                 | 3.708                  | 3.628                       |
| am_Dice              | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_t_score           | 10.714          | 9.953                   | 9.906                 | 9.622                  | 8.766                       |
| am_p1_given2         | 0.962           | 0.962                   | 0.962                 | 0.962                  | 0.962                       |
| am_p2_given1         | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_p1_given2_simple  | 1.0             | 1.0                     | 1.0                   | 1.0                    | 1.0                         |
| am_p2_given1_simple  | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_p1_given2_margin  | 0.962           | 0.962                   | 0.962                 | 0.962                  | 0.962                       |
| am_p2_given1_margin  | 0.0             | 0.0                     | 0.0                   | 0.0                    | 0.0                         |
| am_expect_diff       | 119.307         | 102.951                 | 101.989               | 96.216                 | 79.859                      |
| f1                   | 3151804         | 3151804                 | 3151804               | 3151804                | 3151804                     |
| f2                   | 124             | 107                     | 106                   | 100                    | 83                          |
| N                    | 83284343        | 83284343                | 83284343              | 83284343               | 83284343                    |

### Top `NEGATED`: By adv

|                      | NEG-necessarily | NEG-exactly | NEG-that   | NEG-immediately | NEG-yet    | NEG-terribly | NEG-remotely | NEG-only   |
|:---------------------|:----------------|:------------|:-----------|:----------------|:-----------|:-------------|:-------------|:-----------|
| index                | 0               | 1           | 2          | 3               | 4          | 5            | 6            | 7          |
| l1                   | NEGATED         | NEGATED     | NEGATED    | NEGATED         | NEGATED    | NEGATED      | NEGATED      | NEGATED    |
| l2                   | necessarily     | exactly     | that       | immediately     | yet        | terribly     | remotely     | only       |
| f                    | 41042           | 42021       | 165327     | 57145           | 51891      | 17874        | 5513         | 110004     |
| E11                  | 2046.636        | 2250.728    | 9442.847   | 3828.402        | 3670.593   | 2578.683     | 810.162      | 16875.236  |
| am_log_likelihood    | 210533.425      | 205058.828  | 778276.843 | 240024.553      | 210078.026 | 42580.876    | 12911.705    | 250948.213 |
| am_log_likelihood_tt | 210533.425      | 205058.828  | 778276.843 | 240024.553      | 210078.026 | 42580.876    | 12911.705    | 250948.213 |
| am_odds_ratio_disc   | 1.909           | 1.793       | 1.721      | 1.526           | 1.473      | 0.958        | 0.946        | 0.934      |
| am_Dice              | 0.026           | 0.026       | 0.097      | 0.035           | 0.032      | 0.011        | 0.003        | 0.061      |
| am_t_score           | 192.486         | 194.011     | 383.381    | 223.035         | 211.682    | 114.406      | 63.338       | 280.789    |
| am_p1_given2         | 0.722           | 0.669       | 0.627      | 0.528           | 0.498      | 0.225        | 0.22         | 0.21       |
| am_p2_given1         | 0.013           | 0.013       | 0.051      | 0.018           | 0.016      | 0.005        | 0.002        | 0.031      |
| am_p1_given2_simple  | 0.759           | 0.707       | 0.663      | 0.565           | 0.535      | 0.262        | 0.258        | 0.247      |
| am_p2_given1_simple  | 0.013           | 0.013       | 0.052      | 0.018           | 0.016      | 0.006        | 0.002        | 0.035      |
| am_p1_given2_margin  | 0.721           | 0.669       | 0.625      | 0.527           | 0.497      | 0.224        | 0.22         | 0.209      |
| am_p2_given1_margin  | 0.012           | 0.013       | 0.049      | 0.017           | 0.015      | 0.005        | 0.001        | 0.03       |
| am_expect_diff       | 38995.364       | 39770.272   | 155884.153 | 53316.598       | 48220.407  | 15295.317    | 4702.838     | 93128.764  |
| f1                   | 3151804         | 3151804     | 3151804    | 3151804         | 3151804    | 3151804      | 3151804      | 3151804    |
| f2                   | 54081           | 59474       | 249521     | 101163          | 96993      | 68140        | 21408        | 445917     |
| N                    | 83284343        | 83284343    | 83284343   | 83284343        | 83284343   | 83284343     | 83284343     | 83284343   |

### Top `NEGATED`: By adj

|                      | NEG-shabby | NEG-farfetched | NEG-clear-cut | NEG-rosy | NEG-far-fetched | NEG-dissimilar | NEG-cut  | NEG-binding |
|:---------------------|:-----------|:---------------|:--------------|:---------|:----------------|:---------------|:---------|:------------|
| index                | 0          | 1              | 2             | 3        | 4               | 5              | 6        | 7           |
| l1                   | NEGATED    | NEGATED        | NEGATED       | NEGATED  | NEGATED         | NEGATED        | NEGATED  | NEGATED     |
| l2                   | shabby     | farfetched     | clear-cut     | rosy     | far-fetched     | dissimilar     | cut      | binding     |
| f                    | 5538       | 569            | 1368          | 2076     | 1969            | 2524           | 385      | 683         |
| E11                  | 301.351    | 66.984         | 166.021       | 252.23   | 258.436         | 332.08         | 58.582   | 106.228     |
| am_log_likelihood    | 26673.119  | 1595.866       | 3747.089      | 5681.699 | 5066.423        | 6482.123       | 874.387  | 1521.586    |
| am_log_likelihood_tt | 26673.119  | 1595.866       | 3747.089      | 5681.699 | 5066.423        | 6482.123       | 874.387  | 1521.586    |
| am_odds_ratio_disc   | 1.765      | 1.081          | 1.062         | 1.061    | 1.013           | 1.012          | 0.926    | 0.913       |
| am_Dice              | 0.004      | 0.0            | 0.001         | 0.001    | 0.001           | 0.002          | 0.0      | 0.0         |
| am_t_score           | 70.368     | 21.046         | 32.498        | 40.027   | 38.549          | 43.629         | 16.636   | 22.07       |
| am_p1_given2         | 0.658      | 0.284          | 0.274         | 0.274    | 0.251           | 0.25           | 0.211    | 0.205       |
| am_p2_given1         | 0.002      | 0.0            | 0.0           | 0.001    | 0.001           | 0.001          | 0.0      | 0.0         |
| am_p1_given2_simple  | 0.695      | 0.321          | 0.312         | 0.311    | 0.288           | 0.288          | 0.249    | 0.243       |
| am_p2_given1_simple  | 0.002      | 0.0            | 0.0           | 0.001    | 0.001           | 0.001          | 0.0      | 0.0         |
| am_p1_given2_margin  | 0.658      | 0.284          | 0.274         | 0.274    | 0.25            | 0.25           | 0.211    | 0.205       |
| am_p2_given1_margin  | 0.002      | 0.0            | 0.0           | 0.001    | 0.001           | 0.001          | 0.0      | 0.0         |
| am_expect_diff       | 5236.649   | 502.016        | 1201.979      | 1823.77  | 1710.564        | 2191.92        | 326.418  | 576.772     |
| f1                   | 3151804    | 3151804        | 3151804       | 3151804  | 3151804         | 3151804        | 3151804  | 3151804     |
| f2                   | 7963       | 1770           | 4387          | 6665     | 6829            | 8775           | 1548     | 2807        |
| N                    | 83284343   | 83284343       | 83284343      | 83284343 | 83284343        | 83284343       | 83284343 | 83284343    |


In [8]:
def print_example(df, 
                  count_type=None, 
                  example_key=None, 
                  round_level=2, 
                  sort_by='am_p1_given2', 
                  columns_like=r'^([^ECORr_]|E11)', 
                  regex=False) -> None:
    """
    Prints a specific example from a dataframe.

    Args:
        df (pandas.DataFrame): The dataframe to extract the example from.
        count_type (str, optional): The type of count to consider. Defaults to None.
        example_key (str, optional): The key of the example to print. Defaults to None.
        round_level (int, optional): The number of decimal places to round the example values to. Defaults to 2.
    """
    if not example_key:
        example_keys = {'bigram': 'exactly_sure',
                        'adv': 'exactly',
                        'adj': 'sure'}
        example_key = example_keys[count_type]
    if regex:
        example = df.round(round_level).filter(axis=0, regex=example_key)
    else:
        example = df.round(round_level).filter(axis=0, like=example_key)
    if sort_by not in example.columns:
        sort_by = example.columns.iloc[0]
    example = example.sort_values(
        sort_by, ascending=sort_by.startswith(('r_', 'l')))
    example = example.filter(regex=columns_like).sort_index(axis=1)
    if example.empty:
        print(f'🤷 No {count_type} match {example_key}')
    else: 
        transpose = example.shape[0] < example.shape[1] * .9
        print_md_table(example, transpose=transpose, n_dec=round_level,
                    title=f'### {count_type.capitalize()} "{example_key}" examples sorted by `{sort_by}` column\n')
    print('\n---')

```python
for count_type, df in dfs.items():
    
    print_example(df, count_type)
```


### Bigram "exactly_sure" examples sorted by `am_p1_given2` column

|                      | NEG-exactly_sure   | COM-exactly_sure   |
|:---------------------|:-------------------|:-------------------|
| E11                  | 351.99             | 8949.01            |
| N                    | 83284343           | 83284343           |
| am_Dice              | 0.01               | 0.0                |
| am_expect_diff       | 8512.01            | -8512.01           |
| am_log_likelihood    | 54578.39           | -54578.39          |
| am_log_likelihood_tt | 54578.39           | 54578.39           |
| am_odds_ratio_disc   | 2.71               | -2.71              |
| am_p1_given2         | 0.92               | -0.92              |
| am_p1_given2_margin  | 0.92               | -0.92              |
| am_p1_given2_simple  | 0.95               | 0.05               |
| am_p2_given1         | 0.0                | -0.0               |
| am_p2_given1_margin  | 0.0                | -0.0               |
| am_p2_given1_simple  | 0.0                | 0.0                |
| am_t_score           | 90.41              | -407.18            |
| f                    | 8864               | 437                |
| f1                   | 3151804            | 80132539           |
| f2                   | 9301               | 9301               |
| index                | 383                | 246896             |
| l1                   | NEGATED            | COMPLEMENT         |
| l2                   | exactly_sure       | exactly_sure       |

---

### Adv "exactly" examples sorted by `am_p1_given2` column

|                      | NEG-exactly   | COM-exactly   |
|:---------------------|:--------------|:--------------|
| E11                  | 2250.73       | 57223.27      |
| N                    | 83284343      | 83284343      |
| am_Dice              | 0.03          | 0.0           |
| am_expect_diff       | 39770.27      | -39770.27     |
| am_log_likelihood    | 205058.83     | -205058.83    |
| am_log_likelihood_tt | 205058.83     | 205058.83     |
| am_odds_ratio_disc   | 1.79          | -1.79         |
| am_p1_given2         | 0.67          | -0.67         |
| am_p1_given2_margin  | 0.67          | -0.67         |
| am_p1_given2_simple  | 0.71          | 0.29          |
| am_p2_given1         | 0.01          | -0.01         |
| am_p2_given1_margin  | 0.01          | -0.0          |
| am_p2_given1_simple  | 0.01          | 0.0           |
| am_t_score           | 194.01        | -301.04       |
| f                    | 42021         | 17453         |
| f1                   | 3151804       | 80132539      |
| f2                   | 59474         | 59474         |
| index                | 1             | 1870          |
| l1                   | NEGATED       | COMPLEMENT    |
| l2                   | exactly       | exactly       |

---

### Adj "sure" examples sorted by `am_p1_given2` column

|                      | NEG-sure   | COM-unsure   | COM-uninsured   | NEG-uninsured   | NEG-unsure   | COM-sure   |
|:---------------------|:-----------|:-------------|:----------------|:----------------|:-------------|:-----------|
| E11                  | 31942.75   | 13039.14     | 3258.82         | 128.18          | 512.86       | 812123.25  |
| N                    | 83284343   | 83284343     | 83284343        | 83284343        | 83284343     | 83284343   |
| am_Dice              | 0.06       | 0.0          | 0.0             | 0.0             | 0.0          | 0.02       |
| am_expect_diff       | 96870.25   | 449.86       | 109.18          | -109.18         | -449.86      | -96870.25  |
| am_log_likelihood    | 180698.78  | 650.93       | 149.43          | -149.43         | -650.93      | -180698.78 |
| am_log_likelihood_tt | 180698.78  | 650.93       | 149.43          | 149.43          | 650.93       | 180698.78  |
| am_odds_ratio_disc   | 0.67       | 0.92         | 0.83            | -0.83           | -0.92        | -0.67      |
| am_p1_given2         | 0.12       | 0.03         | 0.03            | -0.03           | -0.03        | -0.12      |
| am_p1_given2_margin  | 0.11       | 0.03         | 0.03            | -0.03           | -0.03        | -0.11      |
| am_p1_given2_simple  | 0.15       | 1.0          | 0.99            | 0.01            | 0.0          | 0.85       |
| am_p2_given1         | 0.03       | 0.0          | 0.0             | -0.0            | -0.0         | -0.03      |
| am_p2_given1_margin  | 0.03       | 0.0          | 0.0             | -0.0            | -0.0         | -0.0       |
| am_p2_given1_simple  | 0.04       | 0.0          | 0.0             | 0.0             | 0.0          | 0.01       |
| am_t_score           | 269.9      | 3.87         | 1.88            | -25.05          | -56.68       | -114.54    |
| f                    | 128813     | 13489        | 3368            | 19              | 63           | 715253     |
| f1                   | 3151804    | 80132539     | 80132539        | 3151804         | 3151804      | 80132539   |
| f2                   | 844066     | 13552        | 3387            | 3387            | 13552        | 844066     |
| index                | 24         | 415          | 480             | 7254            | 7304         | 7563       |
| l1                   | NEGATED    | COMPLEMENT   | COMPLEMENT      | NEGATED         | NEGATED      | COMPLEMENT |
| l2                   | sure       | unsure       | uninsured       | uninsured       | unsure       | sure       |

In [9]:
def add_extra_am(df_dict):
    for count_type, df in df_dict.items():
        try:
            scores = am.score(df)
        except KeyError:
            df = df.join(fq.observed_frequencies(df)).join(
                fq.expected_frequencies(df))
            scores = am.score(df)
        # loaded_cols = df.columns.to_list()
        df = df.join(scores.loc[:, ~scores.columns.isin(df.columns)])

        print_example(df, count_type, columns_like=r'^[^ECORr]')
        df_dict[count_type] = df
    return df_dict

dfs_plus = add_extra_am(dfs)


### Bigram "exactly_sure" examples sorted by `am_p1_given2` column

|                          | NEG-exactly_sure   | COM-exactly_sure   |
|:-------------------------|:-------------------|:-------------------|
| N                        | 83284343           | 83284343           |
| am_Dice                  | 0.01               | 0.0                |
| am_expect_diff           | 8512.01            | -8512.01           |
| am_log_likelihood        | 54578.39           | -54578.39          |
| am_log_likelihood_tt     | 54578.39           | 54578.39           |
| am_odds_ratio_disc       | 2.71               | -2.71              |
| am_p1_given2             | 0.92               | -0.92              |
| am_p1_given2_margin      | 0.92               | -0.92              |
| am_p1_given2_simple      | 0.95               | 0.05               |
| am_p2_given1             | 0.0                | -0.0               |
| am_p2_given1_margin      | 0.0                | -0.0               |
| am_p2_


### Bigram "exactly_sure" examples sorted by `am_p1_given2` column

|                          | NEG-exactly_sure   | COM-exactly_sure   |
|:-------------------------|:-------------------|:-------------------|
| N                        | 83284343           | 83284343           |
| am_Dice                  | 0.01               | 0.0                |
| am_expect_diff           | 8512.01            | -8512.01           |
| am_log_likelihood        | 54578.39           | -54578.39          |
| am_log_likelihood_tt     | 54578.39           | 54578.39           |
| am_odds_ratio_disc       | 2.71               | -2.71              |
| am_p1_given2             | 0.92               | -0.92              |
| am_p1_given2_margin      | 0.92               | -0.92              |
| am_p1_given2_simple      | 0.95               | 0.05               |
| am_p2_given1             | 0.0                | -0.0               |
| am_p2_given1_margin      | 0.0                | -0.0               |
| am_p2_given1_simple      | 0.0                | 0.0                |
| am_t_score               | 90.41              | -407.18            |
| conservative_log_ratio   | 8.61               | -8.61              |
| dice                     | 0.01               | 0.0                |
| f                        | 8864               | 437                |
| f1                       | 3151804            | 80132539           |
| f2                       | 9301               | 9301               |
| index                    | 383                | 246896             |
| ipm                      | 2812.36            | 5.45               |
| ipm_expected             | 111.68             | 111.68             |
| ipm_reference            | 5.45               | 2812.36            |
| l1                       | NEGATED            | COMPLEMENT         |
| l2                       | exactly_sure       | exactly_sure       |
| liddell                  | 0.92               | -0.92              |
| local_mutual_information | 12419.39           | -573.04            |
| log_likelihood           | 54578.39           | -54578.39          |
| log_ratio                | 9.01               | -9.01              |
| min_sensitivity          | 0.0                | 0.0                |
| mutual_information       | 1.4                | -1.31              |
| simple_ll                | 40169.37           | -14385.1           |
| t_score                  | 90.41              | -407.18            |
| z_score                  | 453.7              | -89.98             |

---

### Adv "exactly" examples sorted by `am_p1_given2` column

|                          | NEG-exactly   | COM-exactly   |
|:-------------------------|:--------------|:--------------|
| N                        | 83284343      | 83284343      |
| am_Dice                  | 0.03          | 0.0           |
| am_expect_diff           | 39770.27      | -39770.27     |
| am_log_likelihood        | 205058.83     | -205058.83    |
| am_log_likelihood_tt     | 205058.83     | 205058.83     |
| am_odds_ratio_disc       | 1.79          | -1.79         |
| am_p1_given2             | 0.67          | -0.67         |
| am_p1_given2_margin      | 0.67          | -0.67         |
| am_p1_given2_simple      | 0.71          | 0.29          |
| am_p2_given1             | 0.01          | -0.01         |
| am_p2_given1_margin      | 0.01          | -0.0          |
| am_p2_given1_simple      | 0.01          | 0.0           |
| am_t_score               | 194.01        | -301.04       |
| conservative_log_ratio   | 5.87          | -5.87         |
| dice                     | 0.03          | 0.0           |
| f                        | 42021         | 17453         |
| f1                       | 3151804       | 80132539      |
| f2                       | 59474         | 59474         |
| index                    | 1             | 1870          |
| ipm                      | 13332.36      | 217.8         |
| ipm_expected             | 714.11        | 714.11        |
| ipm_reference            | 217.8         | 13332.36      |
| l1                       | NEGATED       | COMPLEMENT    |
| l2                       | exactly       | exactly       |
| liddell                  | 0.67          | -0.67         |
| local_mutual_information | 53414.72      | -9000.56      |
| log_likelihood           | 205058.83     | -205058.83    |
| log_ratio                | 5.94          | -5.94         |
| min_sensitivity          | 0.01          | 0.0           |
| mutual_information       | 1.27          | -0.52         |
| simple_ll                | 166443.32     | -38091.45     |
| t_score                  | 194.01        | -301.04       |
| z_score                  | 838.3         | -166.25       |

---

### Adj "sure" examples sorted by `am_p1_given2` column

|                          | NEG-sure   | COM-uninsured   | COM-unsure   | NEG-uninsured   | NEG-unsure   | COM-sure   |
|:-------------------------|:-----------|:----------------|:-------------|:----------------|:-------------|:-----------|
| N                        | 83284343   | 83284343        | 83284343     | 83284343        | 83284343     | 83284343   |
| am_Dice                  | 0.06       | 0.0             | 0.0          | 0.0             | 0.0          | 0.02       |
| am_expect_diff           | 96870.25   | 109.18          | 449.86       | -109.18         | -449.86      | -96870.25  |
| am_log_likelihood        | 180698.78  | 149.43          | 650.93       | -149.43         | -650.93      | -180698.78 |
| am_log_likelihood_tt     | 180698.78  | 149.43          | 650.93       | 149.43          | 650.93       | 180698.78  |
| am_odds_ratio_disc       | 0.67       | 0.83            | 0.92         | -0.83           | -0.92        | -0.67      |
| am_p1_given2             | 0.12       | 0.03            | 0.03         | -0.03           | -0.03        | -0.12      |
| am_p1_given2_margin      | 0.11       | 0.03            | 0.03         | -0.03           | -0.03        | -0.11      |
| am_p1_given2_simple      | 0.15       | 0.99            | 1.0          | 0.01            | 0.0          | 0.85       |
| am_p2_given1             | 0.03       | 0.0             | 0.0          | -0.0            | -0.0         | -0.03      |
| am_p2_given1_margin      | 0.03       | 0.0             | 0.0          | -0.0            | -0.0         | -0.0       |
| am_p2_given1_simple      | 0.04       | 0.0             | 0.0          | 0.0             | 0.0          | 0.01       |
| am_t_score               | 269.9      | 1.88            | 3.87         | -25.05          | -56.68       | -114.54    |
| conservative_log_ratio   | 2.17       | 1.31            | 2.2          | -1.31           | -2.2         | -2.17      |
| dice                     | 0.06       | 0.0             | 0.0          | 0.0             | 0.0          | 0.02       |
| f                        | 128813     | 3368            | 13489        | 19              | 63           | 715253     |
| f1                       | 3151804    | 80132539        | 80132539     | 3151804         | 3151804      | 80132539   |
| f2                       | 844066     | 3387            | 13552        | 3387            | 13552        | 844066     |
| index                    | 24         | 480             | 415          | 7254            | 7304         | 7563       |
| ipm                      | 40869.61   | 42.03           | 168.33       | 6.03            | 19.99        | 8925.87    |
| ipm_expected             | 10134.75   | 40.67           | 162.72       | 40.67           | 162.72       | 10134.75   |
| ipm_reference            | 8925.87    | 6.03            | 19.99        | 42.03           | 168.33       | 40869.61   |
| l1                       | NEGATED    | COMPLEMENT      | COMPLEMENT   | NEGATED         | NEGATED      | COMPLEMENT |
| l2                       | sure       | uninsured       | unsure       | uninsured       | unsure       | sure       |
| liddell                  | 0.12       | 0.03            | 0.03         | -0.03           | -0.03        | -0.12      |
| local_mutual_information | 78007.54   | 48.2            | 198.7        | -15.75          | -57.37       | -39454.97  |
| log_likelihood           | 180698.78  | 149.43          | 650.93       | -149.43         | -650.93      | -180698.78 |
| log_ratio                | 2.19       | 2.8             | 3.07         | -2.8            | -3.07        | -2.19      |
| min_sensitivity          | 0.04       | 0.0             | 0.0          | 0.0             | 0.0          | 0.01       |
| mutual_information       | 0.61       | 0.01            | 0.01         | -0.83           | -0.91        | -0.06      |
| simple_ll                | 165497.47  | 3.62            | 15.35        | -145.81         | -635.52      | -12043.65  |
| t_score                  | 269.9      | 1.88            | 3.87         | -25.05          | -56.68       | -114.54    |
| z_score                  | 542.01     | 1.91            | 3.94         | -9.64           | -19.86       | -107.49    |

---


In [10]:
for unit, df in dfs_plus.items():
    print_example(df, unit,example_key='necessarily', sort_by='conservative_log_ratio')


### Bigram "necessarily" examples sorted by `conservative_log_ratio` column

| key                               |      E11 |        N |   am_Dice |   am_expect_diff |   am_log_likelihood |   am_log_likelihood_tt |   am_odds_ratio_disc |   am_p1_given2 |   am_p1_given2_margin |   am_p1_given2_simple |   am_p2_given1 |   am_p2_given1_margin |   am_p2_given1_simple |   am_t_score |   conservative_log_ratio |   dice |    f |       f1 |   f2 |   index |      ipm |   ipm_expected |   ipm_reference | l1         | l2                            |   liddell |   local_mutual_information |   log_likelihood |   log_ratio |   min_sensitivity |   mutual_information |   simple_ll |   t_score |   z_score |
|:----------------------------------|---------:|---------:|----------:|-----------------:|--------------------:|-----------------------:|---------------------:|---------------:|----------------------:|----------------------:|---------------:|----------------------:|----------------------:|---------

In [11]:
adv = dfs_plus['adv']
big = dfs_plus['bigram']
adj = dfs_plus['adj']

In [12]:
print_example(adv, unit,example_key='necessarily', sort_by='conservative_log_ratio')


### Adj "necessarily" examples sorted by `conservative_log_ratio` column

|                          | NEG-necessarily   | COM-unnecessarily   | NEG-unnecessarily   | COM-necessarily   |
|:-------------------------|:------------------|:--------------------|:--------------------|:------------------|
| E11                      | 2046.64           | 10184.42            | 400.58              | 52034.36          |
| N                        | 83284343          | 83284343            | 83284343            | 83284343          |
| am_Dice                  | 0.03              | 0.0                 | 0.0                 | 0.0               |
| am_expect_diff           | 38995.36          | 108.58              | -108.58             | -38995.36         |
| am_log_likelihood        | 210533.42         | 33.68               | -33.68              | -210533.42        |
| am_log_likelihood_tt     | 210533.42         | 33.68               | 33.68               | 210533.42         |
| am_odds_ratio_disc 

### Conservative Log Ratio (Evert 2022)

source code for `am.conservative_log_ratio`

```python
def conservative_log_ratio(df, disc=.5, alpha=.001, boundary='normal',
                           correct='Bonferroni', vocab=None,
                           one_sided=False, **kwargs):
    """
    Calculate conservative log-ratio, i.e. the binary logarithm of the
    lower bound of the confidence interval of relative risk at the
    (Bonferroni-corrected) confidence level.

    :param DataFrame df: pd.DataFrame with columns O11, O12, O21, O22
    :param float disc: discounting (or smoothing) parameter for O11 == 0 and O21 == 0
    :param float alpha: significance level
    :param str boundary: exact CI boundary of [poisson] distribution or [normal] approximation?
    :param str correct: correction type for several tests (None | "Bonferroni" | "Sidak")
    :param int vocab: size of vocabulary (number of comparisons for correcting alpha)
    :param bool one_sided: calculate one- or two-sided confidence interval

    :return: conservative log-ratio
    :rtype: pd.Series

    """

    # correction of alpha for two-sided tests
    if not one_sided:
        alpha /= 2

    # Bonferroni or Sidak correction
    if correct is not None:
        if isinstance(correct, str):
            vocab = (df['O11'] >= 1).sum() if vocab is None else vocab
            if correct == 'Bonferroni':
                alpha /= vocab
            elif correct == "Sidak":
                alpha = 1 - (1 - alpha) ** (1 / vocab)
                # more stable alternative: alpha = 1 - exp(log(1 - alpha) / vocab)
                # doesn't make any difference in practice though, e.g. alpha = .00001, vocab = 10**10
            else:
                raise ValueError('parameter "correct" should either be "Bonferroni" or "Sidak".')
        else:
            raise ValueError('parameter "correct" should either be None or a string.')

    # CONFIDENCE INTERVAL

    # Poisson approximation (Evert 2022)
    if boundary == 'poisson':

        # only calculate where_lower
        lower = beta.ppf(alpha, df['O11'], df['O21'] + 1)
        lower_boundary = np.log2((df['R2'] / df['R1']) * lower / (1 - lower)).clip(lower=0)

        # only calculate where_upper
        upper = beta.ppf(1 - alpha, df['O11'] + 1, df['O21'])
        upper_boundary = np.log2((df['R2'] / df['R1']) * upper / (1 - upper)).clip(upper=0)

        # combine, set to 0 where (df['O11'] == 0) & (df['O12'] == 0)
        clrr = lower_boundary.where(
            (df['O11'] / df['R1']) >= (df['O21'] / df['R2']),
            upper_boundary
        )
        clrr = clrr.where(~((df['O11'] == 0) & (df['O12'] == 0)), 0).fillna(0)

    # Normal approximation (Hardie 2014)
    elif boundary == 'normal':
        # - questionable discounting according to Hardie (2014)
        O11_disc = df['O11'].where(df['O11'] != 0, disc)
        O21_disc = df['O21'].where(df['O21'] != 0, disc)
        # - compute natural logarithm of relative risk so we can use estimate for standard error of log(RR)
        lrr = np.log((O11_disc / O21_disc) / (df['R1'] / df['R2']))
        # - asymptotic standard deviation of log(RR) according to Wikipedia
        lrr_sd = np.sqrt(1/O11_disc + 1/O21_disc - 1/df['R1'] - 1/df['R2'])
        # - calculate and apply appropriate boundary
        z_factor = norm.ppf(1 - alpha)
        ci_min = (lrr - lrr_sd * z_factor).clip(lower=0)
        ci_max = (lrr + lrr_sd * z_factor).clip(upper=0)
        clrr = ci_min.where(lrr >= 0, ci_max)
        clrr /= np.log(2)           # adjust to binary logarithm

    return clrr
```

In [13]:
am.conservative_log_ratio(dfs_plus['bigram'], alpha=0.05, boundary='poisson').nlargest(20)

key
NEG-yet_clear                 10.337655
NEG-even_sure                  9.670686
NEG-yet_ready                  9.275453
NEG-that_uncommon              8.879671
NEG-exactly_cheap              8.847256
NEG-exactly_surprising         8.835102
NEG-entirely_sure              8.778469
NEG-exactly_new                8.745181
NEG-exactly_sure               8.654519
NEG-exactly_easy               8.595948
NEG-too_surprising             8.557151
NEG-yet_complete               8.517957
NEG-necessarily_indicative     8.499544
NEG-yet_sure                   8.444451
NEG-exactly_clear              8.401637
NEG-yet_certain                8.329498
NEG-immediately_clear          8.309717
NEG-yet_eligible               8.307389
NEG-only_delicious             8.303972
NEG-that_surprising            8.300633
dtype: float64

In [14]:
am.conservative_log_ratio(adv, alpha=0.05, boundary='poisson').sort_values(ascending=False).abs().round(0).value_counts()

0.0    663
1.0    570
2.0    381
3.0    143
4.0     86
5.0     21
6.0      8
Name: count, dtype: int64

In [15]:
am.conservative_log_ratio(adv, alpha=0.05, boundary='poisson').sort_values(ascending=False).round(0).abs().nlargest(10)

key
NEG-necessarily     6.0
COM-increasingly    6.0
NEG-exactly         6.0
NEG-that            6.0
COM-that            6.0
COM-exactly         6.0
NEG-increasingly    6.0
COM-necessarily     6.0
COM-relatively      5.0
COM-albeit          5.0
dtype: float64

In [16]:
adv.loc[adv.am_p1_given2 > 0.6, ['l1','l2', 'f', 'am_expect_diff', 'am_p1_given2']]
    

Unnamed: 0_level_0,l1,l2,f,am_expect_diff,am_p1_given2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NEG-necessarily,NEGATED,necessarily,41042,38995.36427,0.721523
NEG-exactly,NEGATED,exactly,42021,39770.27214,0.669178
NEG-that,NEGATED,that,165327,155884.15327,0.626611


In [17]:
dfs_plus['bigram'] = dfs_plus['bigram'].assign(adv=big.l2.str.split("_").str.get(0).astype('string').astype('category'), 
                                               adj=big.l2.str.split("_").str.get(1).astype('string').astype('category'))

    

In [18]:
floor = 0.9
count_cutoff = 2
skewed_bigrams = dfs_plus['bigram'].loc[dfs_plus['bigram'].am_p1_given2 > floor, ['l1','adv','adj', 'f', 'am_expect_diff', 'am_p1_given2', 't_score', 'log_likelihood', 'log_ratio', 'conservative_log_ratio']].round(2)
for ad in ['adv', 'adj']:
    neg_skewed_counts = skewed_bigrams[ad].astype('string').value_counts()
    # neg_skewed_counts['TOTAL'] = neg_skewed_counts.sum()
    
    print_md_table(neg_skewed_counts.loc[neg_skewed_counts > count_cutoff].to_frame('bigram count'), 
                   title=f'### {ad.capitalize()} appearing in more than {count_cutoff} negatively "skewed" bigram (p1.given2 > {floor})\n', n_dec=2)
    
    print(f"\n+ mean bigram count for unique {ad} = {neg_skewed_counts.filter(regex=r'[a-z]').mean().round(1)}")



### Adv appearing in more than 2 negatively "skewed" bigram (p1.given2 > 0.9)

| adv         |   bigram count |
|:------------|---------------:|
| exactly     |            259 |
| only        |             83 |
| necessarily |             49 |
| that        |             42 |
| yet         |             34 |
| always      |              4 |
| entirely    |              3 |
| remotely    |              3 |
| even        |              3 |
| quite       |              3 |

+ mean bigram count for unique adv = 20.8

### Adj appearing in more than 2 negatively "skewed" bigram (p1.given2 > 0.9)

| adj          |   bigram count |
|:-------------|---------------:|
| sure         |              7 |
| surprising   |              5 |
| unheard      |              4 |
| fond         |              4 |
| welcoming    |              3 |
| impressed    |              3 |
| uncommon     |              3 |
| unusual      |              3 |
| privy        |              3 |
| eco-friendly |           

In [19]:
print_example(big, count_type='bigram', example_key='that_complicated')


### Bigram "that_complicated" examples sorted by `am_p1_given2` column

|                          | NEG-that_complicated   | COM-that_complicated   |
|:-------------------------|:-----------------------|:-----------------------|
| E11                      | 48.06                  | 1221.94                |
| N                        | 83284343               | 83284343               |
| am_Dice                  | 0.0                    | 0.0                    |
| am_expect_diff           | 1160.94                | -1160.94               |
| am_log_likelihood        | 7432.97                | -7432.97               |
| am_log_likelihood_tt     | 7432.97                | 7432.97                |
| am_odds_ratio_disc       | 2.7                    | -2.7                   |
| am_p1_given2             | 0.91                   | -0.91                  |
| am_p1_given2_margin      | 0.91                   | -0.91                  |
| am_p1_given2_simple      | 0.95                   | 0.05

In [20]:
skewed_bigrams.f.sum()/ big.head(1).N

key
NEG-exactly_shy    0.001304
Name: N, dtype: float64

In [21]:
sorter = 'conservative_log_ratio'
print_md_table(skewed_bigrams.copy().sort_values(sorter, ascending=False).reset_index(drop=True),
               title=f'### Bigrams with adjusted conditional probability of env (`l1`) > 0.9, sorted by `{sorter}`', n_dec=2, comma=False)
    


### Bigrams with adjusted conditional probability of env (`l1`) > 0.9, sorted by `conservative_log_ratio`
|     | l1      | adv          | adj              |     f |   am_expect_diff |   am_p1_given2 |   t_score |   log_likelihood |   log_ratio |   conservative_log_ratio |
|----:|:--------|:-------------|:-----------------|------:|-----------------:|---------------:|----------:|-----------------:|------------:|-------------------------:|
|   0 | NEGATED | yet          | clear            | 10557 |         10152.34 |           0.95 |     98.81 |         67720.33 |       10.95 |                    10.26 |
|   1 | NEGATED | even         | sure             | 14526 |         13963.22 |           0.94 |    115.85 |         91936.73 |       10.06 |                     9.62 |
|   2 | NEGATED | yet          | ready            |  7615 |          7318.38 |           0.93 |     83.86 |         47875.09 |        9.76 |                     9.22 |
|   3 | NEGATED | entirely     | sure             | 1

''


### Bigrams with adjusted conditional probability of env (`l1`) > 0.9, sorted by `conservative_log_ratio`
|     | l1      | adv          | adj              |     f |   am_expect_diff |   am_p1_given2 |   t_score |   log_likelihood |   log_ratio |   conservative_log_ratio |
|----:|:--------|:-------------|:-----------------|------:|-----------------:|---------------:|----------:|-----------------:|------------:|-------------------------:|
|   0 | NEGATED | yet          | clear            | 10557 |         10152.34 |           0.95 |     98.81 |         67720.33 |       10.95 |                    10.26 |
|   1 | NEGATED | even         | sure             | 14526 |         13963.22 |           0.94 |    115.85 |         91936.73 |       10.06 |                     9.62 |
|   2 | NEGATED | yet          | ready            |  7615 |          7318.38 |           0.93 |     83.86 |         47875.09 |        9.76 |                     9.22 |
|   3 | NEGATED | entirely     | sure             | 13049 |         12531.98 |           0.92 |    109.71 |         80547.88 |        9.08 |                     8.74 |
|   4 | NEGATED | that         | uncommon         |   806 |           775.01 |           0.95 |     27.30 |          5145.84 |       10.62 |                     8.70 |
|   5 | NEGATED | exactly      | cheap            |   694 |           667.36 |           0.95 |     25.33 |          4440.69 |       10.78 |                     8.66 |
|   6 | NEGATED | exactly      | new              |  1380 |          1326.34 |           0.94 |     35.70 |          8690.49 |        9.85 |                     8.62 |
|   7 | NEGATED | exactly      | sure             |  8864 |          8512.01 |           0.92 |     90.41 |         54578.39 |        9.01 |                     8.61 |
|   8 | NEGATED | exactly      | surprising       |   441 |           424.20 |           0.96 |     20.20 |          2852.25 |       11.87 |                     8.58 |
|   9 | NEGATED | too          | surprising       |  4030 |          3870.37 |           0.92 |     60.97 |         24873.08 |        9.09 |                     8.49 |
|  10 | NEGATED | exactly      | easy             |  1070 |          1028.37 |           0.93 |     31.44 |          6734.35 |        9.82 |                     8.46 |
|  11 | NEGATED | yet          | complete         |  2222 |          2134.43 |           0.92 |     45.28 |         13785.84 |        9.26 |                     8.43 |
|  12 | NEGATED | necessarily  | indicative       |  1406 |          1350.90 |           0.93 |     36.03 |          8776.35 |        9.48 |                     8.39 |
|  13 | NEGATED | yet          | sure             |  1990 |          1911.47 |           0.92 |     42.85 |         12329.79 |        9.22 |                     8.35 |
|  14 | NEGATED | exactly      | clear            |  1760 |          1690.56 |           0.92 |     40.30 |         10905.73 |        9.22 |                     8.31 |
|  15 | NEGATED | yet          | certain          |   874 |           839.83 |           0.93 |     28.41 |          5469.43 |        9.58 |                     8.19 |
|  16 | NEGATED | that         | surprising       |  1142 |          1097.08 |           0.92 |     32.46 |          7099.54 |        9.33 |                     8.18 |
|  17 | NEGATED | only         | delicious        |   859 |           825.39 |           0.93 |     28.16 |          5372.19 |        9.56 |                     8.17 |
|  18 | NEGATED | that         | common           |  1217 |          1169.01 |           0.92 |     33.51 |          7546.31 |        9.24 |                     8.15 |
|  19 | NEGATED | yet          | eligible         |   459 |           441.29 |           0.94 |     20.60 |          2917.60 |       10.34 |                     8.11 |
|  20 | NEGATED | that         | complicated      |  1209 |          1160.94 |           0.91 |     33.39 |          7432.97 |        8.98 |                     7.97 |
|  21 | NEGATED | that         | unusual          |   983 |           944.10 |           0.92 |     30.11 |          6071.43 |        9.12 |                     7.96 |
|  22 | NEGATED | only         | stylish          |   340 |           326.91 |           0.94 |     17.73 |          2166.46 |       10.49 |                     7.89 |
|  23 | NEGATED | entirely     | surprising       |  1528 |          1466.43 |           0.90 |     37.51 |          9268.44 |        8.62 |                     7.80 |
|  24 | NEGATED | that         | fond             |   334 |           321.10 |           0.94 |     17.57 |          2119.54 |       10.24 |                     7.78 |
|  25 | NEGATED | all          | uncommon         |   228 |           219.30 |           0.95 |     14.52 |          1470.28 |       11.50 |                     7.73 |
|  26 | NEGATED | yet          | official         |   353 |           339.30 |           0.94 |     18.06 |          2228.10 |        9.96 |                     7.71 |
|  27 | NEGATED | necessarily  | sure             |   222 |           213.52 |           0.95 |     14.33 |          1431.10 |       11.46 |                     7.69 |
|  28 | NEGATED | immediately  | possible         |  1027 |           985.71 |           0.90 |     30.76 |          6243.47 |        8.67 |                     7.68 |
|  29 | NEGATED | only         | unnecessary      |   493 |           473.59 |           0.92 |     21.33 |          3061.07 |        9.29 |                     7.65 |
|  30 | NEGATED | always       | feasible         |   596 |           572.35 |           0.92 |     23.44 |          3670.57 |        9.03 |                     7.62 |
|  31 | NEGATED | that         | impressed        |   684 |           656.71 |           0.91 |     25.11 |          4190.39 |        8.88 |                     7.61 |
|  32 | NEGATED | necessarily  | surprising       |   344 |           330.57 |           0.93 |     17.82 |          2155.50 |        9.63 |                     7.55 |
|  33 | NEGATED | only         | inaccurate       |   201 |           193.32 |           0.95 |     13.64 |          1293.97 |       11.32 |                     7.54 |
|  34 | NEGATED | that         | exciting         |   807 |           774.49 |           0.90 |     27.26 |          4896.45 |        8.62 |                     7.53 |
|  35 | NEGATED | only         | ineffective      |   361 |           346.85 |           0.93 |     18.26 |          2252.19 |        9.46 |                     7.50 |
|  36 | NEGATED | yet          | final            |   659 |           632.55 |           0.90 |     24.64 |          4012.20 |        8.71 |                     7.48 |
|  37 | NEGATED | yet          | public           |   496 |           476.25 |           0.91 |     21.38 |          3043.51 |        8.92 |                     7.44 |
|  38 | NEGATED | exactly      | subtle           |   264 |           253.74 |           0.94 |     15.62 |          1664.38 |        9.90 |                     7.42 |
|  39 | NEGATED | necessarily  | representative   |   497 |           477.17 |           0.91 |     21.40 |          3044.07 |        8.87 |                     7.41 |
|  40 | NEGATED | exactly      | fun              |   226 |           217.26 |           0.94 |     14.45 |          1432.16 |       10.17 |                     7.36 |
|  41 | NEGATED | altogether   | sure             |   194 |           186.54 |           0.95 |     13.39 |          1239.60 |       10.68 |                     7.35 |
|  42 | NEGATED | only         | unfair           |   481 |           461.70 |           0.91 |     21.05 |          2929.56 |        8.72 |                     7.30 |
|  43 | NEGATED | exactly      | happy            |   441 |           423.29 |           0.90 |     20.16 |          2683.61 |        8.70 |                     7.23 |
|  44 | NEGATED | yet          | over             |   178 |           171.15 |           0.95 |     12.83 |          1135.34 |       10.56 |                     7.21 |
|  45 | NEGATED | that         | dissimilar       |   307 |           294.85 |           0.92 |     16.83 |          1896.43 |        9.12 |                     7.21 |
|  46 | NEGATED | exactly      | shy              |   124 |           119.31 |           0.96 |     10.71 |           812.03 |       21.59 |                     7.18 |
|  47 | NEGATED | exactly      | ideal            |   418 |           401.16 |           0.90 |     19.62 |          2535.79 |        8.62 |                     7.15 |
|  48 | NEGATED | exactly      | conducive        |   208 |           199.90 |           0.93 |     13.86 |          1307.86 |        9.78 |                     7.14 |
|  49 | NEGATED | exactly      | great            |   309 |           296.59 |           0.90 |     16.87 |          1879.88 |        8.69 |                     6.98 |
|  50 | NEGATED | that         | noticeable       |   265 |           254.44 |           0.91 |     15.63 |          1625.41 |        8.91 |                     6.98 |
|  51 | NEGATED | exactly      | forthcoming      |   107 |           102.95 |           0.96 |      9.95 |           700.70 |       21.38 |                     6.95 |
|  52 | NEGATED | exactly      | hard             |   203 |           195.01 |           0.92 |     13.69 |          1261.94 |        9.33 |                     6.94 |
|  53 | NEGATED | exactly      | practical        |   106 |           101.99 |           0.96 |      9.91 |           694.15 |       21.36 |                     6.93 |
|  54 | NEGATED | yet          | convinced        |   178 |           171.04 |           0.93 |     12.82 |          1113.24 |        9.56 |                     6.90 |
|  55 | NEGATED | only         | untrue           |   167 |           160.49 |           0.93 |     12.42 |          1048.77 |        9.73 |                     6.89 |
|  56 | NEGATED | yet          | operational      |   212 |           203.60 |           0.92 |     13.98 |          1307.54 |        9.07 |                     6.87 |
|  57 | NEGATED | necessarily  | synonymous       |   165 |           158.57 |           0.93 |     12.34 |          1035.79 |        9.71 |                     6.87 |
|  58 | NEGATED | yet          | right            |   202 |           194.01 |           0.92 |     13.65 |          1249.12 |        9.16 |                     6.86 |
|  59 | NEGATED | exactly      | stellar          |   171 |           164.30 |           0.93 |     12.56 |          1067.87 |        9.50 |                     6.84 |
|  60 | NEGATED | exactly      | impressive       |   100 |            96.22 |           0.96 |      9.62 |           654.86 |       21.28 |                     6.84 |
|  61 | NEGATED | exactly      | rare             |   135 |           129.78 |           0.94 |     11.17 |           855.39 |       10.16 |                     6.78 |
|  62 | NEGATED | yet          | widespread       |   145 |           139.36 |           0.94 |     11.57 |           913.02 |        9.85 |                     6.78 |
|  63 | NEGATED | only         | unsightly        |   154 |           147.98 |           0.93 |     11.92 |           964.44 |        9.61 |                     6.77 |
|  64 | NEGATED | exactly      | original         |   163 |           156.60 |           0.93 |     12.27 |          1016.05 |        9.43 |                     6.76 |
|  65 | NEGATED | exactly      | healthy          |   193 |           185.32 |           0.91 |     13.34 |          1184.95 |        8.94 |                     6.72 |
|  66 | NEGATED | exactly      | famous           |   130 |           124.97 |           0.94 |     10.96 |           822.87 |       10.11 |                     6.72 |
|  67 | NEGATED | only         | disrespectful    |   118 |           113.46 |           0.95 |     10.44 |           752.55 |       10.55 |                     6.71 |
|  68 | NEGATED | exactly      | short            |   128 |           123.04 |           0.94 |     10.88 |           809.86 |       10.08 |                     6.70 |
|  69 | NEGATED | yet          | legal            |   166 |           159.45 |           0.92 |     12.38 |          1029.00 |        9.24 |                     6.70 |
|  70 | NEGATED | exactly      | eager            |   116 |           111.53 |           0.95 |     10.36 |           739.52 |       10.53 |                     6.68 |
|  71 | NEGATED | exactly      | popular          |   171 |           164.23 |           0.92 |     12.56 |          1055.07 |        9.09 |                     6.67 |
|  72 | NEGATED | exactly      | difficult        |   126 |           121.12 |           0.94 |     10.79 |           796.86 |       10.06 |                     6.67 |
|  73 | NEGATED | exactly      | friendly         |   168 |           161.34 |           0.92 |     12.45 |          1035.70 |        9.06 |                     6.64 |
|  74 | NEGATED | exactly      | revolutionary    |   122 |           117.27 |           0.94 |     10.62 |           770.86 |       10.01 |                     6.62 |
|  75 | NEGATED | exactly      | pleased          |   173 |           166.11 |           0.91 |     12.63 |          1061.94 |        8.93 |                     6.62 |
|  76 | NEGATED | exactly      | common           |   155 |           148.87 |           0.92 |     11.96 |           957.90 |        9.14 |                     6.60 |
|  77 | NEGATED | only         | inappropriate    |   145 |           139.29 |           0.92 |     11.57 |           899.55 |        9.26 |                     6.58 |
|  78 | NEGATED | yet          | prepared         |   167 |           160.34 |           0.91 |     12.41 |          1023.26 |        8.88 |                     6.56 |
|  79 | NEGATED | necessarily  | reflective       |   187 |           179.47 |           0.90 |     13.12 |          1134.86 |        8.63 |                     6.55 |
|  80 | NEGATED | exactly      | straightforward  |    83 |            79.86 |           0.96 |      8.77 |           543.53 |       21.01 |                     6.54 |
|  81 | NEGATED | only         | incorrect        |   149 |           143.10 |           0.92 |     11.72 |           919.15 |        9.08 |                     6.53 |
|  82 | NEGATED | only         | unconstitutional |   173 |           166.04 |           0.90 |     12.62 |          1050.46 |        8.64 |                     6.49 |
|  83 | NEGATED | only         | absurd           |   144 |           138.29 |           0.92 |     11.52 |           886.87 |        9.03 |                     6.48 |
|  84 | NEGATED | exactly      | pleasant         |   144 |           138.29 |           0.92 |     11.52 |           886.87 |        9.03 |                     6.48 |
|  85 | NEGATED | always       | clear-cut        |   143 |           137.32 |           0.92 |     11.48 |           880.42 |        9.02 |                     6.47 |
|  86 | NEGATED | exactly      | reassuring       |    78 |            75.05 |           0.96 |      8.50 |           510.79 |       20.92 |                     6.44 |
|  87 | NEGATED | only         | unrealistic      |   125 |           120.08 |           0.92 |     10.74 |           776.58 |        9.31 |                     6.43 |
|  88 | NEGATED | exactly      | shocking         |   151 |           144.94 |           0.91 |     11.80 |           920.25 |        8.74 |                     6.40 |
|  89 | NEGATED | exactly      | intuitive        |    76 |            73.12 |           0.96 |      8.39 |           497.69 |       20.88 |                     6.40 |
|  90 | NEGATED | exactly      | helpful          |   129 |           123.89 |           0.92 |     10.91 |           796.14 |        9.09 |                     6.39 |
|  91 | NEGATED | exactly      | free             |   156 |           149.72 |           0.90 |     11.99 |           946.78 |        8.63 |                     6.39 |
|  92 | NEGATED | that         | shocking         |   111 |           106.65 |           0.93 |     10.12 |           692.48 |        9.46 |                     6.35 |
|  93 | NEGATED | exactly      | high             |   133 |           127.70 |           0.91 |     11.07 |           815.92 |        8.92 |                     6.35 |
|  94 | NEGATED | necessarily  | essential        |    93 |            89.40 |           0.94 |      9.27 |           589.77 |       10.21 |                     6.32 |
|  95 | NEGATED | exactly      | comforting       |   100 |            96.10 |           0.93 |      9.61 |           627.96 |        9.73 |                     6.30 |
|  96 | NEGATED | exactly      | glamorous        |    82 |            78.86 |           0.95 |      8.71 |           526.24 |       11.02 |                     6.29 |
|  97 | NEGATED | exactly      | unexpected       |    82 |            78.86 |           0.95 |      8.71 |           526.24 |       11.02 |                     6.29 |
|  98 | NEGATED | only         | inefficient      |   105 |           100.88 |           0.93 |      9.84 |           653.62 |        9.38 |                     6.26 |
|  99 | NEGATED | exactly      | exciting         |   118 |           113.31 |           0.91 |     10.43 |           725.15 |        8.97 |                     6.25 |
| 100 | NEGATED | only         | stunning         |   128 |           122.85 |           0.90 |     10.86 |           777.99 |        8.67 |                     6.21 |
| 101 | NEGATED | necessarily  | visible          |   121 |           116.16 |           0.91 |     10.56 |           738.63 |        8.78 |                     6.20 |
| 102 | NEGATED | yet          | online           |   100 |            96.06 |           0.92 |      9.61 |           621.26 |        9.31 |                     6.18 |
| 103 | NEGATED | exactly      | fond             |    85 |            81.71 |           0.94 |      8.86 |           537.74 |       10.08 |                     6.18 |
| 104 | NEGATED | exactly      | novel            |    66 |            63.50 |           0.96 |      7.82 |           432.21 |       20.68 |                     6.17 |
| 105 | NEGATED | yet          | live             |   119 |           114.23 |           0.91 |     10.47 |           725.76 |        8.76 |                     6.17 |
| 106 | NEGATED | exactly      | safe             |   117 |           112.31 |           0.91 |     10.38 |           712.89 |        8.73 |                     6.14 |
| 107 | NEGATED | exactly      | unheard          |    65 |            62.54 |           0.96 |      7.76 |           425.66 |       20.66 |                     6.14 |
| 108 | NEGATED | yet          | mainstream       |    75 |            72.12 |           0.95 |      8.33 |           480.57 |       10.90 |                     6.14 |
| 109 | NEGATED | exactly      | keen             |    83 |            79.78 |           0.94 |      8.76 |           524.74 |       10.04 |                     6.14 |
| 110 | NEGATED | quite        | over             |    83 |            79.78 |           0.94 |      8.76 |           524.74 |       10.04 |                     6.14 |
| 111 | NEGATED | necessarily  | unusual          |   109 |           104.65 |           0.91 |     10.02 |           667.14 |        8.85 |                     6.12 |
| 112 | NEGATED | only         | refreshing       |   115 |           110.38 |           0.90 |     10.29 |           700.03 |        8.71 |                     6.12 |
| 113 | NEGATED | exactly      | unknown          |    74 |            71.16 |           0.95 |      8.27 |           474.05 |       10.88 |                     6.12 |
| 114 | NEGATED | exactly      | uncommon         |    89 |            85.52 |           0.93 |      9.06 |           556.62 |        9.56 |                     6.11 |
| 115 | NEGATED | exactly      | romantic         |    64 |            61.58 |           0.96 |      7.70 |           419.11 |       20.63 |                     6.11 |
| 116 | NEGATED | only         | inhumane         |    79 |            75.93 |           0.94 |      8.54 |           498.74 |        9.97 |                     6.05 |
| 117 | NEGATED | only         | unhelpful        |    84 |            80.71 |           0.93 |      8.81 |           524.21 |        9.48 |                     6.01 |
| 118 | NEGATED | exactly      | welcome          |    76 |            73.05 |           0.94 |      8.38 |           479.24 |        9.92 |                     5.99 |
| 119 | NEGATED | exactly      | unique           |    95 |            91.22 |           0.91 |      9.36 |           582.80 |        8.92 |                     5.99 |
| 120 | NEGATED | only         | impractical      |   101 |            96.95 |           0.91 |      9.65 |           615.64 |        8.74 |                     5.99 |
| 121 | NEGATED | exactly      | light            |    76 |            73.05 |           0.94 |      8.38 |           479.24 |        9.92 |                     5.99 |
| 122 | NEGATED | that         | thrilled         |    59 |            56.77 |           0.96 |      7.39 |           386.37 |       20.52 |                     5.98 |
| 123 | NEGATED | exactly      | welcoming        |    75 |            72.09 |           0.94 |      8.32 |           472.75 |        9.90 |                     5.97 |
| 124 | NEGATED | exactly      | mainstream       |    75 |            72.09 |           0.94 |      8.32 |           472.75 |        9.90 |                     5.97 |
| 125 | NEGATED | exactly      | positive         |    99 |            95.03 |           0.91 |      9.55 |           602.78 |        8.71 |                     5.96 |
| 126 | NEGATED | necessarily  | straightforward  |    66 |            63.46 |           0.95 |      7.81 |           421.89 |       10.71 |                     5.93 |
| 127 | NEGATED | that         | challenging      |    97 |            93.10 |           0.90 |      9.45 |           589.92 |        8.68 |                     5.92 |
| 128 | NEGATED | exactly      | realistic        |    84 |            80.67 |           0.92 |      8.80 |           517.85 |        9.06 |                     5.89 |
| 129 | NEGATED | exactly      | convenient       |    71 |            68.24 |           0.93 |      8.10 |           446.77 |        9.82 |                     5.87 |
| 130 | NEGATED | exactly      | scientific       |    70 |            67.28 |           0.93 |      8.04 |           440.28 |        9.80 |                     5.85 |
| 131 | NEGATED | only         | discriminatory   |    70 |            67.28 |           0.93 |      8.04 |           440.28 |        9.80 |                     5.85 |
| 132 | NEGATED | only         | unsustainable    |    70 |            67.28 |           0.93 |      8.04 |           440.28 |        9.80 |                     5.85 |
| 133 | NEGATED | exactly      | impressed        |    60 |            57.69 |           0.95 |      7.45 |           382.79 |       10.57 |                     5.76 |
| 134 | NEGATED | necessarily  | comparable       |    60 |            57.69 |           0.95 |      7.45 |           382.79 |       10.57 |                     5.76 |
| 135 | NEGATED | exactly      | kind             |    52 |            50.03 |           0.96 |      6.94 |           340.53 |       20.33 |                     5.76 |
| 136 | NEGATED | exactly      | sexy             |    64 |            61.50 |           0.93 |      7.69 |           401.34 |        9.67 |                     5.70 |
| 137 | NEGATED | exactly      | quiet            |    64 |            61.50 |           0.93 |      7.69 |           401.34 |        9.67 |                     5.70 |
| 138 | NEGATED | only         | futile           |    75 |            72.01 |           0.91 |      8.32 |           459.79 |        8.90 |                     5.70 |
| 139 | NEGATED | exactly      | obvious          |    50 |            48.11 |           0.96 |      6.80 |           327.43 |       20.28 |                     5.69 |
| 140 | NEGATED | only         | eco-friendly     |    69 |            66.28 |           0.92 |      7.98 |           427.14 |        9.19 |                     5.68 |
| 141 | NEGATED | exactly      | flattering       |    78 |            74.86 |           0.90 |      8.48 |           473.39 |        8.63 |                     5.65 |
| 142 | NEGATED | only         | incapable        |    73 |            70.09 |           0.91 |      8.20 |           446.91 |        8.86 |                     5.65 |
| 143 | NEGATED | exactly      | smooth           |    76 |            72.93 |           0.90 |      8.37 |           460.54 |        8.59 |                     5.61 |
| 144 | NEGATED | exactly      | enthusiastic     |    71 |            68.16 |           0.91 |      8.09 |           434.03 |        8.82 |                     5.60 |
| 145 | NEGATED | that         | fussy            |    65 |            62.43 |           0.92 |      7.74 |           401.30 |        9.11 |                     5.58 |
| 146 | NEGATED | exactly      | worried          |    47 |            45.22 |           0.96 |      6.60 |           307.78 |       20.19 |                     5.58 |
| 147 | NEGATED | exactly      | huge             |    59 |            56.69 |           0.93 |      7.38 |           368.92 |        9.55 |                     5.56 |
| 148 | NEGATED | exactly      | simple           |    64 |            61.46 |           0.92 |      7.68 |           394.84 |        9.08 |                     5.55 |
| 149 | NEGATED | exactly      | inexpensive      |    46 |            44.26 |           0.96 |      6.53 |           301.23 |       20.16 |                     5.54 |
| 150 | NEGATED | only         | yummy            |    44 |            42.33 |           0.96 |      6.38 |           288.14 |       20.09 |                     5.46 |
| 151 | NEGATED | necessarily  | apparent         |    56 |            53.81 |           0.93 |      7.19 |           349.48 |        9.47 |                     5.46 |
| 152 | NEGATED | yet          | extinct          |    50 |            48.07 |           0.94 |      6.80 |           317.66 |       10.31 |                     5.45 |
| 153 | NEGATED | necessarily  | confident        |    60 |            57.62 |           0.91 |      7.44 |           369.02 |        8.99 |                     5.44 |
| 154 | NEGATED | only         | unwise           |    64 |            61.43 |           0.90 |      7.68 |           388.99 |        8.67 |                     5.42 |
| 155 | NEGATED | necessarily  | intuitive        |    59 |            56.65 |           0.91 |      7.38 |           362.57 |        8.97 |                     5.41 |
| 156 | NEGATED | exactly      | willing          |    49 |            47.11 |           0.94 |      6.73 |           311.15 |       10.28 |                     5.41 |
| 157 | NEGATED | only         | unprofessional   |    54 |            51.88 |           0.93 |      7.06 |           336.52 |        9.42 |                     5.40 |
| 158 | NEGATED | exactly      | unusual          |    54 |            51.88 |           0.93 |      7.06 |           336.52 |        9.42 |                     5.40 |
| 159 | NEGATED | only         | sturdy           |    58 |            55.69 |           0.91 |      7.31 |           356.13 |        8.94 |                     5.38 |
| 160 | NEGATED | necessarily  | welcome          |    62 |            59.50 |           0.90 |      7.56 |           376.14 |        8.62 |                     5.37 |
| 161 | NEGATED | exactly      | appetizing       |    48 |            46.15 |           0.94 |      6.66 |           304.65 |       10.25 |                     5.37 |
| 162 | NEGATED | exactly      | appealing        |    53 |            50.92 |           0.93 |      6.99 |           330.05 |        9.40 |                     5.37 |
| 163 | NEGATED | exactly      | crazy            |    48 |            46.15 |           0.94 |      6.66 |           304.65 |       10.25 |                     5.37 |
| 164 | NEGATED | always       | conclusive       |    48 |            46.15 |           0.94 |      6.66 |           304.65 |       10.25 |                     5.37 |
| 165 | NEGATED | that         | frequent         |    57 |            54.73 |           0.91 |      7.25 |           349.68 |        8.92 |                     5.35 |
| 166 | NEGATED | only         | diverse          |    61 |            58.54 |           0.90 |      7.50 |           369.72 |        8.60 |                     5.34 |
| 167 | NEGATED | exactly      | sympathetic      |    46 |            44.22 |           0.94 |      6.52 |           291.63 |       10.19 |                     5.30 |
| 168 | NEGATED | exactly      | commonplace      |    46 |            44.22 |           0.94 |      6.52 |           291.63 |       10.19 |                     5.30 |
| 169 | NEGATED | exactly      | innocent         |    50 |            48.03 |           0.92 |      6.79 |           310.63 |        9.31 |                     5.26 |
| 170 | NEGATED | only         | inspirational    |    50 |            48.03 |           0.92 |      6.79 |           310.63 |        9.31 |                     5.26 |
| 171 | NEGATED | terribly     | dissimilar       |    44 |            42.30 |           0.94 |      6.38 |           278.62 |       10.13 |                     5.21 |
| 172 | NEGATED | yet          | privy            |    38 |            36.56 |           0.96 |      5.93 |           248.85 |       19.88 |                     5.20 |
| 173 | NEGATED | exactly      | rosy             |    48 |            46.11 |           0.92 |      6.66 |           297.69 |        9.25 |                     5.19 |
| 174 | NEGATED | necessarily  | cause            |    52 |            49.92 |           0.91 |      6.92 |           317.47 |        8.78 |                     5.18 |
| 175 | NEGATED | exactly      | reliable         |    52 |            49.92 |           0.91 |      6.92 |           317.47 |        8.78 |                     5.18 |
| 176 | NEGATED | only         | exhausting       |    52 |            49.92 |           0.91 |      6.92 |           317.47 |        8.78 |                     5.18 |
| 177 | NEGATED | exactly      | unprecedented    |    37 |            35.60 |           0.96 |      5.85 |           242.30 |       19.84 |                     5.15 |
| 178 | NEGATED | exactly      | supportive       |    37 |            35.60 |           0.96 |      5.85 |           242.30 |       19.84 |                     5.15 |
| 179 | NEGATED | exactly      | enjoyable        |    42 |            40.37 |           0.94 |      6.23 |           265.62 |       10.06 |                     5.13 |
| 180 | NEGATED | exactly      | user-friendly    |    36 |            34.64 |           0.96 |      5.77 |           235.75 |       19.80 |                     5.10 |
| 181 | NEGATED | really       | privy            |    41 |            39.41 |           0.94 |      6.15 |           259.12 |       10.02 |                     5.08 |
| 182 | NEGATED | necessarily  | original         |    40 |            38.45 |           0.94 |      6.08 |           252.62 |        9.99 |                     5.04 |
| 183 | NEGATED | exactly      | transparent      |    35 |            33.68 |           0.96 |      5.69 |           229.20 |       19.76 |                     5.04 |
| 184 | NEGATED | exactly      | alone            |    35 |            33.68 |           0.96 |      5.69 |           229.20 |       19.76 |                     5.04 |
| 185 | NEGATED | exactly      | shocked          |    48 |            46.07 |           0.90 |      6.65 |           291.74 |        8.67 |                     5.03 |
| 186 | NEGATED | yet          | proficient       |    44 |            42.26 |           0.92 |      6.37 |           271.84 |        9.13 |                     5.03 |
| 187 | NEGATED | necessarily  | interchangeable  |    34 |            32.71 |           0.96 |      5.61 |           222.65 |       19.72 |                     4.99 |
| 188 | NEGATED | exactly      | honest           |    39 |            37.49 |           0.94 |      6.00 |           246.12 |        9.95 |                     4.99 |
| 189 | NEGATED | necessarily  | transferable     |    34 |            32.71 |           0.96 |      5.61 |           222.65 |       19.72 |                     4.99 |
| 190 | NEGATED | exactly      | thrilling        |    43 |            41.30 |           0.92 |      6.30 |           265.38 |        9.09 |                     4.98 |
| 191 | NEGATED | exactly      | black            |    46 |            44.15 |           0.90 |      6.51 |           278.89 |        8.61 |                     4.96 |
| 192 | NEGATED | only         | cumbersome       |    46 |            44.15 |           0.90 |      6.51 |           278.89 |        8.61 |                     4.96 |
| 193 | NEGATED | exactly      | scary            |    38 |            36.52 |           0.94 |      5.92 |           239.62 |        9.91 |                     4.94 |
| 194 | NEGATED | exactly      | optimistic       |    42 |            40.33 |           0.92 |      6.22 |           258.92 |        9.06 |                     4.94 |
| 195 | NEGATED | only         | appalling        |    33 |            31.75 |           0.96 |      5.53 |           216.10 |       19.68 |                     4.93 |
| 196 | NEGATED | only         | captivating      |    33 |            31.75 |           0.96 |      5.53 |           216.10 |       19.68 |                     4.93 |
| 197 | NEGATED | that         | thrilling        |    37 |            35.56 |           0.94 |      5.85 |           233.13 |        9.88 |                     4.89 |
| 198 | NEGATED | only         | undemocratic     |    37 |            35.56 |           0.94 |      5.85 |           233.13 |        9.88 |                     4.89 |
| 199 | NEGATED | exactly      | illegal          |    32 |            30.79 |           0.96 |      5.44 |           209.55 |       19.63 |                     4.87 |
| 200 | NEGATED | exactly      | fashionable      |    32 |            30.79 |           0.96 |      5.44 |           209.55 |       19.63 |                     4.87 |
| 201 | NEGATED | exactly      | stable           |    40 |            38.41 |           0.91 |      6.07 |           246.02 |        8.99 |                     4.85 |
| 202 | NEGATED | exactly      | confident        |    40 |            38.41 |           0.91 |      6.07 |           246.02 |        8.99 |                     4.85 |
| 203 | NEGATED | necessarily  | accessible       |    36 |            34.60 |           0.94 |      5.77 |           226.63 |        9.84 |                     4.84 |
| 204 | NEGATED | exactly      | fast             |    36 |            34.60 |           0.94 |      5.77 |           226.63 |        9.84 |                     4.84 |
| 205 | NEGATED | exactly      | natural          |    36 |            34.60 |           0.94 |      5.77 |           226.63 |        9.84 |                     4.84 |
| 206 | NEGATED | necessarily  | noticeable       |    31 |            29.83 |           0.96 |      5.36 |           203.01 |       19.59 |                     4.81 |
| 207 | NEGATED | exactly      | dominant         |    31 |            29.83 |           0.96 |      5.36 |           203.01 |       19.59 |                     4.81 |
| 208 | NEGATED | exactly      | plentiful        |    31 |            29.83 |           0.96 |      5.36 |           203.01 |       19.59 |                     4.81 |
| 209 | NEGATED | exactly      | attractive       |    39 |            37.45 |           0.91 |      6.00 |           239.57 |        8.95 |                     4.80 |
| 210 | NEGATED | exactly      | prolific         |    35 |            33.64 |           0.93 |      5.69 |           220.14 |        9.80 |                     4.78 |
| 211 | NEGATED | exactly      | elegant          |    35 |            33.64 |           0.93 |      5.69 |           220.14 |        9.80 |                     4.78 |
| 212 | NEGATED | necessarily  | evident          |    30 |            28.86 |           0.96 |      5.27 |           196.46 |       19.54 |                     4.74 |
| 213 | NEGATED | necessarily  | proof            |    34 |            32.68 |           0.93 |      5.60 |           213.65 |        9.75 |                     4.73 |
| 214 | NEGATED | only         | unavoidable      |    34 |            32.68 |           0.93 |      5.60 |           213.65 |        9.75 |                     4.73 |
| 215 | NEGATED | exactly      | terrible         |    34 |            32.68 |           0.93 |      5.60 |           213.65 |        9.75 |                     4.73 |
| 216 | NEGATED | about        | left             |    37 |            35.52 |           0.91 |      5.84 |           226.67 |        8.88 |                     4.70 |
| 217 | NEGATED | exactly      | expensive        |    37 |            35.52 |           0.91 |      5.84 |           226.67 |        8.88 |                     4.70 |
| 218 | NEGATED | exactly      | unbiased         |    29 |            27.90 |           0.96 |      5.18 |           189.91 |       19.49 |                     4.68 |
| 219 | NEGATED | only         | unsupported      |    33 |            31.71 |           0.93 |      5.52 |           207.16 |        9.71 |                     4.67 |
| 220 | NEGATED | only         | unwarranted      |    36 |            34.56 |           0.91 |      5.76 |           220.23 |        8.84 |                     4.64 |
| 221 | NEGATED | exactly      | truthful         |    36 |            34.56 |           0.91 |      5.76 |           220.23 |        8.84 |                     4.64 |
| 222 | NEGATED | that         | unexpected       |    36 |            34.56 |           0.91 |      5.76 |           220.23 |        8.84 |                     4.64 |
| 223 | NEGATED | only         | breathtaking     |    32 |            30.75 |           0.93 |      5.44 |           200.67 |        9.67 |                     4.61 |
| 224 | NEGATED | that         | typical          |    28 |            26.94 |           0.96 |      5.09 |           183.36 |       19.44 |                     4.61 |
| 225 | NEGATED | only         | invaluable       |    32 |            30.75 |           0.93 |      5.44 |           200.67 |        9.67 |                     4.61 |
| 226 | NEGATED | only         | spacious         |    32 |            30.75 |           0.93 |      5.44 |           200.67 |        9.67 |                     4.61 |
| 227 | NEGATED | only         | baseless         |    32 |            30.75 |           0.93 |      5.44 |           200.67 |        9.67 |                     4.61 |
| 228 | NEGATED | only         | sleek            |    35 |            33.60 |           0.91 |      5.68 |           213.79 |        8.80 |                     4.59 |
| 229 | NEGATED | exactly      | affordable       |    35 |            33.60 |           0.91 |      5.68 |           213.79 |        8.80 |                     4.59 |
| 230 | NEGATED | exactly      | likely           |    31 |            29.79 |           0.93 |      5.35 |           194.18 |        9.62 |                     4.54 |
| 231 | NEGATED | exactly      | strong           |    34 |            32.64 |           0.91 |      5.60 |           207.36 |        8.75 |                     4.53 |
| 232 | NEGATED | only         | unscientific     |    27 |            25.98 |           0.96 |      5.00 |           176.81 |       19.39 |                     4.53 |
| 233 | NEGATED | that         | unreasonable     |    34 |            32.64 |           0.91 |      5.60 |           207.36 |        8.75 |                     4.53 |
| 234 | NEGATED | exactly      | feasible         |    30 |            28.83 |           0.93 |      5.26 |           187.70 |        9.57 |                     4.48 |
| 235 | NEGATED | exactly      | robust           |    33 |            31.68 |           0.91 |      5.51 |           200.93 |        8.71 |                     4.47 |
| 236 | NEGATED | exactly      | private          |    33 |            31.68 |           0.91 |      5.51 |           200.93 |        8.71 |                     4.47 |
| 237 | NEGATED | nearly       | smart            |    26 |            25.02 |           0.96 |      4.91 |           170.26 |       19.33 |                     4.45 |
| 238 | NEGATED | exactly      | green            |    26 |            25.02 |           0.96 |      4.91 |           170.26 |       19.33 |                     4.45 |
| 239 | NEGATED | necessarily  | fond             |    26 |            25.02 |           0.96 |      4.91 |           170.26 |       19.33 |                     4.45 |
| 240 | NEGATED | just         | notable          |    32 |            30.71 |           0.90 |      5.43 |           194.50 |        8.67 |                     4.41 |
| 241 | NEGATED | only         | cozy             |    29 |            27.86 |           0.93 |      5.17 |           181.22 |        9.52 |                     4.41 |
| 242 | NEGATED | exactly      | smart            |    32 |            30.71 |           0.90 |      5.43 |           194.50 |        8.67 |                     4.41 |
| 243 | NEGATED | exactly      | slow             |    32 |            30.71 |           0.90 |      5.43 |           194.50 |        8.67 |                     4.41 |
| 244 | NEGATED | exactly      | compatible       |    32 |            30.71 |           0.90 |      5.43 |           194.50 |        8.67 |                     4.41 |
| 245 | NEGATED | exactly      | flawless         |    29 |            27.86 |           0.93 |      5.17 |           181.22 |        9.52 |                     4.41 |
| 246 | NEGATED | exactly      | revelatory       |    29 |            27.86 |           0.93 |      5.17 |           181.22 |        9.52 |                     4.41 |
| 247 | NEGATED | that         | compatible       |    29 |            27.86 |           0.93 |      5.17 |           181.22 |        9.52 |                     4.41 |
| 248 | NEGATED | only         | short-sighted    |    32 |            30.71 |           0.90 |      5.43 |           194.50 |        8.67 |                     4.41 |
| 249 | NEGATED | yet          | applicable       |    25 |            24.05 |           0.96 |      4.81 |           163.71 |       19.28 |                     4.37 |
| 250 | NEGATED | exactly      | abundant         |    25 |            24.05 |           0.96 |      4.81 |           163.71 |       19.28 |                     4.37 |
| 251 | NEGATED | entirely     | fond             |    31 |            29.75 |           0.90 |      5.34 |           188.07 |        8.62 |                     4.34 |
| 252 | NEGATED | exactly      | sad              |    31 |            29.75 |           0.90 |      5.34 |           188.07 |        8.62 |                     4.34 |
| 253 | NEGATED | necessarily  | enjoyable        |    28 |            26.90 |           0.93 |      5.08 |           174.74 |        9.47 |                     4.33 |
| 254 | NEGATED | exactly      | evil             |    28 |            26.90 |           0.93 |      5.08 |           174.74 |        9.47 |                     4.33 |
| 255 | NEGATED | only         | unconscionable   |    24 |            23.09 |           0.96 |      4.71 |           157.17 |       19.22 |                     4.29 |
| 256 | NEGATED | that         | talkative        |    24 |            23.09 |           0.96 |      4.71 |           157.17 |       19.22 |                     4.29 |
| 257 | NEGATED | only         | unappealing      |    24 |            23.09 |           0.96 |      4.71 |           157.17 |       19.22 |                     4.29 |
| 258 | NEGATED | exactly      | bare             |    27 |            25.94 |           0.93 |      4.99 |           168.26 |        9.42 |                     4.26 |
| 259 | NEGATED | yet          | jaded            |    27 |            25.94 |           0.93 |      4.99 |           168.26 |        9.42 |                     4.26 |
| 260 | NEGATED | exactly      | indicative       |    27 |            25.94 |           0.93 |      4.99 |           168.26 |        9.42 |                     4.26 |
| 261 | NEGATED | exactly      | obscure          |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 262 | NEGATED | exactly      | shabby           |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 263 | NEGATED | exactly      | interesting      |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 264 | NEGATED | that         | lucrative        |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 265 | NEGATED | necessarily  | glamorous        |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 266 | NEGATED | exactly      | unhappy          |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 267 | NEGATED | exactly      | unfamiliar       |    23 |            22.13 |           0.96 |      4.61 |           150.62 |       19.16 |                     4.20 |
| 268 | NEGATED | exactly      | inconspicuous    |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 269 | NEGATED | exactly      | complimentary    |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 270 | NEGATED | exactly      | conventional     |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 271 | NEGATED | exactly      | desirable        |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 272 | NEGATED | only         | antithetical     |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 273 | NEGATED | that         | bothersome       |    26 |            24.98 |           0.93 |      4.90 |           161.79 |        9.37 |                     4.18 |
| 274 | NEGATED | exactly      | silent           |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 275 | NEGATED | exactly      | serious          |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 276 | NEGATED | necessarily  | comforting       |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 277 | NEGATED | exactly      | secure           |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 278 | NEGATED | exactly      | cold             |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 279 | NEGATED | remotely     | life-threatening |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 280 | NEGATED | exactly      | timely           |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 281 | NEGATED | exactly      | thick            |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 282 | NEGATED | exactly      | polite           |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 283 | NEGATED | only         | demeaning        |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 284 | NEGATED | exactly      | stunning         |    22 |            21.17 |           0.96 |      4.51 |           144.07 |       19.09 |                     4.10 |
| 285 | NEGATED | exactly      | soft             |    25 |            24.02 |           0.92 |      4.80 |           155.31 |        9.31 |                     4.09 |
| 286 | NEGATED | necessarily  | life-threatening |    25 |            24.02 |           0.92 |      4.80 |           155.31 |        9.31 |                     4.09 |
| 287 | NEGATED | exactly      | impartial        |    24 |            23.05 |           0.92 |      4.71 |           148.85 |        9.25 |                     4.01 |
| 288 | NEGATED | that         | enticing         |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 289 | NEGATED | exactly      | lightweight      |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 290 | NEGATED | that         | noisy            |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 291 | NEGATED | exactly      | objective        |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 292 | NEGATED | altogether   | sorry            |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 293 | NEGATED | exactly      | portable         |    21 |            20.21 |           0.96 |      4.41 |           137.52 |       19.03 |                     4.00 |
| 294 | NEGATED | exactly      | accessible       |    23 |            22.09 |           0.92 |      4.61 |           142.38 |        9.19 |                     3.91 |
| 295 | NEGATED | only         | sexist           |    23 |            22.09 |           0.92 |      4.61 |           142.38 |        9.19 |                     3.91 |
| 296 | NEGATED | necessarily  | greener          |    23 |            22.09 |           0.92 |      4.61 |           142.38 |        9.19 |                     3.91 |
| 297 | NEGATED | exactly      | radical          |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 298 | NEGATED | exactly      | apologetic       |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 299 | NEGATED | exactly      | thin             |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 300 | NEGATED | exactly      | foolproof        |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 301 | NEGATED | necessarily  | wet              |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 302 | NEGATED | exactly      | authentic        |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 303 | NEGATED | exactly      | fantastic        |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 304 | NEGATED | that         | arduous          |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 305 | NEGATED | exactly      | sharp            |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 306 | NEGATED | therefore    | ashamed          |    20 |            19.24 |           0.96 |      4.30 |           130.97 |       18.96 |                     3.89 |
| 307 | NEGATED | that         | welcoming        |    22 |            21.13 |           0.92 |      4.50 |           135.92 |        9.13 |                     3.81 |
| 308 | NEGATED | exactly      | concerned        |    22 |            21.13 |           0.92 |      4.50 |           135.92 |        9.13 |                     3.81 |
| 309 | NEGATED | exactly      | sustainable      |    22 |            21.13 |           0.92 |      4.50 |           135.92 |        9.13 |                     3.81 |
| 310 | NEGATED | that         | technical        |    22 |            21.13 |           0.92 |      4.50 |           135.92 |        9.13 |                     3.81 |
| 311 | NEGATED | only         | preposterous     |    22 |            21.13 |           0.92 |      4.50 |           135.92 |        9.13 |                     3.81 |
| 312 | NEGATED | that         | proficient       |    19 |            18.28 |           0.96 |      4.19 |           124.42 |       18.88 |                     3.77 |
| 313 | NEGATED | exactly      | profound         |    19 |            18.28 |           0.96 |      4.19 |           124.42 |       18.88 |                     3.77 |
| 314 | NEGATED | exactly      | bullish          |    19 |            18.28 |           0.96 |      4.19 |           124.42 |       18.88 |                     3.77 |
| 315 | NEGATED | even         | gonna            |    21 |            20.17 |           0.92 |      4.40 |           129.46 |        9.06 |                     3.70 |
| 316 | NEGATED | exactly      | enticing         |    21 |            20.17 |           0.92 |      4.40 |           129.46 |        9.06 |                     3.70 |
| 317 | NEGATED | only         | fabulous         |    21 |            20.17 |           0.92 |      4.40 |           129.46 |        9.06 |                     3.70 |
| 318 | NEGATED | yet          | comparable       |    21 |            20.17 |           0.92 |      4.40 |           129.46 |        9.06 |                     3.70 |
| 319 | NEGATED | exactly      | spacious         |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 320 | NEGATED | exactly      | trivial          |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 321 | NEGATED | exactly      | convincing       |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 322 | NEGATED | exactly      | crisp            |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 323 | NEGATED | that         | particular       |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 324 | NEGATED | exactly      | stylish          |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 325 | NEGATED | that         | outgoing         |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 326 | NEGATED | together     | exclusive        |    18 |            17.32 |           0.96 |      4.08 |           117.87 |       18.80 |                     3.64 |
| 327 | NEGATED | that         | mysterious       |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 328 | NEGATED | yet          | late             |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 329 | NEGATED | exactly      | ugly             |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 330 | NEGATED | only         | unheard          |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 331 | NEGATED | exactly      | heroic           |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 332 | NEGATED | mutually     | exceptional      |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 333 | NEGATED | exactly      | creative         |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 334 | NEGATED | that         | unheard          |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 335 | NEGATED | exactly      | discreet         |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 336 | NEGATED | exactly      | desperate        |    20 |            19.21 |           0.91 |      4.29 |           123.01 |        8.99 |                     3.59 |
| 337 | NEGATED | only         | cathartic        |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 338 | NEGATED | quite        | zero             |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 339 | NEGATED | exactly      | upbeat           |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 340 | NEGATED | exactly      | random           |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 341 | NEGATED | exactly      | newsworthy       |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 342 | NEGATED | exactly      | gentle           |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 343 | NEGATED | exactly      | heavy            |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 344 | NEGATED | exactly      | awesome          |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 345 | NEGATED | exactly      | likeable         |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 346 | NEGATED | exactly      | believable       |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 347 | NEGATED | exactly      | cooperative      |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 348 | NEGATED | exactly      | cheery           |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 349 | NEGATED | exactly      | controversial    |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 350 | NEGATED | exactly      | receptive        |    17 |            16.36 |           0.96 |      3.97 |           111.33 |       18.72 |                     3.50 |
| 351 | NEGATED | exactly      | entertaining     |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 352 | NEGATED | necessarily  | satisfying       |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 353 | NEGATED | exactly      | mind-blowing     |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 354 | NEGATED | that         | appetizing       |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 355 | NEGATED | remotely     | subtle           |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 356 | NEGATED | yet          | expired          |    19 |            18.24 |           0.91 |      4.19 |           116.56 |        8.91 |                     3.47 |
| 357 | NEGATED | necessarily  | mandatory        |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 358 | NEGATED | exactly      | voluntary        |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 359 | NEGATED | exactly      | magic            |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 360 | NEGATED | that         | organized        |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 361 | NEGATED | necessarily  | chronological    |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 362 | NEGATED | exactly      | devoid           |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 363 | NEGATED | exactly      | top-notch        |    16 |            15.39 |           0.96 |      3.85 |           104.78 |       18.63 |                     3.35 |
| 364 | NEGATED | necessarily  | sweet            |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 365 | NEGATED | exactly      | complicated      |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 366 | NEGATED | always       | conventional     |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 367 | NEGATED | exactly      | relaxing         |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 368 | NEGATED | only         | barbaric         |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 369 | NEGATED | exactly      | foreign          |    18 |            17.28 |           0.91 |      4.07 |           110.12 |        8.84 |                     3.34 |
| 370 | NEGATED | exactly      | graceful         |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 371 | NEGATED | that         | restrictive      |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 372 | NEGATED | exactly      | flashy           |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 373 | NEGATED | exactly      | prone            |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 374 | NEGATED | necessarily  | advantageous     |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 375 | NEGATED | that         | notable          |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 376 | NEGATED | necessarily  | wealthier        |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 377 | NEGATED | exactly      | logical          |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 378 | NEGATED | exactly      | official         |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 379 | NEGATED | only         | unworkable       |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 380 | NEGATED | only         | picturesque      |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 381 | NEGATED | exactly      | progressive      |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 382 | NEGATED | only         | unafraid         |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 383 | NEGATED | even         | midnight         |    17 |            16.32 |           0.91 |      3.96 |           103.68 |        8.75 |                     3.19 |
| 384 | NEGATED | exactly      | ordinary         |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 385 | NEGATED | exactly      | delighted        |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 386 | NEGATED | exactly      | disappointed     |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 387 | NEGATED | individually | redeemable       |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 388 | NEGATED | exactly      | averse           |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 389 | NEGATED | necessarily  | vegan            |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 390 | NEGATED | only         | breathable       |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 391 | NEGATED | exactly      | scared           |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 392 | NEGATED | exactly      | catchy           |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 393 | NEGATED | that         | drunk            |    15 |            14.43 |           0.96 |      3.73 |            98.23 |       18.54 |                     3.18 |
| 394 | NEGATED | exactly      | outstanding      |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 395 | NEGATED | only         | senseless        |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 396 | NEGATED | that         | flavorful        |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 397 | NEGATED | necessarily  | sexy             |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 398 | NEGATED | necessarily  | overt            |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 399 | NEGATED | yet          | satisfactory     |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 400 | NEGATED | only         | implausible      |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 401 | NEGATED | necessarily  | flashy           |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 402 | NEGATED | only         | intrusive        |    16 |            15.36 |           0.90 |      3.84 |            97.25 |        8.67 |                     3.03 |
| 403 | NEGATED | exactly      | inaccurate       |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 404 | NEGATED | really       | wealthier        |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 405 | NEGATED | exactly      | insignificant    |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 406 | NEGATED | exactly      | fluent           |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 407 | NEGATED | yet          | closed           |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 408 | NEGATED | exactly      | hopeful          |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 409 | NEGATED | exactly      | picturesque      |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 410 | NEGATED | exactly      | seamless         |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 411 | NEGATED | necessarily  | unheard          |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 412 | NEGATED | necessarily  | thrilled         |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 413 | NEGATED | exactly      | intimidating     |    14 |            13.47 |           0.96 |      3.60 |            91.68 |       18.44 |                     3.00 |
| 414 | NEGATED | only         | evocative        |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 415 | NEGATED | exactly      | picky            |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 416 | NEGATED | yet          | drunk            |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 417 | NEGATED | that         | favourable       |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 418 | NEGATED | exactly      | balmy            |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 419 | NEGATED | exactly      | encouraging      |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 420 | NEGATED | only         | disproportionate |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 421 | NEGATED | exactly      | credible         |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 422 | NEGATED | only         | audacious        |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 423 | NEGATED | exactly      | universal        |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 424 | NEGATED | exactly      | passionate       |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 425 | NEGATED | exactly      | conclusive       |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 426 | NEGATED | exactly      | photogenic       |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 427 | NEGATED | exactly      | heartening       |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 428 | NEGATED | exactly      | startling        |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 429 | NEGATED | just         | eco-friendly     |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 430 | NEGATED | yet          | inclined         |    13 |            12.51 |           0.96 |      3.47 |            85.13 |       18.33 |                     2.79 |
| 431 | NEGATED | yet          | midnight         |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 432 | NEGATED | exactly      | acceptable       |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 433 | NEGATED | remotely     | impressed        |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 434 | NEGATED | only         | unforgettable    |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 435 | NEGATED | only         | regressive       |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 436 | NEGATED | exactly      | homeless         |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 437 | NEGATED | only         | cowardly         |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 438 | NEGATED | necessarily  | overwhelming     |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 439 | NEGATED | exactly      | restful          |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 440 | NEGATED | exactly      | relatable        |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 441 | NEGATED | exactly      | unpredictable    |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 442 | NEGATED | exactly      | wealthy          |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 443 | NEGATED | exactly      | rife             |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 444 | NEGATED | exactly      | favourable       |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 445 | NEGATED | exactly      | pleasing         |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 446 | NEGATED | only         | antiquated       |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 447 | NEGATED | only         | festive          |    12 |            11.55 |           0.96 |      3.33 |            78.58 |       18.22 |                     2.56 |
| 448 | NEGATED | exactly      | accustomed       |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 449 | NEGATED | necessarily  | contiguous       |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 450 | NEGATED | exactly      | eco-friendly     |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 451 | NEGATED | necessarily  | welcoming        |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 452 | NEGATED | originally   | sure             |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 453 | NEGATED | only         | unconvincing     |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 454 | NEGATED | exactly      | fancy            |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 455 | NEGATED | exactly      | inspirational    |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 456 | NEGATED | exactly      | pretty           |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 457 | NEGATED | only         | systemic         |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 458 | NEGATED | only         | ill-equipped     |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 459 | NEGATED | exactly      | unfounded        |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 460 | NEGATED | exactly      | gracious         |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 461 | NEGATED | exactly      | psyched          |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 462 | NEGATED | exactly      | heartwarming     |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 463 | NEGATED | exactly      | elite            |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 464 | NEGATED | exactly      | expansive        |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 465 | NEGATED | exactly      | capable          |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 466 | NEGATED | exactly      | magical          |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 467 | NEGATED | exactly      | noteworthy       |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 468 | NEGATED | necessarily  | privy            |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 469 | NEGATED | necessarily  | reassuring       |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 470 | NEGATED | necessarily  | measurable       |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 471 | NEGATED | only         | un-american      |    11 |            10.58 |           0.96 |      3.19 |            72.03 |       18.09 |                     2.30 |
| 472 | NEGATED | exactly      | handy            |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 473 | NEGATED | exactly      | liberal          |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 474 | NEGATED | exactly      | ubiquitous       |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 475 | NEGATED | exactly      | admirable        |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 476 | NEGATED | exactly      | diplomatic       |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 477 | NEGATED | that         | infrequent       |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 478 | NEGATED | only         | unapologetic     |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 479 | NEGATED | exactly      | hungry           |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 480 | NEGATED | only         | colourful        |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 481 | NEGATED | exactly      | instant          |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 482 | NEGATED | exactly      | public           |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 483 | NEGATED | exactly      | imminent         |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 484 | NEGATED | yet          | brown            |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 485 | NEGATED | exactly      | helpless         |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 486 | NEGATED | quite        | ours             |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 487 | NEGATED | exactly      | extensive        |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 488 | NEGATED | necessarily  | symmetrical      |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 489 | NEGATED | only         | budget-friendly  |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 490 | NEGATED | only         | mean-spirited    |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 491 | NEGATED | only         | mesmerizing      |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 492 | NEGATED | only         | delectable       |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 493 | NEGATED | only         | counterintuitive |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 494 | NEGATED | exactly      | fierce           |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 495 | NEGATED | exactly      | prevalent        |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 496 | NEGATED | exactly      | anonymous        |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 497 | NEGATED | exactly      | spoilt           |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 498 | NEGATED | yet          | delinquent       |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |
| 499 | NEGATED | that         | charismatic      |    10 |             9.62 |           0.96 |      3.04 |            65.49 |       17.96 |                     1.99 |


In [22]:
dfs == dfs_plus

True

In [23]:
def _optimize(df_dict, verbose=False):
    for unit, _df in df_dict.items():
        if verbose:
            print('>> Unoptimized <<')
            _df.info(memory_usage='deep')
        str_cols = _df.select_dtypes(exclude='number').columns.to_list()
        int_cols = _df.columns[_df.columns.str.startswith(('r_', 'C', 'R', 'N', 'f', 'index'))].to_list()
        is_float = ~_df.columns.isin(int_cols + str_cols)
        _df[int_cols] = _df[int_cols].apply(pd.to_numeric, downcast = 'unsigned')
        _df.loc[:, is_float] = _df.loc[:, is_float].apply(pd.to_numeric, downcast = 'float')
        _df[str_cols] = _df[str_cols].apply(
            lambda c: c.astype('string').astype('category') 
                    if c.dtype != 'category' and c.nunique() >( len(c) / 2)
                    else c)
        if verbose:
            print('\n--------\n>> Optimized DataFrame')
            _df.info(memory_usage='deep')
        _df['l1'] = _df['l1'].astype('category')
        df_dict[unit] = _df
        if verbose: 
            print('\n\n============\n\n')
    return df_dict

In [24]:
# dfs = _optimize(dfs, verbose=True)

In [25]:
dfs_plus = _optimize(dfs_plus, verbose=True)

>> Unoptimized <<
<class 'pandas.core.frame.DataFrame'>
Index: 246921 entries, NEG-exactly_shy to COM-yet_clear
Data columns (total 59 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   index                     246921 non-null  int64   
 1   l1                        246921 non-null  object  
 2   l2                        246921 non-null  object  
 3   f                         246921 non-null  int64   
 4   E11                       246921 non-null  float64 
 5   r_log_likelihood          246921 non-null  int64   
 6   r_log_likelihood_tt       246921 non-null  int64   
 7   r_odds_ratio_disc         246921 non-null  int64   
 8   r_Dice                    246921 non-null  int64   
 9   r_t_score                 246921 non-null  int64   
 10  r_p1_given2               246921 non-null  int64   
 11  r_p2_given1               246921 non-null  int64   
 12  r_p1_given2_simple        246921 non-null  int64   



--------
>> Optimized DataFrame
<class 'pandas.core.frame.DataFrame'>
Index: 246921 entries, NEG-exactly_shy to COM-yet_clear
Data columns (total 59 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   index                     246921 non-null  uint32  
 1   l1                        246921 non-null  object  
 2   l2                        246921 non-null  category
 3   f                         246921 non-null  uint32  
 4   E11                       246921 non-null  float64 
 5   r_log_likelihood          246921 non-null  uint32  
 6   r_log_likelihood_tt       246921 non-null  uint32  
 7   r_odds_ratio_disc         246921 non-null  uint32  
 8   r_Dice                    246921 non-null  uint32  
 9   r_t_score                 246921 non-null  uint32  
 10  r_p1_given2               246921 non-null  uint32  
 11  r_p2_given1               246921 non-null  uint32  
 12  r_p1_given2_simple        246921 non-

In [26]:
def save_dataframe(input_name, _df, added_measures=False):
    out_dir = SANPI_DIR / 'results' / 'ucs_tables' / 'dataframes' 
    confirm_dir(out_dir)

    out_path = out_dir / input_name.replace('.csv', '.pkl.gz')
    
    if added_measures:
        out_path = out_path.with_name(out_path.name.replace('.pkl.gz', '_extra.pkl.gz'))
    
    _df.to_pickle(out_path)
    


In [27]:
for unit in csv_paths.index:
    # save_dataframe(csv_paths[unit].name, dfs[unit])
    save_dataframe(csv_paths[unit].name, dfs_plus[unit], added_measures=True)
    