In [467]:
import pandas as pd
import re
import source.utils.associate as ass

from source.utils import am_notebooks as an
from source.utils.general import confirm_dir, snake_to_camel

# Columns kept (via `DataFrame.filter`) when an AM table is loaded by `_load_amdf`.
SAMPLE_COLS = ['f', 'f1', 'f2', 'N', 
               'exp_f', 'unexp_f', 'unexp_r',
               'P1',  'P2',
               'dP1', 'dP2',
               'deltaP_mean',
            #    'deltaP_max',
               'LRC', 'G2',
               ]
# Default frequency floor (`f_floor`) for `_load_amdf` and `latexify_sample`.
MIN = 500
# Frequency floors passed as `f_floor` for the adverb / adjective / bigram polarity tables.
POLAR_ADV_MIN = 1000
POLAR_ADJ_MIN = 300
POLAR_BIG_MIN = 100

# Colormap names handed to `Styler.background_gradient` for each table family
# (MIR = mirror subset, SUP = superset; BLIND = context-blind, POLAR = polarity).
# NOTE(review): 'purple_teal' and 'anastasia' are presumably custom colormaps
# registered elsewhere in the project -- confirm.
MIR_BLIND_COLOR = 'BuPu'
SUP_BLIND_COLOR = 'RdPu'
MIR_POLAR_COLOR = 'purple_teal'
SUP_POLAR_COLOR = 'anastasia'

# When True, the `_pull_*_samples` helpers display the full styled samples;
# otherwise they display only the sampled keys.
VERBOSE = True
# Directory that `latexify_sample` writes the generated `.tex` files into.
TEX_AM_SAMPLE_DIR = an.LATEX_TABLES.joinpath('ch4/samples')
# presumably creates the directory when it does not exist yet -- confirm in source.utils.general
confirm_dir(TEX_AM_SAMPLE_DIR)

def get_polar_label(unit):
    """Return the title-cased comparison label for a polarity table.

    `unit` is one of 'adv', 'adj' or 'bigram' (case-insensitive); any other
    value raises `KeyError`.  E.g. 'adv' -> 'Adverb Polarity'.
    """
    unit_names = dict(adv='adverb', adj='adjective', bigram='bigram')
    spelled_out = unit_names[unit.lower()]
    return ' '.join((spelled_out, 'Polarity')).title()
print(get_polar_label('adv'))

Adverb Polarity


In [468]:

def _load_amdf(dataset='direct', sampling='ALL', f_floor=MIN, polar=False, unit='adv'):
    """Load one association-measure (AM) table and trim it to the sample columns.

    The parquet file is read from below `ass.AM_DF_DIR` and passed through
    `ass.adjust_am_names`.

    Parameters
    ----------
    dataset : str
        Dataset tag used in the path ('direct' or 'mirror' in this notebook).
    sampling : str
        Sampling tag used in the file name ('ALL' or 'NEQ' in this notebook).
    f_floor : int
        Frequency floor encoded in the file name (`min{f_floor}x`).
    polar : bool
        If True, load a polarity table for `unit`; otherwise load the
        adverb~adjective table and index it by an `l1~l2` key.
    unit : str
        Unit of the polarity table ('adv', 'adj' or 'bigram'); only used in
        the path when `polar` is True.

    Returns
    -------
    pd.DataFrame
        The `SAMPLE_COLS` columns, sorted by LRC then unexp_r, with
        `deltaP_mean` renamed to `dPavg`, every column name passed through
        `snake_to_camel`, and a `LogR` column appended.
    """
    file_tail = f'min{f_floor}x_extra.parq'
    if polar:
        #^ e.g. polar/RBdirect/adv/extra/polarized-adv_NEQ-direct_min500x_extra.parq
        dir_prefix = 'RB' if dataset == 'direct' else ''
        rel_path = (f'polar/{dir_prefix}{dataset}/{unit}/extra/'
                    f'polarized-{unit}_{sampling}-{dataset}_{file_tail}')
    else:
        rel_path = (f'adv_adj/ANY{dataset}/extra/'
                    f'AdvAdj_{sampling}_any-{dataset}_final-freq_{file_tail}')

    raw_table = pd.read_parquet(ass.AM_DF_DIR.joinpath(rel_path))
    amdf = ass.adjust_am_names(raw_table)

    if not polar:
        # the adverb~adjective table is keyed by its two lemmas joined with '~'
        joined_key = amdf.l1.astype('string') + '~' + amdf.l2.astype('string')
        amdf = amdf.assign(key=joined_key).set_index('key')

    amdf = amdf.filter(SAMPLE_COLS)
    amdf = amdf.sort_values(['LRC', 'unexp_r'])
    amdf = amdf.rename(columns={'deltaP_mean': 'dPavg'})
    amdf.columns = amdf.columns.to_series().apply(snake_to_camel)

    expected = ass.fq.expected_frequencies(amdf, observed=True)
    amdf['LogR'] = ass.am.log_ratio(expected)
    return amdf

# tester = _load_amdf(f_floor=2000, polar=True)
# tester

In [504]:
def latexify_sample(sample_df: pd.DataFrame,
                    cmap: str = None,
                    mirror: bool = False,
                    neq: bool = False,
                    comparison_label: str = 'context--blind',
                    dry: bool = False,  # ! Don't write to file!,
                    f_floor: int = MIN
                    ):
    """Render an AM sample as a LaTeX longtable and save it as `<label>.tex`.

    Parameters
    ----------
    sample_df : pd.DataFrame
        Sample with one row per metric (an "N" row is required by
        `_build_tex_table`) and one column per sampled key.
    cmap : str, optional
        Colormap name passed on to `Styler.background_gradient`.
    mirror : bool
        True for the mirror subset, False for the superset; only affects the
        caption and label.
    neq : bool
        True for the NEQ sampling, False for ALL; only affects the caption
        and label.
    comparison_label : str
        Comparison name used in the caption and label.
    dry : bool
        If True, print the table instead of writing it to disk.
    f_floor : int
        Frequency floor of the sample; recorded in a leading TeX comment.

    Returns
    -------
    None
        The table is written to `TEX_AM_SAMPLE_DIR` (or printed when `dry`).
    """
    caption, label = _set_meta_strings(mirror, neq, comparison_label)
    tex_tab_str = _build_tex_table(sample_df, cmap, caption, label)
    tex_tab_str = '\n'.join(
        (
            f'%> Frequency Floor of Sample = {f_floor:,}',
            f'%> Caption: {caption}',
            f'%> Label: {label}',
            # backslash is doubled: a lone backslash before "s" is an invalid
            # escape sequence (SyntaxWarning on Python 3.12+); output is unchanged
            '\\singlespacing\\footnotesize',
            tex_tab_str,
            '\\normalspacing\\normalsize'
        ))
    am_sample_path = TEX_AM_SAMPLE_DIR.joinpath(f'{label}.tex')
    print(f'Writing "{caption}" Table to\n  {am_sample_path}')
    if not dry:
        am_sample_path.write_text(tex_tab_str, encoding='utf-8')
    else:
        print('\n<< Dry Run --- Nothing Saved >>')
        print(f'Frequency Floor of Sample = {f_floor:,}')
        print(tex_tab_str)


def _set_meta_strings(mirror, neq, comparison_label):
    sampling_tag = 'NEQ' if neq else 'ALL'
    dataset_tag = ('mirror subset' if mirror else 'superset')
    caption = ('\\cmbr{'
               + sampling_tag
               + '+} '
               + dataset_tag.title()
               + f': {comparison_label.title()} AM Sample')
    print(caption)
    label = '-'.join([f'AM={comparison_label.split("-")[-1].lower()}',
                      dataset_tag.split(' ')[0].replace('set', ''),
                     sampling_tag]).strip('-').replace(' ', '-')
    print(label)
    return caption, label


def _build_tex_table(sample_df, cmap, caption, label):
    """Style `sample_df` and return it as the source of a LaTeX longtable.

    `sample_df` has one row per metric (including an "N" row) and one column
    per key.  The integer-digit count of the largest N sets the siunitx
    column width.  Row labels are wrapped in the `cmtt` macro.
    """
    largest_n = sample_df.loc["N", :].max()
    # digits before the decimal point of the largest N
    max_digits = len(str(largest_n).split('.')[0])
    print(f'N = {int(sample_df.T.N.max()):,}')
    print(f'max digits = {max_digits}')

    headed_df = _adjust_header(sample_df)
    styler = (
        headed_df.convert_dtypes()
        .style.background_gradient(cmap, axis=1)
        .format(escape='latex')
        .format_index(escape='latex')
        # .format_index(escape='latex', axis=1)
    )
    sample_sty = an.format_zeros(styler)

    to_latex_options = dict(
        siunitx=True,
        multicol_align='c',
        environment='longtable',
        caption=caption,
        label='tab:' + label.replace('=', '-'),
        convert_css=True,
        sparse_columns=True,
        column_format=_set_col_format(n_cols=len(headed_df.columns),
                                      max_digits=max_digits,
                                      blind='blind' in label),
        hrules='\\midrule',
    )
    raw_tex = sample_sty.to_latex(**to_latex_options)

    # Wrap each row label (a word starting a line, just before the first `&`).
    # A placeholder is substituted first so that the regex replacement
    # template does not have to contain a backslash-letter sequence.
    tagged_tex = re.sub(r'(?<=\n)([\w]+)\s*(?=&)', r'__cmtt{\1} ', raw_tex)
    return tagged_tex.replace('__cmtt', '\\cmtt')

def _adjust_header(sample_df):
    t_sample_df = sample_df.T
    t_sample_df['key'] = t_sample_df.index.str.replace('~', '$\\sim$')
    if sample_df.columns.str.contains('[A-Z_]').any():
        t_sample_df = an.assign_polarity(t_sample_df)
        t_sample_df['polarity'] = t_sample_df.polarity.map(
            {'neg': '$(-)$', 'pos': '$(+)$'})
        t_sample_df = t_sample_df.set_index(['polarity', 'key'])
    else:
        t_sample_df = t_sample_df.set_index(['key'])
    return t_sample_df.sort_index().T


def _set_col_format(n_cols: int, max_digits: int, blind: bool):
    siparam = [f'table-format=-{max_digits}.3', 'drop-zero-decimal']
    if blind:
        siparam.append('negative-color=BrickRed')
    else: 
        siparam.append('round-minimum=0.001')
    column_format = ('r*{'
                     + f'{n_cols}'
                     + '}{S'+repr(siparam).replace("'", "")
                     + '}')

    return column_format


# Smoke test: render two randomly drawn rows as a table and print it (dry=True writes nothing).
# NOTE(review): the frame loaded here is the default ALL / 'direct' table, while
# `neq=True, mirror=True` only change the caption and label -- fine for a dry run.
latexify_sample(_load_amdf(f_floor=2000, polar=False).sample(2).T,
                neq=True, mirror=True, f_floor=2000, dry=True)

\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample
AM=blind-mirror-NEQ
N = 71,961,373
max digits = 8
Writing "\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=blind-mirror-NEQ.tex

<< Dry Run --- Nothing Saved >>
Frequency Floor of Sample = 2,000
%> Frequency Floor of Sample = 2,000
%> Caption: \cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample
%> Label: AM=blind-mirror-NEQ
\singlespacing\footnotesize
\begin{longtable}{r*{2}{S[table-format=-8.3, drop-zero-decimal, negative-color=BrickRed]}}
\caption{\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample} \label{tab:AM-blind-mirror-NEQ} \\
\toprule
{key} & {all$\sim$happy} & {easily$\sim$available} \\
\midrule
\endfirsthead
\caption[]{\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample} \\
\toprule
{key} & {all$\sim$happy} & {easily$\sim$available} \\
\midrule
\endhead
\midrule
\multicolumn{3}{r}{Continued on next page} \\
\midrule
\endfoot
\bot

## Context-Blind

In [240]:
# Load the default (ALL / 'direct', context-blind) table at a frequency floor of 1000.
all_super_blind1k = _load_amdf(f_floor=1000)
# The 20 rows with the largest LRC.
# NOTE(review): `_load_amdf` already applies `ass.adjust_am_names`; the extra
# calls in this cell look redundant -- confirm.
top_lrc_min1k = ass.adjust_am_names(all_super_blind1k).nlargest(20, ['LRC'])
# Display four rows side by side: top LRC, top dPavg overall, and the lowest
# dP2 and dP1 among the top-20-LRC rows (numeric columns only, duplicates dropped).
(pd.concat([top_lrc_min1k.nlargest(1, 'LRC'),
            ass.adjust_am_names(all_super_blind1k).nlargest(1, 'dPavg'),
            top_lrc_min1k.nsmallest(1, 'dP2'),
            top_lrc_min1k.nsmallest(1, 'dP1')]).select_dtypes(include='number').drop_duplicates().T.convert_dtypes().style
 ).background_gradient(axis=1)

key,thinly~veiled,scantily~clad,far~away,upwardly~mobile
f,1333.0,4638.0,1150.0,1893.0
f1,1746.0,4682.0,402768.0,2002.0
f2,1705.0,6562.0,1157.0,20114.0
N,71961373.0,71961373.0,71961373.0,71961373.0
expF,0.041368,0.426941,6.475732,0.559581
unexpF,1332.958632,4637.573059,1143.524268,1892.440419
unexpR,0.999969,0.999908,0.994369,0.999704
P1,0.781818,0.706797,0.99395,0.094114
P2,0.763459,0.990602,0.002855,0.945554
dP1,0.781812,0.706796,0.988369,0.094112


In [241]:
# Same four-row selection as the previous cell, printed as raw LaTeX longtable
# source to inspect what `Styler.to_latex` emits.
# Relies on `all_super_blind1k` from the previous cell.
top_lrc_min1k = (all_super_blind1k).nlargest(20, ['LRC'])
print(pd.concat([top_lrc_min1k.nlargest(1, 'LRC'), 
            (all_super_blind1k).nlargest(1, 'dPavg'), 
            top_lrc_min1k.nsmallest(1, 'dP2'), 
            top_lrc_min1k.nsmallest(1, 'dP1')])
 .select_dtypes(include='number').T.convert_dtypes()
 .style.background_gradient(axis=1)
 .format(escape='latex').format_index(escape='latex').format_index(escape='latex', axis=1)
 .to_latex(siunitx=True, environment='longtable', column_format='S[table-format=8.2]', convert_css=True)
)

\begin{longtable}{S[table-format=8.2]}
{key} & {thinly\textasciitilde veiled} & {scantily\textasciitilde clad} & {far\textasciitilde away} & {upwardly\textasciitilde mobile} \\
\endfirsthead
{key} & {thinly\textasciitilde veiled} & {scantily\textasciitilde clad} & {far\textasciitilde away} & {upwardly\textasciitilde mobile} \\
\endhead
\multicolumn{5}{r}{Continued on next page} \\
\endfoot
\endlastfoot
f & {\cellcolor[HTML]{F7F0F7}} \color[HTML]{000000} 1333.000000 & {\cellcolor[HTML]{023858}} \color[HTML]{F1F1F1} 4638.000000 & {\cellcolor[HTML]{FFF7FB}} \color[HTML]{000000} 1150.000000 & {\cellcolor[HTML]{D9D8EA}} \color[HTML]{000000} 1893.000000 \\
f1 & {\cellcolor[HTML]{FFF7FB}} \color[HTML]{000000} 1746.000000 & {\cellcolor[HTML]{FEF6FB}} \color[HTML]{000000} 4682.000000 & {\cellcolor[HTML]{023858}} \color[HTML]{F1F1F1} 402768.000000 & {\cellcolor[HTML]{FFF7FB}} \color[HTML]{000000} 2002.000000 \\
f2 & {\cellcolor[HTML]{FBF3F9}} \color[HTML]{000000} 1705.000000 & {\cellcolor[HTML]{

In [244]:
def _pull_blind_samples(all_amdf, neq_amdf, k=4):
    """Draw `k` random keys from the NEQ table and pull the same keys from ALL.

    Both frames are indexed by `l1~l2` keys.  Sampling (unseeded) repeats
    until the `k` drawn keys have `k` distinct first parts and `k` distinct
    second parts.

    Returns a dict `{'ALL': ..., 'NEQ': ...}` of transposed samples (one
    column per key).  Keys missing from `all_amdf` are silently left out of
    the 'ALL' sample.
    """
    key_pattern = r'^(?P<l1>.+)~(?P<l2>.+)$'
    while True:
        sample_NEQ = neq_amdf.sample(k).T
        distinct_parts = (sample_NEQ.columns.to_series()
                          .str.extract(key_pattern)
                          .nunique())
        # accept the draw only when no l1 and no l2 is repeated
        if all(distinct_parts == k):
            break

    sample_NEQ = sample_NEQ.convert_dtypes()
    sample_ALL = all_amdf.filter(sample_NEQ.columns, axis=0).T.convert_dtypes()

    if not VERBOSE:
        an.nb_display(sample_NEQ.columns.to_series().to_frame('sample'))
    else:
        for shown in (sample_NEQ, sample_ALL):
            an.nb_display(shown.style.background_gradient(axis=1))
    return {'ALL': sample_ALL, 'NEQ': sample_NEQ}

# _pull_blind_samples(_load_amdf(f_floor=2000), _load_amdf(f_floor=2000, sampling='NEQ'))

### Collect *<font color=DarkViolet>Superset</font>* <font color=Teal>Context-Blind</font> AM Samples and Save as $\LaTeX$ Tables

In [138]:

# > load AM dataframes
# Superset ('direct') context-blind tables at the default frequency floor (MIN),
# for the ALL and NEQ samplings; the first two rows of each are shown as a check.
all_super_blind = _load_amdf()
neq_super_blind = _load_amdf(sampling='NEQ')
an.nb_display(pd.concat([all_super_blind.head(2), neq_super_blind.head(2)]))

Unnamed: 0_level_0,f,f1,f2,N,expF,unexpF,unexpR,P1,P2,dP1,dP2,dPavg,LRC,G2,LogR
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
so~likely,699,5173538,884149,71961373,63564.36,-62865.36,-89.94,0.0,0.0,-0.07,-0.01,-0.04,-6.31,-124953.4,-6.61
most~different,1284,7120428,802192,71961373,79375.23,-78091.23,-60.82,0.0,0.0,-0.1,-0.01,-0.06,-5.88,-154697.9,-6.1
more~good,800,553482,202048,6347362,17618.33,-16818.33,-21.02,0.0,0.0,-0.09,-0.03,-0.06,-4.32,-30751.3,-4.59
as~sure,829,656683,134058,6347362,13869.32,-13040.32,-15.73,0.01,0.0,-0.1,-0.02,-0.06,-3.95,-23069.3,-4.21


In [189]:

# > Pull samples
# The draw is random and unseeded, so re-running this cell yields different keys.
blind_super_samples = _pull_blind_samples(all_amdf=all_super_blind, neq_amdf=neq_super_blind)

key,yet~final,really~happy,physically~able,so~clear
f,640.0,3000.0,522.0,3583.0
f1,53779.0,182968.0,7930.0,566272.0
f2,1212.0,47131.0,22885.0,83958.0
N,6347362.0,6347362.0,6347362.0,6347362.0
expF,10.27,1358.59,28.59,7490.21
unexpF,629.73,1641.41,493.41,-3907.21
unexpR,0.98,0.55,0.95,-1.09
P1,0.53,0.06,0.02,0.04
P2,0.01,0.02,0.07,0.01
dP1,0.52,0.04,0.02,-0.05


key,yet~final,really~happy,physically~able,so~clear
f,640.0,35529.0,1955.0,10704.0
f1,94755.0,1945388.0,63922.0,5173538.0
f2,5736.0,420680.0,210786.0,346404.0
N,71961373.0,71961373.0,71961373.0,71961373.0
expF,7.55,11372.57,187.24,24904.11
unexpF,632.45,24156.43,1767.76,-14200.11
unexpR,0.99,0.68,0.9,-1.33
P1,0.11,0.08,0.01,0.03
P2,0.01,0.02,0.03,0.0
dP1,0.11,0.06,0.01,-0.04


### Collect *<font color=DarkSalmon>Mirror Subset</font>* <font color=Teal>Context-Blind</font> AM Samples and Save as $\LaTeX$ Tables

In [246]:
# > load AM dataframes
# Mirror-subset context-blind tables (ALL and NEQ) at a frequency floor of 300;
# the save cell below must pass the same floor to `latexify_sample`.
all_mirror_blind = _load_amdf(dataset='mirror', f_floor=300)
neq_mirror_blind = _load_amdf(dataset='mirror', f_floor=300, sampling='NEQ')


In [269]:

# > Pull samples
# The draw is random and unseeded, so re-running this cell yields different keys.
blind_mirror_samples = _pull_blind_samples(all_amdf=all_mirror_blind, neq_amdf=neq_mirror_blind)

key,more~enjoyable,particularly~good,that~simple,so~beautiful
f,488.0,416.0,487.0,464.0
f1,121848.0,10020.0,4538.0,47033.0
f2,827.0,13484.0,7580.0,2894.0
N,583470.0,583470.0,583470.0,583470.0
expF,172.71,231.56,58.95,233.28
unexpF,315.29,184.44,428.05,230.72
unexpR,0.65,0.44,0.88,0.5
P1,0.59,0.03,0.06,0.16
P2,0.0,0.04,0.11,0.01
dP1,0.38,0.01,0.06,0.08


key,more~enjoyable,particularly~good,that~simple,so~beautiful
f,1241.0,475.0,529.0,1660.0
f1,285773.0,12946.0,5465.0,129421.0
f2,2315.0,31461.0,25344.0,7912.0
N,1680633.0,1680633.0,1680633.0,1680633.0
expF,393.64,242.35,82.41,609.28
unexpF,847.36,232.65,446.59,1050.72
unexpR,0.68,0.49,0.84,0.63
P1,0.54,0.02,0.02,0.21
P2,0.0,0.04,0.1,0.01
dP1,0.37,0.01,0.02,0.13


### 💾 Save <font color=Teal>Context-Blind</font>  Samples as $\LaTeX$ tables

In [505]:
# > Save Samples as $\LaTeX$ tables
# superset samples use the default frequency floor
for sam, sdf in blind_super_samples.items():
    latexify_sample(sdf, cmap=SUP_BLIND_COLOR, neq=(sam == 'NEQ'))

# mirror-subset samples were loaded with a frequency floor of 300
for sam, sdf in blind_mirror_samples.items():
    latexify_sample(sdf,
                    cmap=MIR_BLIND_COLOR,
                    mirror=True,
                    neq=(sam == 'NEQ'),
                    f_floor=300)


\cmbr{ALL+} Superset: Context--Blind AM Sample
AM=blind-super-ALL
N = 71,961,373
max digits = 8
Writing "\cmbr{ALL+} Superset: Context--Blind AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=blind-super-ALL.tex
\cmbr{NEQ+} Superset: Context--Blind AM Sample
AM=blind-super-NEQ
N = 6,347,362
max digits = 7
Writing "\cmbr{NEQ+} Superset: Context--Blind AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=blind-super-NEQ.tex
\cmbr{ALL+} Mirror Subset: Context--Blind AM Sample
AM=blind-mirror-ALL
N = 1,680,633
max digits = 7
Writing "\cmbr{ALL+} Mirror Subset: Context--Blind AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=blind-mirror-ALL.tex
\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample
AM=blind-mirror-NEQ
N = 583,470
max digits = 6
Writing "\cmbr{NEQ+} Mirror Subset: Context--Blind AM Sample" Table to
  /share/compling/projects/arh2

## Associations with Polarity

### Adverb Polarity

In [271]:
def _pull_polar_samples(all_amdf, neq_amdf, k_lex=2):
    """Sample `k_lex` lexical items and return all of their rows from both tables.

    Both frames are indexed by `<l1>~<l2>` keys.  Only l2 values that occur
    in more than one row of `neq_amdf` are eligible.  The draw is random
    and unseeded.

    Returns a dict `{'ALL': ..., 'NEQ': ...}` of transposed samples (one
    column per key).
    """
    #> how often each l2 string occurs in the NEQ index
    l2_counts = neq_amdf.index.str.split('~').str[1].value_counts()
    #> l2 strings that have both + and - rows
    paired_l2 = l2_counts.loc[lambda n: n > 1]
    drawn_l2 = paired_l2.to_frame().sample(k_lex).index
    #> tuple key for sampled lexical items; the leading '~' anchors the match
    #> to the complete l2 part of the key
    sample_lex = tuple([f'~{lex}' for lex in drawn_l2])

    def _rows_for_sample(amdf):
        # rows whose key ends in one of the sampled '~l2' suffixes, transposed
        is_sampled = amdf.index.str.endswith(sample_lex)
        return amdf.loc[is_sampled, :].T.convert_dtypes()

    sample_NEQ = _rows_for_sample(neq_amdf)
    sample_ALL = _rows_for_sample(all_amdf)

    if not VERBOSE:
        an.nb_display(sample_NEQ.columns.to_series().to_frame('sample'))
    else:
        for shown in (sample_NEQ, sample_ALL):
            an.nb_display(shown.style.background_gradient(axis=1))
    return {'ALL': sample_ALL, 'NEQ': sample_NEQ}

In [274]:
# Superset ('direct') adverb-polarity tables for the ALL and NEQ samplings,
# at the adverb frequency floor, ordered by f2 and then f1.
all_super_adv = _load_amdf(dataset='direct', sampling='ALL', unit='adv', polar=True, f_floor=POLAR_ADV_MIN).sort_values(['f2', 'f1'])
neq_super_adv = _load_amdf(dataset='direct', sampling='NEQ', unit='adv', polar=True, f_floor=POLAR_ADV_MIN).sort_values(['f2', 'f1'])

In [307]:
# Random, unseeded draw: re-running this cell yields different adverbs.
adv_super_samples = _pull_polar_samples(all_amdf=all_super_adv, neq_amdf=neq_super_adv)

key,COM~legally,NEG~legally,COM~easily,NEG~easily
f,1339.0,3213.0,5357.0,7638.0
f1,3173681.0,3173681.0,3173681.0,3173681.0
f2,4552.0,4552.0,12995.0,12995.0
N,6347362.0,6347362.0,6347362.0,6347362.0
expF,2276.0,2276.0,6497.5,6497.5
unexpF,-937.0,937.0,-1140.5,1140.5
unexpR,-0.7,0.29,-0.21,0.15
P1,0.29,0.71,0.41,0.59
P2,0.0,0.0,0.0,0.0
dP1,-0.21,0.21,-0.09,0.09


key,NEG~legally,COM~legally,NEG~easily,COM~easily
f,3213.0,28913.0,7638.0,114031.0
f1,3173681.0,68787692.0,3173681.0,68787692.0
f2,32126.0,32126.0,121669.0,121669.0
N,71961373.0,71961373.0,71961373.0,71961373.0
expF,1416.84,30709.16,5365.91,116303.09
unexpF,1796.16,-1796.16,2272.09,-2272.09
unexpR,0.56,-0.06,0.3,-0.02
P1,0.1,0.9,0.06,0.94
P2,0.0,0.0,0.0,0.0
dP1,0.06,-0.06,0.02,-0.02


In [317]:
# Mirror-subset adverb-polarity tables for the ALL and NEQ samplings,
# at the adverb frequency floor, ordered by f2 and then f1.
all_mirror_adv = _load_amdf(dataset='mirror', sampling='ALL', unit='adv', polar=True, f_floor=POLAR_ADV_MIN).sort_values(['f2', 'f1'])
neq_mirror_adv = _load_amdf(dataset='mirror', sampling='NEQ', unit='adv', polar=True, f_floor=POLAR_ADV_MIN).sort_values(['f2', 'f1'])

In [340]:
# Random, unseeded draw: re-running this cell yields different adverbs.
adv_mirror_samples = _pull_polar_samples(all_amdf=all_mirror_adv, neq_amdf=neq_mirror_adv)

key,POS~entirely,NEG~entirely,NEG~even,POS~even
f,1596.0,2415.0,2838.0,12140.0
f1,291735.0,291735.0,291735.0,291735.0
f2,4011.0,4011.0,14978.0,14978.0
N,583470.0,583470.0,583470.0,583470.0
expF,2005.5,2005.5,7489.0,7489.0
unexpF,-409.5,409.5,-4651.0,4651.0
unexpR,-0.26,0.17,-1.64,0.38
P1,0.4,0.6,0.19,0.81
P2,0.01,0.01,0.01,0.04
dP1,-0.1,0.1,-0.32,0.32


key,NEG~entirely,POS~entirely,NEG~even,POS~even
f,2415.0,7863.0,2838.0,58341.0
f1,291735.0,1388898.0,291735.0,1388898.0
f2,10278.0,10278.0,61179.0,61179.0
N,1680633.0,1680633.0,1680633.0,1680633.0
expF,1784.12,8493.88,10619.84,50559.16
unexpF,630.88,-630.88,-7781.84,7781.84
unexpR,0.26,-0.08,-2.74,0.13
P1,0.23,0.77,0.05,0.95
P2,0.01,0.01,0.01,0.04
dP1,0.06,-0.06,-0.13,0.13


#### 💾 Save <font color=DeepPink>Adverb Polarity</font> Samples as $\LaTeX$ tables


In [506]:
# > Save Samples as $\LaTeX$ tables
# one pass per dataset: (samples, colormap, mirror flag) -- superset first, then mirror subset
for polar_samples, polar_cmap, is_mirror in (
        (adv_super_samples, SUP_POLAR_COLOR, False),
        (adv_mirror_samples, MIR_POLAR_COLOR, True)):
    for sam, sdf in polar_samples.items():
        latexify_sample(sdf,
                        cmap=polar_cmap,
                        mirror=is_mirror,
                        neq=(sam == 'NEQ'),
                        f_floor=POLAR_ADV_MIN,
                        comparison_label=get_polar_label('adv'))

\cmbr{ALL+} Superset: Adverb Polarity AM Sample
AM=adverb-polarity-super-ALL
N = 71,961,373
max digits = 8
Writing "\cmbr{ALL+} Superset: Adverb Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adverb-polarity-super-ALL.tex
\cmbr{NEQ+} Superset: Adverb Polarity AM Sample
AM=adverb-polarity-super-NEQ
N = 6,347,362
max digits = 7
Writing "\cmbr{NEQ+} Superset: Adverb Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adverb-polarity-super-NEQ.tex
\cmbr{ALL+} Mirror Subset: Adverb Polarity AM Sample
AM=adverb-polarity-mirror-ALL
N = 1,680,633
max digits = 7
Writing "\cmbr{ALL+} Mirror Subset: Adverb Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adverb-polarity-mirror-ALL.tex
\cmbr{NEQ+} Mirror Subset: Adverb Polarity AM Sample
AM=adverb-polarity-mirror-NEQ
N = 583,470
max digits = 6
Writing "\cmbr{NEQ+} Mirr

### Adjective Polarity

In [314]:
# Superset ('direct') adjective-polarity tables for the ALL and NEQ samplings,
# at the adjective frequency floor, ordered by f2 and then f1.
all_super_adj = _load_amdf(dataset='direct', sampling='ALL', unit='adj', polar=True, f_floor=POLAR_ADJ_MIN).sort_values(['f2', 'f1'])
neq_super_adj = _load_amdf(dataset='direct', sampling='NEQ', unit='adj', polar=True, f_floor=POLAR_ADJ_MIN).sort_values(['f2', 'f1'])

In [315]:
# Random, unseeded draw: re-running this cell yields different adjectives.
adj_super_samples = _pull_polar_samples(all_amdf=all_super_adj, neq_amdf=neq_super_adj)

key,NEG~toxic,COM~toxic,NEG~gorgeous,COM~gorgeous
f,675.0,1314.0,700.0,1500.0
f1,3173681.0,3173681.0,3173681.0,3173681.0
f2,1989.0,1989.0,2200.0,2200.0
N,6347362.0,6347362.0,6347362.0,6347362.0
expF,994.5,994.5,1100.0,1100.0
unexpF,-319.5,319.5,-400.0,400.0
unexpR,-0.47,0.24,-0.57,0.27
P1,0.34,0.66,0.32,0.68
P2,0.0,0.0,0.0,0.0
dP1,-0.16,0.16,-0.18,0.18


key,NEG~toxic,COM~toxic,NEG~gorgeous,COM~gorgeous
f,675.0,28448.0,700.0,32156.0
f1,3173681.0,68787692.0,3173681.0,68787692.0
f2,29123.0,29123.0,32856.0,32856.0
N,71961373.0,71961373.0,71961373.0,71961373.0
expF,1284.4,27838.6,1449.03,31406.97
unexpF,-609.4,609.4,-749.03,749.03
unexpR,-0.9,0.02,-1.07,0.02
P1,0.02,0.98,0.02,0.98
P2,0.0,0.0,0.0,0.0
dP1,-0.02,0.02,-0.02,0.02


In [342]:
# Mirror-subset adjective-polarity tables for the ALL and NEQ samplings,
# at the adjective frequency floor, ordered by f2 and then f1.
all_mirror_adj = _load_amdf(dataset='mirror', sampling='ALL', unit='adj', polar=True, f_floor=POLAR_ADJ_MIN).sort_values(['f2', 'f1'])
neq_mirror_adj = _load_amdf(dataset='mirror', sampling='NEQ', unit='adj', polar=True, f_floor=POLAR_ADJ_MIN).sort_values(['f2', 'f1'])

In [343]:
# Random, unseeded draw: re-running this cell yields different adjectives.
adj_mirror_samples = _pull_polar_samples(all_amdf=all_mirror_adj, neq_amdf=neq_mirror_adj)

key,POS~scared,NEG~scared,POS~free,NEG~free
f,354.0,396.0,799.0,889.0
f1,291735.0,291735.0,291735.0,291735.0
f2,750.0,750.0,1688.0,1688.0
N,583470.0,583470.0,583470.0,583470.0
expF,375.0,375.0,844.0,844.0
unexpF,-21.0,21.0,-45.0,45.0
unexpR,-0.06,0.05,-0.06,0.05
P1,0.47,0.53,0.47,0.53
P2,0.0,0.0,0.0,0.0
dP1,-0.03,0.03,-0.03,0.03


key,NEG~scared,POS~scared,NEG~free,POS~free
f,396.0,1681.0,889.0,3896.0
f1,291735.0,1388898.0,291735.0,1388898.0
f2,2077.0,2077.0,4785.0,4785.0
N,1680633.0,1680633.0,1680633.0,1680633.0
expF,360.54,1716.46,830.61,3954.39
unexpF,35.46,-35.46,58.39,-58.39
unexpR,0.09,-0.02,0.07,-0.01
P1,0.19,0.81,0.19,0.81
P2,0.0,0.0,0.0,0.0
dP1,0.02,-0.02,0.01,-0.01


#### 💾 Save <font color=DarkViolet>Adjective Polarity</font> Samples as $\LaTeX$ tables

In [507]:

# > Save Samples as $\LaTeX$ tables
# one pass per dataset: (samples, colormap, mirror flag) -- superset first, then mirror subset
for polar_samples, polar_cmap, is_mirror in (
        (adj_super_samples, SUP_POLAR_COLOR, False),
        (adj_mirror_samples, MIR_POLAR_COLOR, True)):
    for sam, sdf in polar_samples.items():
        latexify_sample(sdf,
                        cmap=polar_cmap,
                        mirror=is_mirror,
                        neq=(sam == 'NEQ'),
                        f_floor=POLAR_ADJ_MIN,
                        comparison_label=get_polar_label('adj'))

\cmbr{ALL+} Superset: Adjective Polarity AM Sample
AM=adjective-polarity-super-ALL
N = 71,961,373
max digits = 8
Writing "\cmbr{ALL+} Superset: Adjective Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adjective-polarity-super-ALL.tex
\cmbr{NEQ+} Superset: Adjective Polarity AM Sample
AM=adjective-polarity-super-NEQ
N = 6,347,362
max digits = 7
Writing "\cmbr{NEQ+} Superset: Adjective Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adjective-polarity-super-NEQ.tex
\cmbr{ALL+} Mirror Subset: Adjective Polarity AM Sample
AM=adjective-polarity-mirror-ALL
N = 1,680,633
max digits = 7
Writing "\cmbr{ALL+} Mirror Subset: Adjective Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=adjective-polarity-mirror-ALL.tex
\cmbr{NEQ+} Mirror Subset: Adjective Polarity AM Sample
AM=adjective-polarity-mirror-NEQ
N = 583,47

### Bigram Polarity

In [345]:
# Bigram-polarity tables for both datasets (mirror subset and 'direct' superset)
# and both samplings (ALL, NEQ), at the bigram frequency floor, ordered by f2 and then f1.
all_mirror_bigram = _load_amdf(dataset='mirror', sampling='ALL', unit='bigram', polar=True, f_floor=POLAR_BIG_MIN).sort_values(['f2', 'f1'])
neq_mirror_bigram = _load_amdf(dataset='mirror', sampling='NEQ', unit='bigram', polar=True, f_floor=POLAR_BIG_MIN).sort_values(['f2', 'f1'])
all_super_bigram = _load_amdf(dataset='direct', sampling='ALL', unit='bigram', polar=True, f_floor=POLAR_BIG_MIN).sort_values(['f2', 'f1'])
neq_super_bigram = _load_amdf(dataset='direct', sampling='NEQ', unit='bigram', polar=True, f_floor=POLAR_BIG_MIN).sort_values(['f2', 'f1'])

In [365]:
# Random, unseeded draw: re-running this cell yields different bigrams.
bigram_mirror_samples = _pull_polar_samples(all_amdf=all_mirror_bigram, neq_amdf=neq_mirror_bigram)

key,POS~so_great,NEG~so_great,POS~too_big,NEG~too_big
f,213.0,280.0,377.0,787.0
f1,291735.0,291735.0,291735.0,291735.0
f2,493.0,493.0,1164.0,1164.0
N,583470.0,583470.0,583470.0,583470.0
expF,246.5,246.5,582.0,582.0
unexpF,-33.5,33.5,-205.0,205.0
unexpR,-0.16,0.12,-0.54,0.26
P1,0.43,0.57,0.32,0.68
P2,0.0,0.0,0.0,0.0
dP1,-0.07,0.07,-0.18,0.18


key,NEG~so_great,POS~so_great,NEG~too_big,POS~too_big
f,280.0,979.0,787.0,1754.0
f1,291735.0,1388898.0,291735.0,1388898.0
f2,1259.0,1259.0,2541.0,2541.0
N,1680633.0,1680633.0,1680633.0,1680633.0
expF,218.55,1040.45,441.08,2099.92
unexpF,61.45,-61.45,345.92,-345.92
unexpR,0.22,-0.06,0.44,-0.2
P1,0.22,0.78,0.31,0.69
P2,0.0,0.0,0.0,0.0
dP1,0.05,-0.05,0.14,-0.14


In [383]:
# Random, unseeded draw: re-running this cell yields different bigrams.
bigram_super_samples = _pull_polar_samples(all_amdf=all_super_bigram, neq_amdf=neq_super_bigram)

key,COM~very_deep,NEG~very_deep,COM~too_bad,NEG~too_bad
f,706.0,896.0,2499.0,19488.0
f1,3173681.0,3173681.0,3173681.0,3173681.0
f2,1602.0,1602.0,21987.0,21987.0
N,6347362.0,6347362.0,6347362.0,6347362.0
expF,801.0,801.0,10993.5,10993.5
unexpF,-95.0,95.0,-8494.5,8494.5
unexpR,-0.13,0.11,-3.4,0.44
P1,0.44,0.56,0.11,0.89
P2,0.0,0.0,0.0,0.01
dP1,-0.06,0.06,-0.39,0.39


key,NEG~very_deep,COM~very_deep,NEG~too_bad,COM~too_bad
f,896.0,15241.0,19488.0,53191.0
f1,3173681.0,68787692.0,3173681.0,68787692.0
f2,16137.0,16137.0,72679.0,72679.0
N,71961373.0,71961373.0,71961373.0,71961373.0
expF,711.68,15425.32,3205.33,69473.67
unexpF,184.32,-184.32,16282.67,-16282.67
unexpR,0.21,-0.01,0.84,-0.31
P1,0.06,0.94,0.27,0.73
P2,0.0,0.0,0.01,0.0
dP1,0.01,-0.01,0.22,-0.22


#### 💾 Save <font color=DodgerBlue>Bigram Polarity</font> Samples as $\LaTeX$ tables


In [508]:
# > Save Samples as $\LaTeX$ tables
# The bigram tables must be built from the *bigram* samples.  Iterating the
# adjective samples here would write adjective data under the
# "Bigram Polarity" captions and file names.
# > mirror
for sam, sdf in bigram_mirror_samples.items():

    latexify_sample(sdf, cmap=MIR_POLAR_COLOR,
                    neq=(sam=='NEQ'), f_floor=POLAR_BIG_MIN,
                        comparison_label=get_polar_label('bigram'),
                        mirror=True)

# > super
for sam, sdf in bigram_super_samples.items():

    latexify_sample(sdf, cmap=SUP_POLAR_COLOR, neq=(sam=='NEQ'), f_floor=POLAR_BIG_MIN,
                        comparison_label=get_polar_label('bigram'),
                        mirror=False)

\cmbr{ALL+} Mirror Subset: Bigram Polarity AM Sample
AM=bigram-polarity-mirror-ALL
N = 1,680,633
max digits = 7
Writing "\cmbr{ALL+} Mirror Subset: Bigram Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=bigram-polarity-mirror-ALL.tex
\cmbr{NEQ+} Mirror Subset: Bigram Polarity AM Sample
AM=bigram-polarity-mirror-NEQ
N = 583,470
max digits = 6
Writing "\cmbr{NEQ+} Mirror Subset: Bigram Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=bigram-polarity-mirror-NEQ.tex
\cmbr{ALL+} Superset: Bigram Polarity AM Sample
AM=bigram-polarity-super-ALL
N = 71,961,373
max digits = 8
Writing "\cmbr{ALL+} Superset: Bigram Polarity AM Sample" Table to
  /share/compling/projects/arh234/OverleafDissertex/assets/tables/ch4/samples/AM=bigram-polarity-super-ALL.tex
\cmbr{NEQ+} Superset: Bigram Polarity AM Sample
AM=bigram-polarity-super-NEQ
N = 6,347,362
max digits = 7
Writing "\cmbr{NEQ+