In [91]:
import contextlib
from pathlib import Path

import pandas as pd

from source.utils.am_notebooks import catify, embolden, nb_show_table
from source.utils.dataframes import NEG_REGEX, POS_FEW_REGEX
from source.utils.general import HIT_TABLES_DIR

In [103]:

def show_hit_sample(path, n=1,
                    parts=None,
                    transpose=True,
                    show_following_neg=False,
                    head=None,
                    not_only=False, 
                    positive_few=False) -> pd.DataFrame:
    if not parts:
        parts = ['PccVa', 'Apw']
    path = Path(path)
    if any([p.endswith(('.parq', '.parquet')) for p in path.parts]):

        try:
            _sample = pd.read_parquet(path,
                                    engine='pyarrow', filters=[('part', 'in', parts)])
        except Exception:
            _sample = pd.read_parquet(path)
    elif '.csv' in path.suffixes:
        _sample = catify(pd.read_csv(path, nrows=1500, index_col='hit_id'))
    else:
        _sample = catify(pd.read_pickle(path))
        with contextlib.suppress(AttributeError):
            _sample = _sample.loc[_sample.part.isin(parts), :]
    if not_only: 
        transpose=False
        _sample = _sample.loc[_sample.token_str.str.lower().str.contains('not only'), :].filter(
            ['bigram', 'colloc', 'trigger_lemma', 'trigger_lower', 'neg_lemma', 'adv_form', 'adj_form', 'bigram_lower', 'all_forms_lower', 'hit_text', 'text_window', 'token_str', 'sent_text'])
    if positive_few: 
        _sample = _sample.loc[_sample.token_str.str.lower().str.contains(POS_FEW_REGEX), :]
    if show_following_neg:
        _sample = _sample.loc[_sample.token_str.str.contains(NEG_REGEX), :]
    if head:
        _sample = _sample.loc[(_sample.filter(
            regex=r'[rg]_head').iloc[:, 0]) == head, :]
    _sample = _sample.sample(n)
    for text_col in _sample.filter(['text_window', 'hit_text', 'token_str', 'sent_text']).columns:
        _formatted = '*' + embolden(_sample[text_col], mono=False,
                                    bold_regex=f"({_sample.filter(like='adv_form').iat[0,0]} {_sample.filter(like='adj_form').iat[0,0]})") + '*'
        if 'trigger_lower' in _sample.columns:
            trigger_lower = _sample.trigger_lower.squeeze()
            trigger_regex = r'|'.join(
                [trigger_lower, trigger_lower.capitalize(), trigger_lower.upper()])
            _formatted = _formatted.replace(
                r'\b('+trigger_regex+r')\b', r'`\1`', regex=True)
        _sample[text_col] = _formatted
    if n == 1 and transpose:
        # print(_sample.filter(['all_forms_lower', 'bigram_lower','bigram']).iat[0,0])
        hit_label = f": *{_sample.filter(['all_forms_lower', 'bigram_lower','bigram', 'colloc']).iat[0,0]}*".replace(
            '_', ' ')
    else:
        hit_label = ''
    nb_show_table(
        _sample, transpose=transpose, italics=False,
        title=f'\nSample Hit from `{path.relative_to(HIT_TABLES_DIR)}`{hit_label}\n')
    return _sample

In [40]:
samples = [show_hit_sample(HIT_TABLES_DIR.joinpath('POSmirror/NEQ-POSmirror_final_sample.24080322.parq'), parts=['Pcc00', 'PccVa', 'Nyt1', 'Apw' ])]


Sample Hit from `POSmirror/NEQ-POSmirror_final_sample.24080322.parq`: *some quite reasonable*

|                           | `pcc_eng_00_001.6058_x0009803_17:1-6-7`                                                                                                                                                                                                                                                                                                                                                                  |
|:--------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`bigram`**              | quite_reasonable        

Sample Hit from `POSmirror/NEQ-POSmirror_final_sample.24072400.parq`: *something so crucial*

|                       | `pcc_eng_val_3.11224_x52656_18:25-26-27`                                                                                                                                   |
|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`bigram`**          | so_crucial                                                                                                                                                                 |
| **`sent_text`**       | Even though I had found it in the "household kitchen faucet" section, it was incredibly ignorant to proceed blindly into purchasing `something` __so crucial__.            |
| **`adv_form`**        | so                                                                                                                                                                         |
| **`adj_form`**        | crucial                                                                                                                                                                    |
| **`hit_text`**        | `something` __so crucial__ .                                                                                                                                               |
| **`text_window`**     | ignorant to proceed blindly into purchasing `something` __so crucial__ .                                                                                                   |
| **`sent_id`**         | pcc_eng_val_3.11224_x52656_18                                                                                                                                              |
| **`match_id`**        | 25-26-27                                                                                                                                                                   |
| **`bigram_id`**       | pcc_eng_val_3.11224_x52656_18:26-27                                                                                                                                        |
| **`token_str`**       | Even though I had found it in the " household kitchen faucet " section , it was incredibly ignorant to proceed blindly into purchasing `something` __so crucial__ .        |
| **`lemma_str`**       | *even though I have find it in the " household kitchen faucet " section , it be incredibly ignorant to proceed blindly into purchase something so crucial .*               |
| **`mod_deprel`**      | advmod                                                                                                                                                                     |
| **`mod_head`**        | ADJ                                                                                                                                                                        |
| **`mir_deprel`**      | amod                                                                                                                                                                       |
| **`mir_head`**        | MIR                                                                                                                                                                        |
| **`mir_lemma`**       | something                                                                                                                                                                  |
| **`adv_lemma`**       | so                                                                                                                                                                         |
| **`adj_lemma`**       | crucial                                                                                                                                                                    |
| **`mir_form`**        | something                                                                                                                                                                  |
| **`mir_index`**       | 24                                                                                                                                                                         |
| **`adv_index`**       | 25                                                                                                                                                                         |
| **`adj_index`**       | 26                                                                                                                                                                         |
| **`dep_mod`**         | *{'node': 'mod', 'contiguous': True, 'relation': 'advmod', 'head': {'lemma': 'crucial', 'ix': 26, 'xpos': 'JJ'}, 'target': {'lemma': 'so', 'ix': 25, 'xpos': 'RB'}}*       |
| **`dep_mir`**         | *{'node': 'mir', 'contiguous': False, 'relation': 'amod', 'head': {'lemma': 'something', 'ix': 24, 'xpos': 'NN'}, 'target': {'lemma': 'crucial', 'ix': 26, 'xpos': 'JJ'}}* |
| **`mir_form_lower`**  | something                                                                                                                                                                  |
| **`adv_form_lower`**  | so                                                                                                                                                                         |
| **`adj_form_lower`**  | crucial                                                                                                                                                                    |
| **`utt_len`**         | 28                                                                                                                                                                         |
| **`json_source`**     | /share/compling/data/sanpi/1_json_grew-matches/POSmirror/bigram-PccVa.pos-mirror-L/BIGRAM.pcc_eng_val-03.json                                                              |
| **`bigram_lower`**    | so_crucial                                                                                                                                                                 |
| **`all_forms_lower`** | something_so_crucial                                                                                                                                                       |
| **`pattern`**         | pos-mirror-L                                                                                                                                                               |
| **`category`**        | POSmirror                                                                                                                                                                  |
| **`trigger_lower`**   | something                                                                                                                                                                  |
| **`trigger_lemma`**   | something                                                                                                                                                                  |
| **`dep_distance`**    | 1                                                                                                                                                                          |
| **`window_len`**      | 10                                                                                                                                                                         |
| **`quarantine`**      | False                                                                                                                                                                      |
| **`id_prefix`**       | pcc_eng_val_3                                                                                                                                                              |
| **`part`**            | PccVa                                                                                                                                                                      |




Sample Hit from `POSmirror/NEQ-POSmirror_final_sample.24072400.parq`: *sometimes really enough*

|                       | `pcc_eng_val_3.03145_x39665_10:1-5-6`                                                                                                                                     |
|:----------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`bigram`**          | really_enough                                                                                                                                                             |
| **`sent_text`**       | *`Sometimes`, it is __really enough__ to not say much about your reasons and let your decision speak for itself.*                                                         |
| **`adv_form`**        | really                                                                                                                                                                    |
| **`adj_form`**        | enough                                                                                                                                                                    |
| **`hit_text`**        | *`Sometimes` , it is __really enough__ to not say much*                                                                                                                   |
| **`text_window`**     | *`sometimes` , it is __really enough__ to not say much about your*                                                                                                        |
| **`sent_id`**         | pcc_eng_val_3.03145_x39665_10                                                                                                                                             |
| **`match_id`**        | 1-5-6                                                                                                                                                                     |
| **`bigram_id`**       | pcc_eng_val_3.03145_x39665_10:5-6                                                                                                                                         |
| **`token_str`**       | *`Sometimes` , it is __really enough__ to not say much about your reasons and let your decision speak for itself .*                                                       |
| **`lemma_str`**       | *sometimes , it be really enough to not say much about you reason and let you decision speak for itself .*                                                                |
| **`mod_deprel`**      | advmod                                                                                                                                                                    |
| **`mod_head`**        | ADJ                                                                                                                                                                       |
| **`mir_deprel`**      | advmod                                                                                                                                                                    |
| **`mir_head`**        | ADJ                                                                                                                                                                       |
| **`mir_lemma`**       | sometimes                                                                                                                                                                 |
| **`adv_lemma`**       | really                                                                                                                                                                    |
| **`adj_lemma`**       | enough                                                                                                                                                                    |
| **`mir_form`**        | Sometimes                                                                                                                                                                 |
| **`mir_index`**       | 0                                                                                                                                                                         |
| **`adv_index`**       | 4                                                                                                                                                                         |
| **`adj_index`**       | 5                                                                                                                                                                         |
| **`dep_mod`**         | *{'node': 'mod', 'contiguous': True, 'relation': 'advmod', 'head': {'lemma': 'enough', 'ix': 5, 'xpos': 'JJ'}, 'target': {'lemma': 'really', 'ix': 4, 'xpos': 'RB'}}*     |
| **`dep_mir`**         | *{'node': 'mir', 'contiguous': False, 'relation': 'advmod', 'head': {'lemma': 'enough', 'ix': 5, 'xpos': 'JJ'}, 'target': {'lemma': 'sometimes', 'ix': 0, 'xpos': 'RB'}}* |
| **`mir_form_lower`**  | sometimes                                                                                                                                                                 |
| **`adv_form_lower`**  | really                                                                                                                                                                    |
| **`adj_form_lower`**  | enough                                                                                                                                                                    |
| **`utt_len`**         | 21                                                                                                                                                                        |
| **`json_source`**     | /share/compling/data/sanpi/1_json_grew-matches/POSmirror/bigram-PccVa.pos-mirror-R/BIGRAM.pcc_eng_val-03.json                                                             |
| **`bigram_lower`**    | really_enough                                                                                                                                                             |
| **`all_forms_lower`** | sometimes_really_enough                                                                                                                                                   |
| **`pattern`**         | pos-mirror-R                                                                                                                                                              |
| **`category`**        | POSmirror                                                                                                                                                                 |
| **`trigger_lower`**   | sometimes                                                                                                                                                                 |
| **`trigger_lemma`**   | sometimes                                                                                                                                                                 |
| **`dep_distance`**    | 4                                                                                                                                                                         |
| **`window_len`**      | 12                                                                                                                                                                        |
| **`quarantine`**      | False                                                                                                                                                                     |
| **`id_prefix`**       | pcc_eng_val_3                                                                                                                                                             |
| **`part`**            | PccVa                                                                                                                                                                     |


In [50]:
show_hit_sample(HIT_TABLES_DIR.joinpath('NEGmirror/condensed/bigram-PccVa_all-NEGmirror_unique-bigram-id_hits.pkl.gz' ))


Sample Hit from `NEGmirror/condensed/bigram-PccVa_all-NEGmirror_unique-bigram-id_hits.pkl.gz`: *rarely as attractive*

|                           | `pcc_eng_val_2.04602_x23715_22:18-19-20`                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
|:--------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Unnamed: 0_level_0,colloc,sent_text,neg_form,adv_form,adj_form,hit_text,text_window,sent_id,match_id,bigram_id,...,dep_mod,neg_form_lower,adv_form_lower,adj_form_lower,utt_len,json_source,bigram_lower,all_forms_lower,pattern,category
hit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
pcc_eng_val_2.04602_x23715_22:18-19-20,as_attractive,"*But second more than anything else, the inter...",rarely,as,attractive,*rarely __as attractive__ as they appear in*,*famous Hollywood stars are rarely __as attrac...,pcc_eng_val_2.04602_x23715_22,18-19-20,pcc_eng_val_2.04602_x23715_22:19-20,...,"{'node': 'mod', 'contiguous': True, 'relation'...",rarely,as,attractive,27,/share/compling/data/sanpi/1_json_grew-matches...,as_attractive,rarely_as_attractive,neg-mirror-R,NEGmirror


In [104]:
show_hit_sample(HIT_TABLES_DIR.joinpath('RBdirect/condensed/Pcc00_all-RBdirect_unique-bigram-id_hits.csv.bz2' ), 
                n=5, not_only=True)


Sample Hit from `RBdirect/condensed/Pcc00_all-RBdirect_unique-bigram-id_hits.csv.bz2`

|                                               | `colloc`         | `neg_lemma`   | `adv_form`   | `adj_form`   | `hit_text`                                                         | `text_window`                                                                   | `token_str`                                                                                                                                                                           | `sent_text`                                                                                                                                                                      |
|:----------------------------------------------|:-----------------|:--------------|:-------------|:-------------|:-------------------------------------------------------------------|:--------------------------------------------------------------------------------|:-----------------

Unnamed: 0_level_0,colloc,neg_lemma,adv_form,adj_form,hit_text,text_window,token_str,sent_text
hit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
pcc_eng_00_001.5549_x0008939_2:1-6-7,extremely_good,not,extremely,good,*Not only are our distributors __extremely goo...,*Not only are our distributors __extremely goo...,*Not only are our distributors __extremely goo...,*Not only are our distributors __extremely goo...
pcc_eng_00_001.1946_x0003163_12:24-25-26,only_unstoppable,not,only,unstoppable,"*not only unstoppable , it is gathering*","*of the world is not only unstoppable , it is ...",*Julia Gillard is absolutely right in saying :...,*Julia Gillard is absolutely right in saying: ...
pcc_eng_00_001.0817_x0001331_058:12-13-14,only_onerous,not,only,onerous,"*not only onerous , but make demands*","*, the bills are not only onerous , but make d...","*As far as drug makers are concerned , the bil...","*As far as drug makers are concerned, the bill..."
pcc_eng_00_002.1940_x0019263_25:08-09-10,only_applicable,not,only,applicable,"*not only applicable to Coca- Cola ,*","*, the infographic is not only applicable to C...","*According to Naik , the infographic is not on...","*According to Naik, the infographic is not onl..."
pcc_eng_00_001.5458_x0008792_35:11-13-14,budget-_friendly,not,budget-,friendly,"*not only budget- friendly -- it's dependable ,*","*Stewartstown , Pennsylvania is not only budge...","*With DISH , high-speed Internet in Stewartsto...","*With DISH, high-speed Internet in Stewartstow..."


In [61]:
samples.append(show_hit_sample(HIT_TABLES_DIR.joinpath('not-RBdirect/NEQ_not-RBdirect_sample.24080200.parq'), show_following_neg=True))

  _sample = _sample.loc[_sample.token_str.str.contains(NEG_REGEX), :]



Sample Hit from `not-RBdirect/NEQ_not-RBdirect_sample.24080200.parq`: *often sufficient*

|                      | `apw_eng_20020113_0760_15:4-5`                                                                                    |
|:---------------------|:------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | often                                                                                                             |
| **`adj_form`**       | sufficient                                                                                                        |
| **`text_window`**    | *while this is __often sufficient__ , it was not in this*                                                         |
| **`token_str`**      | *while this is __often sufficient__ , it was not in this game , given the superior placement of Adams ' pieces .* |
| **`adv_lemma`**      | often                                 


Sample Hit from `not-RBdirect/NEQ_not-RBdirect_sample.24080200.parq`

|                      | `pcc_eng_val_2.01745_x19038_13:24-25`                                                                                                                         |
|:---------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | pretty                                                                                                                                                        |
| **`adj_form`**       | rare                                                                                                                                                          |
| **`text_window`**    | , female- focused surf films are still __pretty rare__ .                                                                                                      |
| **`token_str`**      | Despite the huge population of women surfers that grace virtually every break on the planet nowadays , female- focused surf films are still __pretty rare__ . |
| **`adv_lemma`**      | pretty                                                                                                                                                        |
| **`adj_lemma`**      | rare                                                                                                                                                          |
| **`adv_index`**      | 23                                                                                                                                                            |
| **`utt_len`**        | 26                                                                                                                                                            |
| **`adv_form_lower`** | pretty                                                                                                                                                        |
| **`adj_form_lower`** | rare                                                                                                                                                          |
| **`bigram_lower`**   | pretty_rare                                                                                                                                                   |
| **`window_len`**     | 10                                                                                                                                                            |
| **`chunk`**          | 1                                                                                                                                                             |
| **`id_prefix`**      | pcc_eng_val_2                                                                                                                                                 |
| **`part`**           | PccVa                                                                                                                                                         |

Sample Hit from `not-RBdirect/NEQ_not-RBdirect_sample.24080200.parq`: *often sufficient*

|                      | `apw_eng_20020113_0760_15:4-5`                                                                                    |
|:---------------------|:------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | often                                                                                                             |
| **`adj_form`**       | sufficient                                                                                                        |
| **`text_window`**    | *while this is __often sufficient__ , it was not in this*                                                         |
| **`token_str`**      | *while this is __often sufficient__ , it was not in this game , given the superior placement of Adams ' pieces .* |
| **`adv_lemma`**      | often                                                                                                             |
| **`adj_lemma`**      | sufficient                                                                                                        |
| **`adv_index`**      | 3                                                                                                                 |
| **`utt_len`**        | 22                                                                                                                |
| **`adv_form_lower`** | often                                                                                                             |
| **`adj_form_lower`** | sufficient                                                                                                        |
| **`bigram_lower`**   | often_sufficient                                                                                                  |
| **`window_len`**     | 11                                                                                                                |
| **`chunk`**          | 2                                                                                                                 |
| **`id_prefix`**      | apw_eng_2002                                                                                                      |
| **`part`**           | Apw                                                                                                               |


Sample Hit from `not-RBdirect/NEQ_not-RBdirect_sample.24080200.parq`: *so many*

|                      | `pcc_eng_val_1.4949_x08088_60:09-10`                                                                                                                                      |
|:---------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | so                                                                                                                                                                        |
| **`adj_form`**       | many                                                                                                                                                                      |
| **`text_window`**    | *teammates do tease brissett a bit about __so many__ expectations being placed at his feet*                                                                               |
| **`token_str`**      | *While teammates do tease Brissett a bit about __so many__ expectations being placed at his feet , Hines said that Brissett does n't seem to feel any sort of pressure .* |
| **`adv_lemma`**      | so                                                                                                                                                                        |
| **`adj_lemma`**      | many                                                                                                                                                                      |
| **`adv_index`**      | 8                                                                                                                                                                         |
| **`utt_len`**        | 31                                                                                                                                                                        |
| **`adv_form_lower`** | so                                                                                                                                                                        |
| **`adj_form_lower`** | many                                                                                                                                                                      |
| **`bigram_lower`**   | so_many                                                                                                                                                                   |
| **`window_len`**     | 15                                                                                                                                                                        |
| **`chunk`**          | 1                                                                                                                                                                         |
| **`id_prefix`**      | pcc_eng_val_1                                                                                                                                                             |
| **`part`**           | PccVa                                                                                                                                                                     |

In [124]:
show_hit_sample(HIT_TABLES_DIR.joinpath('RBdirect/ALL-RBdirect_final.parq'))


Sample Hit from `RBdirect/ALL-RBdirect_final.parq`: *n't especially glamorous*

|                          | `pcc_eng_val_3.02703_x38921_190:7-8-9`                                                                                                                                                                                                                                                                                                   |
|:-------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`bigram`**             | especially_glamorous                                                                                                                                                      

Unnamed: 0_level_0,bigram,sent_text,neg_form,adv_form,adj_form,hit_text,text_window,sent_id,match_id,bigram_id,...,bigram_lower,neg_form_lower,trigger_lower,trigger_lemma,all_forms_lower,dep_distance,window_len,quarantine,id_prefix,part
hit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
pcc_eng_val_3.02703_x38921_190:7-8-9,especially_glamorous,*This is another job that isn't __especially g...,n't,especially,glamorous,"*`n't` __especially glamorous__ , but it 's*",*this is another job that is `n't` __especiall...,pcc_eng_val_3.02703_x38921_190,7-8-9,pcc_eng_val_3.02703_x38921_190:8-9,...,especially_glamorous,n't,n't,not,n't_especially_glamorous,1,15,False,pcc_eng_val_3,PccVa


In [126]:
show_hit_sample(HIT_TABLES_DIR.joinpath('NEGmirror/ALL-NEGmirror_final.parq'))


Sample Hit from `NEGmirror/ALL-NEGmirror_final.parq`: *never quite comfortable*

|                           | `pcc_eng_val_3.04925_x42562_13:08-09-10`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
|:--------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Unnamed: 0_level_0,bigram,sent_text,neg_form,adv_form,adj_form,hit_text,text_window,sent_id,match_id,bigram_id,...,all_forms_lower,pattern,category,trigger_lower,trigger_lemma,dep_distance,window_len,quarantine,id_prefix,part
hit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
pcc_eng_val_3.04925_x42562_13:08-09-10,quite_comfortable,"*For whatever reason, little Ted was `never` _...",never,quite,comfortable,*`never` __quite comfortable__ about being bor...,"*whatever reason , little ted was `never` __qu...",pcc_eng_val_3.04925_x42562_13,08-09-10,pcc_eng_val_3.04925_x42562_13:09-10,...,never_quite_comfortable,neg-mirror-R,NEGmirror,never,never,1,13,False,pcc_eng_val_3,PccVa


In [135]:
show_hit_sample(HIT_TABLES_DIR.joinpath('POSmirror/ALL-POSmirror_final.parq'), head='MIR').T


Sample Hit from `POSmirror/ALL-POSmirror_final.parq`: *something fairly bad*

|                           | `pcc_eng_val_1.9883_x16019_07:10-11-12`                                                                                                                               |
|:--------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **`bigram`**              | fairly_bad                                                                                                                                                            |
| **`sent_text`**           | *It seemed clear to me that Santi had done `something` __fairly bad__ to his knee.*                                                                                   |
| **`adv_form`**            | fairly                                                                                             

hit_id,pcc_eng_val_1.9883_x16019_07:10-11-12
bigram,fairly_bad
sent_text,*It seemed clear to me that Santi had done `so...
adv_form,fairly
adj_form,bad
hit_text,*`something` __fairly bad__ to his knee .*
text_window,*to me that santi had done `something` __fairl...
sent_id,pcc_eng_val_1.9883_x16019_07
match_id,10-11-12
bigram_id,pcc_eng_val_1.9883_x16019_07:11-12
token_str,*It seemed clear to me that Santi had done `so...


In [84]:
show_hit_sample(HIT_TABLES_DIR.joinpath('RBXadj/cleaned/clean_PccVa_rb-bigram_hits.parq'), parts=['PccVa'])


Sample Hit from `RBXadj/cleaned/clean_PccVa_rb-bigram_hits.parq`: *so harmful*

|                      | `pcc_eng_val_1.6689_x10841_17:13-14`                                                                                   |
|:---------------------|:-----------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | so                                                                                                                     |
| **`adj_form`**       | harmful                                                                                                                |
| **`text_window`**    | *properties of plastics , which make them __so harmful__ when they end up in the*                                      |
| **`token_str`**      | *But it is these useful properties of plastics , which make them __so harmful__ when they end up in the environment .* |
| **`adv_lemma`**      | so                

Unnamed: 0_level_0,adv_form,adj_form,text_window,token_str,adv_lemma,adj_lemma,adv_index,utt_len,adv_form_lower,adj_form_lower,bigram_lower,window_len,id_prefix
hit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
pcc_eng_val_1.6689_x10841_17:13-14,so,harmful,"*properties of plastics , which make them __so...",*But it is these useful properties of plastics...,so,harmful,12,22,so,harmful,so_harmful,15,pcc_eng_val_1



Sample Hit from `RBXadj/cleaned/clean_PccVa_rb-bigram_hits.parq`: *so harmful*

|                      | `pcc_eng_val_1.6689_x10841_17:13-14`                                                                                   |
|:---------------------|:-----------------------------------------------------------------------------------------------------------------------|
| **`adv_form`**       | so                                                                                                                     |
| **`adj_form`**       | harmful                                                                                                                |
| **`text_window`**    | *properties of plastics , which make them __so harmful__ when they end up in the*                                      |
| **`token_str`**      | *But it is these useful properties of plastics , which make them __so harmful__ when they end up in the environment .* |
| **`adv_lemma`**      | so                                                                                                                     |
| **`adj_lemma`**      | harmful                                                                                                                |
| **`adv_index`**      | 12                                                                                                                     |
| **`utt_len`**        | 22                                                                                                                     |
| **`adv_form_lower`** | so                                                                                                                     |
| **`adj_form_lower`** | harmful                                                                                                                |
| **`bigram_lower`**   | so_harmful                                                                                                             |
| **`window_len`**     | 15                                                                                                                     |
| **`id_prefix`**      | pcc_eng_val_1                                                                                                          |



In [90]:
show_hit_sample(HIT_TABLES_DIR.joinpath('RBdirect/cleaned/clean_Pcc25_RBdirect_hits.csv.bz2'), n=5, 
                not_only=True)

AttributeError: 'Series' object has no attribute 'capitalize'