## GenBit Sample Notebook

### This notebook is intended to provide a quick start for users building on top of the GenBit API.

## Install GenBit

In [1]:
# %pip install genbit

### Import utilities

In [2]:
import pprint
import genbit
from genbit.genbit_metrics import GenBitMetrics

### Specify Filename Location and Language  
(Accepted codes: EN, IT, RU, FR, DE and ES)

In [3]:
# Path of the text file to analyse; it is opened and read line by line in the loading cell below.
input_file = "imdbpositive.tsv"
# Language code handed to GenBitMetrics when the metrics object is created.
language = "en"

In [4]:
# Create the GenBit metrics object for the chosen language, setting the
# context window, distance weight and percentile cutoff explicitly.
metric = GenBitMetrics(
    language,
    context_window=30,
    distance_weight=0.95,
    percentile_cutoff=80,
)

In [5]:
# Read the input file line by line and pass the stripped text to metric.add_data.
#   contiguous = True  -> gather every stripped line into `data` and submit the
#                         whole list in a single add_data call after the loop.
#   contiguous = False -> submit each stripped line on its own, as a one-item list.
contiguous = False
data = []
with open(input_file, "r", encoding="utf-8") as input_sentence_file:
    for line in input_sentence_file:
        line = line.strip()
        if not contiguous:
            # Per-line mode: `data` is replaced by a fresh single-item list each time.
            data = [line]
            metric.add_data(data, tokenized=False)
            continue
        # Contiguous mode: keep accumulating; submission happens once below.
        data.append(line)
if contiguous:
    metric.add_data(data, tokenized=False)

In [6]:
# Retrieve the results from the populated metrics object, requesting the
# statistics output but not the word list.
metrics = metric.get_metrics(
    output_statistics=True,
    output_word_list=False,
)

### Print the results
The results for the test file should show a heavy non-binary bias, as many terms in the sample file are gender 'neutral' or explicitly non-binary. You will also observe that, in the case of binary bias, there is a slight male-leaning bias because male gender definition words occur more frequently throughout the samples in the data.

In [7]:
# Pretty-print the dictionary returned by get_metrics so its nested entries are readable.
pprint.pprint(metrics)

{'additional_metrics': {'avg_bias_conditional': 0.34498379111709554,
                        'avg_bias_conditional_absolute': 0.5729149233899209,
                        'avg_bias_ratio': 0.6949949357338604,
                        'avg_bias_ratio_absolute': 0.8047695393084777,
                        'avg_non_binary_bias_conditional': 0.37123926988539446,
                        'avg_non_binary_bias_conditional_absolute': 0.45198253335050115,
                        'avg_non_binary_bias_ratio': 0.9216962853680956,
                        'avg_non_binary_bias_ratio_absolute': 0.9301694999184437,
                        'avg_trans_cis_bias_conditional': -2.6625368080321694,
                        'avg_trans_cis_bias_conditional_absolute': 2.6631617477962806,
                        'avg_trans_cis_bias_ratio': -3.921669865456539,
                        'avg_trans_cis_bias_ratio_absolute': 3.921669865456539,
                        'std_dev_bias_conditional': 0,
                        

###  ----- END -------- 