# Example Pyllelic Use-Case Notebook

## Background

This notebook illustrates how to import and use `pyllelic` in a Jupyter environment.

See https://github.com/Paradoxdruid/pyllelic for further details.

## Set-up

In [None]:
import pandas as pd
import pyllelic

In [None]:
# Describe where the work happens on disk:
#   * base_path is the directory we'll do our work in -- replace the example
#     path below with your own location
#   * create a sub-directory named "test" inside base_path
#   * place the .bam and .bai files inside that "test" folder
# The remaining entries name the promoter file, the start/end values and the
# chromosome (presumably the genomic window to analyse -- adjust for your data).
env_settings = {
    "base_path": "/Users/abonham/documents/test_allelic/",
    "prom_file": "TERT-promoter-genomic-sequence.txt",
    "prom_start": "1293000",
    "prom_end": "1296000",
    "chrom": "5",
}

# Each dict entry is forwarded as the keyword argument of the same name.
pyllelic.set_up_env_variables(**env_settings)

## Main Parsing Functions

In [None]:
# Gather the bam files to analyse. `files_set` is reused by several later
# cells (index_and_fetch, extract_cell_types, process_means, process_modes,
# return_individual_data).
files_set = pyllelic.make_list_of_bam_files()  # finds bam files

In [None]:
# Uncomment for debugging:
# files_set

In [None]:
# Index the bam files and create the bam_output folders/files.
# The returned `positions` value is passed on to the means, modes and
# individual-data steps further down.
positions = pyllelic.index_and_fetch(files_set)

In [None]:
# Only needs to be run once: this call generates static files.
# It takes no arguments and its return value is not used by later cells.
pyllelic.genome_parsing()

In [None]:
# Derive the cell types from the bam file list; `cell_types` is the input to
# the QUMA step (run_quma_and_compile_list_of_df) in a later cell.
cell_types = pyllelic.extract_cell_types(files_set)

In [None]:
# Uncomment for debugging
# cell_types

In [None]:
# Run QUMA and compile the resulting dataframes into `df_list`.
# The second argument is a filename -- set it to whatever you want.
# To skip the QUMA run, add `run_quma=False` as an extra keyword argument.
df_list = pyllelic.run_quma_and_compile_list_of_df(
    cell_types,
    "tester5.xlsx",
)

In [None]:
# Uncomment for debugging
# df_list.keys()

In [None]:
# Compute the means from the compiled dataframes, the fetched positions and
# the bam file list; `means` feeds find_diffs and the Excel output below.
means = pyllelic.process_means(df_list, positions, files_set)

In [None]:
# Uncomment for debugging
# means

In [None]:
# Compute the modes from the same three inputs used for the means;
# `modes` feeds find_diffs and the Excel output below.
modes = pyllelic.process_modes(df_list, positions, files_set)

In [None]:
# Uncomment for debugging
# modes

In [None]:
# Compare the means against the modes; `diff` is written out together with
# both inputs in the "Write Output" section.
diff = pyllelic.find_diffs(means, modes)

In [None]:
# Uncomment for debugging
# diff

## Write Output to Excel Files

In [None]:
# Set the filename to whatever you want.
# NOTE: the "Visualizing Data" section reads "Test5_diff.xlsx", so update that
# cell as well if you change "Test5" here.
pyllelic.write_means_modes_diffs(means, modes, diff, "Test5")

## Visualizing Data

In [None]:
# Load the diff workbook (presumably the one produced by
# write_means_modes_diffs(..., "Test5") above) from pyllelic's base directory.
# pandas is used through its own import rather than via `pyllelic.pd`, which
# only works while pyllelic happens to expose that internal alias.
diff_workbook = pyllelic.config.base_directory.joinpath("Test5_diff.xlsx")
final_data = pd.read_excel(
    diff_workbook,
    dtype=str,  # read every cell as text
    index_col=0,  # use the first column as the row labels
)

In [None]:
# Display the table loaded from the diff workbook as this cell's output.
final_data

In [None]:
# Collect the individual data from the compiled dataframes, positions and bam
# file list. Later cells look up a row by label (e.g. 'SW1710') and pass the
# whole object to pyllelic.histogram.
individual_data = pyllelic.return_individual_data(df_list, positions, files_set)

In [None]:
# Uncomment for debugging
# individual_data

In [None]:
# Show the 'SW1710' row of the individual data as its own DataFrame.
# pandas is used through its own import rather than the `pyllelic.pd` alias,
# which is an internal detail of pyllelic and not part of its documented API.
pd.DataFrame(individual_data.loc["SW1710"])

In [None]:
# Draw a histogram for one sample at one position.
# 'SW1710' is the same row label used in the cell above; '1295089' falls inside
# the prom_start/prom_end window given in Set-up (presumably a genomic
# position -- confirm against the pyllelic docs).
cell_type = "SW1710"
position = "1295089"
pyllelic.histogram(individual_data, cell_type, position)