In [None]:
import os

from tcrnet import process, visualize

In [None]:
# Path to your TCR data.
# Set the TCR_FILEPATH environment variable to analyze a different file without
# editing this cell; when it is unset or empty, the original location is used.
tcr_filepath = os.environ.get("TCR_FILEPATH") or "/krummellab/data1/danb/tcrnet_issue/data/SARCOID1-POOL-GC1-SCT1/filtered_contig_annotations.csv"

In [None]:
# Passed to process.standardize_tcr_data as its clonotype_definition argument.
clonotype_definition = [
    'cdr1',
    'cdr2',
    'cdr3',
]

In [None]:
# Read the TCR file at tcr_filepath and bring it into tcrnet's standardized format,
# using the clonotype definition chosen above and the '10X' technology platform.
tcr_df = process.standardize_tcr_data(
    tcr_filepath=tcr_filepath,
    clonotype_definition=clonotype_definition,
    technology_platform='10X',
)

In [None]:
# optional: uncomment the next line to inspect the column names of the standardized table
# tcr_df.columns

In [None]:
# QC figure: the different alpha-beta chain pairing configurations found in the data
visualize.chain_pairing_configurations(
    tcr_df=tcr_df,
)

In [None]:
# Run tcrnet preprocessing on the standardized table
# (chain pairing, QC, and clonotype definition).
ptcr_df = process.preprocess_tcr_data(
    tcr_df=tcr_df,
)

In [None]:
# Quantify each clonotype from the preprocessed table:
# absolute counts and relative frequencies.
qtcr_df = process.compute_clonotype_abundances(
    processed_tcr_df=ptcr_df,
)

In [None]:
# Show the result of compute_clonotype_abundances; as the last expression in the
# cell, the notebook renders it as the cell output.
qtcr_df

In [None]:
# Panel of bar plots: sequence length distribution for each complementarity
# determining region, one length column per alpha/beta chain and CDR.
visualize.sequence_length_distributions(
    tcr_df=qtcr_df,
    seq_len_colnames=[
        'alpha_cdr1_aa_length',
        'alpha_cdr2_aa_length',
        'alpha_cdr3_aa_length',
        'beta_cdr1_aa_length',
        'beta_cdr2_aa_length',
        'beta_cdr3_aa_length',
    ],
)

In [None]:
# Histogram of clonotype abundances over all clonotypes
# (most will likely have count = 1).
visualize.clonotype_abundances(
    tcr_df=qtcr_df,
)

In [None]:
# The abundance histogram is usually more helpful when restricted to
# clonotypes with counts > 1, so keep only rows where num_records exceeds 1.
visualize.clonotype_abundances(
    tcr_df=qtcr_df.loc[qtcr_df['num_records'] > 1],
)

In [None]:
# Plot the top clonotypes ranked by relative abundance (top_n sets how many).
visualize.top_n_clonotypes(
    tcr_df=qtcr_df,
    top_n=13,
)