In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to local source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import tcrdist
import matplotlib.pyplot as plt
from tcrdist.repertoire import TCRrep
from tcrdist.html_colors import get_html_colors
from tcrdist.public import _neighbors_fixed_radius
import pwseqdist as pw
import networkx as nx
import community as community_louvain
from itertools import combinations

In [None]:
import sys

# Make the parent directory importable (presumably where the local `tcrnet`
# package imported in the next cell lives — confirm).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate '../' entry to sys.path each time.
if '../' not in sys.path:
    sys.path.append('../')

In [None]:
from tcrnet.process import (
    standardize_tcr_data, 
    preprocess_tcr_data, 
    compute_clonotype_abundances
)
from tcrnet.visualize import (
    chain_pairing_configurations,
    sequence_length_distributions,
    clonotype_abundances,
    top_n_clonotypes,
    generate_network_plot
)
from tcrnet.networks import similarity, graph, cluster, metrics

In [None]:
# Run configuration for the data-loading cells.
# Number of rows drawn from the standardized table via tcr_df.sample(SAMPLE_SIZE).
SAMPLE_SIZE = 100000
# NOTE(review): SAMPLE_ID is not referenced by any visible cell; preprocess_tcr_data
# is called with the literal sample_id='p3162_pre' instead — confirm which is intended.
SAMPLE_ID = 'HIV_01'
# Passed as `clonotype_definition` to the tcrnet processing and network calls
# (presumably the columns that jointly identify a clonotype — confirm).
clonotype_definition = ['cdr1_aa', 'cdr2_aa', 'cdr3_aa']
# Path to your TCR data (relative path; handed to standardize_tcr_data).
tcr_filepath = "pre_placebo_3162_B.csv"

In [None]:
# Read the Omniscope export at `tcr_filepath` through tcrnet's standardize_tcr_data.
tcr_df = standardize_tcr_data(
    technology_platform='Omniscope',
    tcr_filepath=tcr_filepath,
)

In [None]:
# Down-sample to at most SAMPLE_SIZE rows.
# - Capping n at len(tcr_df) avoids the ValueError pandas raises when asked to
#   draw more rows than exist without replacement.
# - A fixed random_state makes the draw identical on every Restart & Run All.
RANDOM_SEED = 0
tcr_df = tcr_df.sample(n=min(SAMPLE_SIZE, len(tcr_df)), random_state=RANDOM_SEED)

In [None]:
# Run tcrnet preprocessing on the sampled table for the beta chain.
# NOTE(review): sample_id is a literal here and differs from SAMPLE_ID ('HIV_01')
# defined in the configuration cell — confirm which identifier is intended.
ptcr_df = preprocess_tcr_data(
    tcr_df=tcr_df,
    chain='beta',
    sample_id='p3162_pre',
    clonotype_definition=clonotype_definition,
)

In [None]:
# Compute clonotype abundances from the preprocessed beta-chain table.
qtcr_df = compute_clonotype_abundances(
    processed_tcr_df=ptcr_df,
    chain='beta',
    clonotype_definition=clonotype_definition,
)

In [None]:
# Preview the first rows of the table returned by compute_clonotype_abundances.
qtcr_df.head()

In [None]:
# network analysis parameters
# Chain passed to compute_tcrdist and generate_graph_dataframe.
chain = 'beta'
# Passed as `edge_threshold` to generate_graph_dataframe
# (presumably the distance cutoff for linking two clonotypes — confirm).
edge_threshold = 64
# Passed as `count_threshold` to generate_graph_dataframe.
clonotype_count_threshold = 2
# Passed as `analysis_mode` to generate_graph_dataframe.
analysis_mode = 'private'
# Passed to generate_cluster_colors (as color_top_k_clusters) and to
# compute_network_metrics (as top_k_clusters).
top_k_clusters = 9

In [None]:
# Keep only rows whose num_records exceeds 1; copy so later edits do not touch
# a view of the unfiltered frame.
# NOTE(review): the cutoff is a literal rather than clonotype_count_threshold —
# confirm whether the two are meant to be tied together.
qtcr_df = qtcr_df[qtcr_df['num_records'].gt(1)].copy()

In [None]:
# Tabulate how many rows carry each num_records value in the filtered frame.
qtcr_df['num_records'].value_counts()

In [None]:
# Compute the distance matrix for the retained rows; the call also returns the
# table (ntcr_df) that the graph-building step consumes alongside it.
ntcr_df, distance_matrix = similarity.compute_tcrdist(
    qtcr_df=qtcr_df,
    clonotype_definition=clonotype_definition,
    chain=chain,
)

In [None]:
# Build the network dataframe from the distance matrix using the parameters
# set in the network-analysis cell, then preview its first rows.
network_df = graph.generate_graph_dataframe(
    ntcr_df=ntcr_df,
    distance_matrix=distance_matrix,
    chain=chain,
    clonotype_definition=clonotype_definition,
    analysis_mode=analysis_mode,
    edge_threshold=edge_threshold,
    count_threshold=clonotype_count_threshold,
)
network_df.head()

In [None]:
# Build the undirected graph object from the network dataframe.
tcr_graph = graph.create_undirected_graph(net_df=network_df)

In [None]:
# Cluster the network (presumably Louvain community detection; the tcrnet
# function is spelled `cluster_lovain`), then derive colours for the
# top_k_clusters clusters from the resulting partition.
partition = cluster.cluster_lovain(net_df=network_df)
cluster2color = cluster.generate_cluster_colors(
    color_top_k_clusters=top_k_clusters,
    partition=partition,
)

In [None]:
# Inspect the value returned by generate_cluster_colors.
cluster2color

In [None]:
# Pass the partition to update_df_with_cluster_information (presumably to attach
# cluster assignments — confirm).
# Note: this rebinds network_df, so every later cell sees the updated frame.
network_df = graph.update_df_with_cluster_information(net_df=network_df, partition=partition)

In [None]:
# Compute network metrics from the cluster-annotated dataframe and the graph,
# using the same top_k_clusters value as the colouring step.
net_metrics = metrics.compute_network_metrics(
    graph=tcr_graph,
    net_df=network_df,
    top_k_clusters=top_k_clusters,
)

In [None]:
# Render and save the clustered network figure.
# The data loaded in this notebook is the pre-placebo file
# (tcr_filepath = "pre_placebo_3162_B.csv", sample_id 'p3162_pre'), so the plot
# title and output file name are labelled PRE; labelling them POST would
# misattribute the figure to the wrong timepoint.
# The edge threshold in the file name is taken from `edge_threshold`, so the
# name cannot drift from the value actually used to build the graph.
# NOTE(review): the output directory is an absolute, user-specific path —
# consider moving it to a configurable location.
timepoint_label = "PRE"
network_plot_dir = "/Users/alaa/Documents/ucsf/data/rutishauser/bmgf_vax/omniscope/clonotype_networks"
network_plot_filename = (
    f"bmgf_p3162_{timepoint_label}_clonotypes_network_louvain_clusters"
    f"_edge-threshold{edge_threshold}_v1.png"
)
generate_network_plot(
    graph=tcr_graph,
    network_metrics=net_metrics,
    partition=partition,
    colors=cluster2color,
    output_filepath=f"{network_plot_dir}/{network_plot_filename}",
    plot_title=f"p3162 {timepoint_label}",
    dpi=223,
)