## Quick Start Demo -- Calculating Overlap 
### Load the required packages for this notebook
None of this code needs to be changed. All places where changes are needed are indicated as Steps throughout this notebook.

In [1]:
%%capture
%matplotlib inline

import itertools
import os

import pandas as pd
from scipy.spatial.distance import cosine, jaccard
import seaborn as sns
from pyclonal.io import combineFiles

from IPython.html.widgets import interact

# Step 1 : Change file path to input your data

In [2]:
# Combine the input files: `directory` is the folder to search and `pattern`
# selects which files in it are read (presumably a glob-style match -- confirm
# against pyclonal.io.combineFiles).
directory = "../sample_input_files/"
pattern = "D*.changeo_small_demo.tsv"

# combineFiles returns two objects; `df` is the table that is joined with the
# sample metadata and filtered per patient in the following steps.
df, seq_df = combineFiles(directory,pattern)


../sample_input_files/D233.changeo_small_demo.tsv looks like a changeo file
../sample_input_files/D255.changeo_small_demo.tsv looks like a changeo file
../sample_input_files/D280.changeo_small_demo.tsv looks like a changeo file
../sample_input_files/D287.changeo_small_demo.tsv looks like a changeo file
../sample_input_files/D299.changeo_small_demo.tsv looks like a changeo file


# Step 2 : Change the file path to point to the metadata that describes your samples

In [3]:
# Load the per-sample metadata table, using its `filename` column as the index
# so that rows can be matched to the combined data by index label.
metadata = pd.read_csv(
    '../sample_input_files/metadata_demo.csv',
    index_col='filename',
)

In [4]:
# Preview the first rows to check that the metadata loaded as expected.
metadata.head()

Unnamed: 0_level_0,patient_id,tissue_id,subset_1
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D233_1,D233,Thymus,Naive
D233_2,D233,Spleen,Naive
D233_3,D233,BM,Naive
D233_4,D233,Bld,Naive
D233_5,D233,Thymus,Memory


In [5]:
# Attach the metadata columns to the combined data. Rows are matched on the
# index, and the left join keeps every row of `df`.
merged_df = df.join(metadata, how='left')

# Step 3: Specify which sample group you want to look at first

In this case, the sample group is selected by patient ID.

In [6]:
# Sample group to analyse -- change this value to look at a different patient.
patient = 'D233'

# Keep only the rows whose metadata assigns them to the selected patient.
filtered_df = merged_df[merged_df['patient_id'] == patient]

# Remove the metadata columns again so that only the data columns feed into the
# similarity calculations. They are dropped by label rather than by position,
# so the result does not depend on where the metadata columns sit in the frame
# (a positional `[:, :-k]` slice also returns no columns at all when k == 0).
filtered_df = filtered_df.drop(metadata.columns, axis=1)

In [7]:
# Pairwise cosine similarity between every pair of rows in `filtered_df`.
# scipy's `cosine` is a distance, so `1 - cosine(...)` converts it to a similarity.
res_df = {}
for l1, l2 in itertools.combinations(filtered_df.index, 2):
    vec_1 = list(filtered_df.loc[l1].values)
    vec_2 = list(filtered_df.loc[l2].values)
    similarity = 1 - cosine(vec_1, vec_2)

    # Each unordered pair is visited once; store the value under both
    # orderings so the resulting matrix is symmetric.
    res_df.setdefault(l2, {})[l1] = similarity
    res_df.setdefault(l1, {})[l2] = similarity

# No row is paired with itself, so the diagonal is missing (NaN) after the
# conversion to a DataFrame; fill it with 1, the similarity of a row to itself.
cosine_data = pd.DataFrame(res_df).fillna(1)

In [8]:
# Pairwise Jaccard distance between every pair of rows in `filtered_df`.
# Unlike `cosine_data` (stored as 1 - distance, i.e. a similarity), these values
# are the raw distances returned by scipy: 0 = identical, 1 = nothing shared.
# NOTE(review): scipy defines `jaccard` for boolean vectors; confirm that the
# installed SciPy version treats the values in `filtered_df` as intended.
res_df = {}
for l1, l2 in itertools.combinations(filtered_df.index, 2):
    # Each unordered pair is visited once; store the value under both
    # orderings so the resulting matrix is symmetric.
    res_df.setdefault(l2, {})[l1] = res_df.setdefault(l1, {})[l2] = jaccard(
        list(filtered_df.loc[l1].values),
        list(filtered_df.loc[l2].values),
    )

# No row is paired with itself, so the diagonal is missing (NaN) after the
# conversion to a DataFrame. The distance of a row to itself is 0, so fill with
# 0 (filling with 1 would mark every sample as maximally different from itself).
jaccard_data = pd.DataFrame(res_df).fillna(0)

In [9]:
def plot_heatmap(method):
    """Draw a clustered heatmap of the pairwise sample matrix for `method`.

    Parameters
    ----------
    method : str
        Which precomputed matrix to plot: 'cosine' uses the notebook-level
        `cosine_data`, 'jaccard' uses `jaccard_data`.

    Raises
    ------
    ValueError
        If `method` is neither 'cosine' nor 'jaccard'.
    """
    if method == 'cosine':
        data = cosine_data
    elif method == 'jaccard':
        data = jaccard_data
    else:
        # Fail with a clear message instead of reaching the plotting call with
        # `data` unbound.
        raise ValueError(
            "Unsupported method {!r}; expected 'cosine' or 'jaccard'".format(method)
        )

    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.clustermap(data=data, cmap=cmap)

In [10]:
# Show a dropdown for `method`; plot_heatmap is called with the selected value
# ('jaccard' is listed first and is therefore the initial selection).
interact(plot_heatmap, method=['jaccard', 'cosine'])

interactive(children=(Dropdown(description='method', options=('jaccard', 'cosine'), value='jaccard'), Output()…

<function __main__.plot_heatmap(method)>