Generate chord diagrams to look at interactions between clones in tumours

We use holoviews with a matplotlib backend. 

The first part of the notebook walks through how to prepare the data for generating the graphs.

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from itertools import combinations
from collections import Counter

# Report library and interpreter versions so the run can be reproduced
import sys

for label, version in (
    ("Pandas version: ", pd.__version__),
    ("Seaborn version: ", sns.__version__),
    ("Matplotlib version: ", plt.matplotlib.__version__),
    ("Numpy version: ", np.__version__),
    ("Python version: ", sys.version),
):
    print(label, version)


# Load the combined MFP/IV raw data table that the rest of the notebook works from.
# NOTE(review): the saved cell output shows pandas emitting a warning that echoes this
# line - presumably a DtypeWarning about mixed-type columns; confirm and pin dtypes if so.
merged_df = pd.read_csv("./MFP_IV_combined_raw_data.csv")
 

#radius of a sphere from its volume
def get_radius(volume):
    """Return the radius r of a sphere of the given volume, from V = (4/3) * pi * r**3."""
    radius_cubed = 3 * volume / (4 * np.pi)
    return radius_cubed ** (1 / 3)

#sphere-equivalent radius computed row by row from the 'Volume corrected' column
merged_df['radius_met'] = merged_df['Volume corrected'].map(get_radius)
#min_dist expressed in units of that radius
merged_df['min_dist_norm'] = merged_df['min_dist'].div(merged_df['radius_met'])
merged_df.head()

Pandas version:  2.2.1
Seaborn version:  0.13.2
Matplotlib version:  3.8.4
Numpy version:  1.23.4
Python version:  3.10.14 | packaged by Anaconda, Inc. | (main, Mar 21 2024, 16:20:14) [MSC v.1916 64 bit (AMD64)]


  merged_df = pd.read_csv("./MFP_IV_combined_raw_data.csv")


Unnamed: 0,MET_ID,1,10,11,100,101,110,111,Volume corrected,min_dist,...,treatment,total_lung_volume,vessel_volume,vessel_surface_area,minor_axis,major_axis,elongation,met_type_cat,radius_met,min_dist_norm
0,28.0,0.011735,0.167993,0.0,0.51105,0.0,0.309223,0.0,220226000.0,0.0,...,IV,27758500000.0,852975600.0,151017000.0,161.040177,337.0,2.092645,polyclonal,374.622027,0.0
1,50.0,0.998808,0.001192,0.0,0.0,0.0,0.0,0.0,65635.23,282.43201,...,IV,27758500000.0,852975600.0,151017000.0,13.0,18.336258,1.410481,polyclonal,25.023581,11.286634
2,89.0,0.175399,0.099498,0.0,0.545721,0.179381,0.0,0.0,124651700.0,15.761827,...,IV,27758500000.0,852975600.0,151017000.0,167.418006,376.0,2.245876,polyclonal,309.886884,0.050863
3,119.0,0.974494,0.001366,0.0,0.02414,0.0,0.0,0.0,1326004.0,0.0,...,IV,27758500000.0,852975600.0,151017000.0,37.469744,55.0,1.467851,polyclonal,68.153062,0.0
4,125.0,0.975769,0.000219,0.0,0.024012,0.0,0.0,0.0,10163210.0,0.0,...,IV,27758500000.0,852975600.0,151017000.0,76.533946,111.0,1.450337,polyclonal,134.373652,0.0


To generate chord diagrams, we need to get all possible combinations of interactions between MET clones

So, if a MET contains clones [1,4,5], then all possible pairs are [1,4], [1,5] and [4,5].

If we have [1,4,5] and [4,5,6], then all possible combinations are [1,4], [1,5], [4,5], [4,5], [5,6], [4,6]. 

If we count number of interactions:
- `[1,4]: 1`
- `[1,5]: 1  `
- `[4,5]: 2  `
- `[5,6]: 1  `
- `[4,6]: 1`

This information is used to generate a graph for the chord diagram

## Polyclonal only

Keep only the polyclonal METs for the chord graph, as only polyclonal METs contain multiple clones

In [3]:
# Rows whose met_type_cat is 'polyclonal'
poly_only = merged_df.loc[merged_df['met_type_cat'].eq('polyclonal')]
# Single-column frame holding clone_type as strings
clone_type_df = poly_only.loc[:, ['clone_type']].astype(str)
clone_type_df.head()

Unnamed: 0,clone_type
0,1_2_4_6
1,1_2
2,1_2_4_5
3,1_2_4
4,1_2_4


Create a column 'clone_list' where the clone_type is converted into a list of clone ids (kept as strings), so `1_4_5` becomes `['1', '4', '5']`

In [4]:
#turn each clone_type string into its list of clone ids, e.g. "1_2_4" -> ["1", "2", "4"];
#a value without "_" becomes a one-element list
clone_type_df['clone_list'] = clone_type_df['clone_type'].map(lambda clone_type: clone_type.split('_'))
clone_type_df.head()

Unnamed: 0,clone_type,clone_list
0,1_2_4_6,"[1, 2, 4, 6]"
1,1_2,"[1, 2]"
2,1_2_4_5,"[1, 2, 4, 5]"
3,1_2_4,"[1, 2, 4]"
4,1_2_4,"[1, 2, 4]"


For each row, we get all possible combinations of pairs in that row. 

So, if a MET contains clones [1,4,5], then all possible pairs are [1,4], [1,5] and [4,5].

The `Counter` class then counts the number of occurrences of each pair across all of the data

In [5]:
#Collect every unordered pair of clones found in each row's clone_list
all_pairs = []
for clone_list in clone_type_df['clone_list']:
    for pair in combinations(clone_list, 2):
        #sort within the pair so (a, b) and (b, a) are counted as the same pair
        all_pairs.append(tuple(sorted(pair)))
#number of rows in which each pair occurs together
comb_counter = Counter(all_pairs)
comb_counter

Counter({('1', '4'): 964,
         ('1', '2'): 434,
         ('2', '4'): 383,
         ('1', '3'): 134,
         ('3', '4'): 102,
         ('2', '3'): 73,
         ('1', '6'): 63,
         ('4', '6'): 62,
         ('4', '5'): 62,
         ('1', '5'): 60,
         ('1', '7'): 57,
         ('4', '7'): 56,
         ('2', '6'): 41,
         ('2', '7'): 36,
         ('2', '5'): 25,
         ('3', '7'): 17,
         ('3', '6'): 15,
         ('6', '7'): 15,
         ('5', '7'): 6,
         ('5', '6'): 6,
         ('3', '5'): 6})

In [6]:
import os

#one row per clone pair: the pair tuple and how often it was counted
comb_df = (
    pd.DataFrame.from_dict(comb_counter, orient='index')
    .reset_index()
    .set_axis(['pair', 'count'], axis=1)
)
#make sure the fig4 output dir exists inside final_graphs before writing
os.makedirs("./final_graphs/fig4", exist_ok=True)
comb_df.to_csv("./final_graphs/fig4/pair_combinations_combined.csv", index=False)

Convert the above to a dataframe and make sure count column is at the end

In [7]:
#unpack each (clone_a, clone_b) pair into its own column, keep count last, all as integers
connections = pd.DataFrame(
    [(clone_a, clone_b, count) for (clone_a, clone_b), count in comb_counter.items()],
    columns=['clone_a', 'clone_b', 'count'],
).astype(int)
connections.head()

Unnamed: 0,clone_a,clone_b,count
0,1,2,434
1,1,4,964
2,1,6,63
3,2,4,383
4,2,6,41


We use holoviews with a matplotlib backend for generating the chord diagram

In [11]:
# Plotting stack for the chord diagrams
import holoviews as hv
import matplotlib.colors as mcolors
print("Holoviews version: ", hv.__version__)
# Select the matplotlib backend for rendering HoloViews elements
hv.extension('matplotlib')
# Figure output settings: SVG format, size=250
hv.output(fig='svg', size=250)

Holoviews version:  1.20.2


Using holoviews and matplotlib backend, create a chord diagram

In [12]:
#Using the connections df from above (clone_a, clone_b, count), create a plain Chord plot
#with no labels or colours yet
hv.Chord(connections)


Now, we need to add labels to the chord diagram

In [13]:
# One node per clone (1-7); 'name' is the text used as the node label
nodes = hv.Dataset(
    pd.DataFrame({
        'index': list(range(1, 8)),
        'name': [str(clone) for clone in range(1, 8)],
    }),
    'index',
)

# Same chord as before, now with the node table attached and labels taken from 'name'
hv.Chord((connections, nodes)).opts(hv.opts.Chord(labels='name'))


Let's add some colour to better visualize the interactions between clones

In [None]:

#Named colours used as the palette for the chord diagram
color_map = ['cyan', 'yellow', 'lightgreen', 'magenta', 'purple', 'orange', 'lightgrey']
#Same palette converted to hex strings
hex_colors = list(map(mcolors.to_hex, color_map))

#Edges are coloured by their 'clone_a' endpoint, nodes by their own 'index'.
#NOTE(review): both are mapped as categories onto the same palette - confirm that an
#edge gets the same colour as its clone_a node when some clones never appear as clone_a.
img = hv.Chord((connections, nodes)).opts(
    hv.opts.Chord(
        cmap=hex_colors,
        edge_color=hv.dim('clone_a').astype(str),
        labels='name',
        node_color=hv.dim('index').astype(str),
    )
)

img

#Uncomment to export the combined figure:
#hv.save(img, './final_graphs/fig4/chord_diag_combined.svg', fmt='svg')


Now that we know how to generate a chord diagram with labels and colours, let's generate these for IV and MFP. 

We will also generate one for each animal

### Generate chord diagram for each treatment (IV and MFP)

Here we wrap the code from above into a `generate_connections` function for simplicity

In [14]:

# Set the HoloViews output options again for the figures below: SVG format, size=250
hv.output(fig='svg', size=250)


def generate_connections(filtered_df):
    """Count how often each pair of clones occurs together in the rows of ``filtered_df``.

    Each ``clone_type`` value is read as a "_"-separated list of clone ids
    (e.g. "1_4_5"). Every unordered pair of ids within a row counts once, so
    "1_4_5" contributes (1, 4), (1, 5) and (4, 5). Rows with a single id
    contribute nothing.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        Frame with a ``clone_type`` column; values are cast to ``str``.

    Returns
    -------
    connections : pandas.DataFrame
        Integer columns ``clone_a``, ``clone_b``, ``count``, one row per pair.
        Empty (with the same three columns) when no row has two or more clones.
    comb_counter : collections.Counter
        Maps ``(clone_a, clone_b)`` tuples of id strings to their count.

    Raises
    ------
    ValueError
        If a clone id in a multi-clone row is not an integer string.
    """
    # str.split returns a one-element list when there is no "_", so no special case is needed
    clone_lists = [clone_type.split('_') for clone_type in filtered_df['clone_type'].astype(str)]
    # Order each pair numerically so that clone_a <= clone_b also holds for ids >= 10
    # (plain string sorting would put '10' before '2')
    all_pairs = [
        tuple(sorted(pair, key=int))
        for clone_list in clone_lists
        for pair in combinations(clone_list, 2)
    ]
    comb_counter = Counter(all_pairs)
    # Building the rows directly keeps the three columns even when there are no pairs
    connections = pd.DataFrame(
        [(clone_a, clone_b, count) for (clone_a, clone_b), count in comb_counter.items()],
        columns=['clone_a', 'clone_b', 'count'],
    ).astype(int)
    return connections, comb_counter

#nodes for clones 1-7; 'name' is the text used as the node label
nodes = hv.Dataset(
    pd.DataFrame({
        'index': list(range(1, 8)),
        'name': [str(clone) for clone in range(1, 8)],
    }),
    'index',
)

#Named colours used as the palette for the chord diagrams
color_map = ['cyan', 'yellow', 'lightgreen', 'magenta', 'purple', 'orange', 'lightgrey']
#Same palette converted to hex strings
hex_colors = list(map(mcolors.to_hex, color_map))

#polyclonal rows, split by treatment
poly_only = merged_df.loc[merged_df['met_type_cat'].eq('polyclonal')]
clone_type_df_iv = poly_only.loc[poly_only['treatment'].eq('IV'), ['clone_type']]
clone_type_df_mfp = poly_only.loc[poly_only['treatment'].eq('MFP'), ['clone_type']]

#pair counts per treatment
connections_iv, iv_pairs = generate_connections(clone_type_df_iv)
connections_mfp, mfp_pairs = generate_connections(clone_type_df_mfp)

#save the IV pair counts
iv_pairs_df = (
    pd.DataFrame.from_dict(iv_pairs, orient='index')
    .reset_index()
    .set_axis(['pair', 'count'], axis=1)
)
iv_pairs_df.to_csv("./final_graphs/fig4/IV_pair_combinations_combined.csv", index=False)

#save the MFP pair counts
mfp_pairs_df = (
    pd.DataFrame.from_dict(mfp_pairs, orient='index')
    .reset_index()
    .set_axis(['pair', 'count'], axis=1)
)
mfp_pairs_df.to_csv("./final_graphs/fig4/MFP_pair_combinations_combined.csv", index=False)

In [15]:
# Chord diagram of the IV pair counts: edges coloured by 'clone_a', nodes by 'index',
# then given a title via relabel
chord_iv = (
    hv.Chord((connections_iv, nodes))
    .opts(
        hv.opts.Chord(
            cmap=hex_colors,
            edge_color=hv.dim('clone_a').astype(str),
            labels='name',
            node_color=hv.dim('index').astype(str),
        )
    )
    .relabel('Chord Diagram for IV')
)
hv.save(chord_iv, './final_graphs/fig4/chord_diag_IV.svg', fmt='svg')
chord_iv

In [16]:
# Chord diagram of the MFP pair counts: edges coloured by 'clone_a', nodes by 'index',
# then given a title via relabel
chord_mfp = (
    hv.Chord((connections_mfp, nodes))
    .opts(
        hv.opts.Chord(
            cmap=hex_colors,
            edge_color=hv.dim('clone_a').astype(str),
            labels='name',
            node_color=hv.dim('index').astype(str),
        )
    )
    .relabel('Chord Diagram for MFP')
)
hv.save(chord_mfp, './final_graphs/fig4/chord_diag_MFP.svg', fmt='svg')
chord_mfp

Generate a chord diagram for each animal

In [17]:
#Generate chord diagram for each mouse id
ids = merged_df.id.unique()

chord_list = []

#the loop variable is `mouse_id` rather than `id`, so the built-in id() is not
#shadowed for the rest of the notebook
for mouse_id in ids:
    print(mouse_id)
    clone_type_df_id = poly_only[poly_only['id'] == mouse_id][['clone_type']]
    #ids come from merged_df, so an animal may have no polyclonal rows at all;
    #there is nothing to count or plot for it
    if clone_type_df_id.empty:
        continue
    connections_id, id_pairs = generate_connections(clone_type_df_id)
    #likewise skip animals whose rows yield no clone pairs
    if not id_pairs:
        continue
    #save combinations to a dataframe
    id_pairs_df = pd.DataFrame.from_dict(id_pairs, orient='index').reset_index()
    id_pairs_df.columns = ['pair', 'count']
    id_pairs_df.to_csv(f"final_graphs/fig4/{mouse_id}_pair_combinations_combined.csv", index=False)

    #same styling as the per-treatment diagrams: edges coloured by 'clone_a', nodes by 'index'
    chord_id = hv.Chord((connections_id, nodes)).opts(
        hv.opts.Chord(
            cmap=hex_colors,
            edge_color=hv.dim('clone_a').astype(str),
            labels='name',
            node_color=hv.dim('index').astype(str),
        )
    )
    chord_id = chord_id.relabel(f'Chord Diagram for {mouse_id}')
    #hv.save(chord_id, f'./final_graphs/fig4/chord_diag_{mouse_id}.svg', fmt='svg')

    chord_list.append(chord_id)


#Display chord diagram for each mouse id in one plot side by side
hv.Layout(chord_list).cols(3)


1064
1066
1067
1069
1070
1381
883
934
935
