In [20]:
#!pip install plotly

In [14]:
import os
import pandas as pd
import plotly.express as px
import yaml

In [20]:
# Relative locations of the configuration file and of the two parquet inputs.
config_path = '../config.yml'
data_dir = '../data'
predictions_path, tissues_path = (
    os.path.join(data_dir, file_name)
    for file_name in ('predictions.parquet', 'tissues_cell_types.parquet')
)

In [16]:
def read_yaml(yaml_file):
    """
    Reads a YAML file and returns its parsed content
    :param str yaml_file: path to yaml file
    :return: the object produced by ``yaml.safe_load`` for the file
        (a dictionary when the document is a mapping)
    :raises yaml.YAMLError: if the file cannot be parsed; the message names the
        file and the original parser error is chained as ``__cause__``
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform locale.
    with open(yaml_file, 'r', encoding='utf-8') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as err:
            # Chain the parser error so its traceback stays attached to the new one.
            raise yaml.YAMLError(
                "The yaml file {} could not be parsed. {}".format(yaml_file, err)
            ) from err

In [21]:
# Read both parquet inputs, then the YAML configuration.
predictions, tissues = (
    pd.read_parquet(parquet_path)
    for parquet_path in (predictions_path, tissues_path)
)
config = read_yaml(config_path)

In [4]:
# Preview the first rows of the predictions table.
predictions.head()

Unnamed: 0,taxid1,taxid1_label,source_color,source_shape,source,source_name,taxid2,taxid2_label,target_color,target_shape,target,target_name,experimental_evidence_score,databases_evidence_score,weight,group1,group2,edge_type
0,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_818111.1,XP_818111.1,9606,Homo sapiens,#525252,dot,9606.ENSP00000355865,PARK2,0.269,0.799,0.534,KOG0169,KOG0006,inter-species
1,5664,Leishmania major,#fb9a99,diamond,5664.LmjF.30.1610,LmjF.30.1610,9606,Homo sapiens,#525252,dot,9606.ENSP00000339740,CAMK2D,0.0,0.77,0.385,KOG0039,KOG0033,inter-species
2,5679,Leishmania panamensis,#ff7f00,diamond,5679.XP_010701186.1,XP_010701186.1,9606,Homo sapiens,#525252,dot,9606.ENSP00000339740,CAMK2D,0.0,0.77,0.385,KOG0039,KOG0033,inter-species
3,5671,Leishmania infantum,#e31a1c,diamond,5671.XP_001467009.1,XP_001467009.1,9606,Homo sapiens,#525252,dot,9606.ENSP00000339740,CAMK2D,0.0,0.77,0.385,KOG0039,KOG0033,inter-species
4,5691,Trypanosoma brucei,#bc80bd,diamond,5691.EAN79407,EAN79407,9606,Homo sapiens,#525252,dot,9606.ENSP00000339740,CAMK2D,0.0,0.77,0.385,KOG0039,KOG0033,inter-species


In [5]:
# Preview the first rows of the tissue / cell-type table.
tissues.head()

Unnamed: 0,Gene,Tissue,Gene name,Cluster,Cell type,Read count,pTPM
0,9606.ENSP00000000233,brain,ARF5,c-9,excitatory neurons,48578715.0,104.5
1,9606.ENSP00000000233,brain,ARF5,c-26,inhibitory neurons,14766772.0,91.4
2,9606.ENSP00000000233,brain,ARF5,c-43,microglial cells,390440.0,76.8
3,9606.ENSP00000000233,brain,ARF5,c-35,astrocytes,1538564.0,56.5
4,9606.ENSP00000000233,brain,ARF5,c-12,oligodendrocytes,8026745.0,52.9


In [6]:
# Attach the tissue / cell-type rows to every prediction. 'target' is renamed to
# 'Gene', the key column of the tissue table, and the left join keeps predictions
# that have no matching tissue row.
aux = (
    predictions
    .rename(columns={'target': 'Gene'})
    .merge(tissues, how='left', on='Gene')
)
# Give rows without a cell type an explicit label instead of NaN.
aux['Cell type'] = aux['Cell type'].fillna("Not available")

In [28]:
# For every parasite keep only the rows whose tissue is one of the tissues the
# configuration lists for that parasite.
mapped_tissues = config['tissues']
tissue_frames = []
for ident in aux['taxid1'].unique():
    # Deliberately NOT named `tissues`: that name holds the tissue DataFrame read
    # from parquet, and rebinding it to a list here would break a re-run of the
    # merge cell that builds `aux`.
    parasite_tissues = [
        mapped_tissues[t].lower()
        for t in config['parasites'][int(ident)]['tissues']
    ]
    keep = (aux['taxid1'] == ident) & (aux['Tissue'].isin(parasite_tissues))
    tissue_frames.append(aux[keep])
# `tissue_df` is only ever bound to the concatenated DataFrame, never to the list.
tissue_df = pd.concat(tissue_frames)

In [29]:
# Show the interaction / tissue / cell-type columns for Plasmodium falciparum only.
falciparum_columns = ["source_name", "target_name", "Tissue", "Cell type"]
tissue_df.loc[tissue_df['taxid1_label'] == "Plasmodium falciparum", falciparum_columns]

Unnamed: 0,source_name,target_name,Tissue,Cell type
13446,PFC1045c,SPTAN1,liver,endothelial cells
13447,PFC1045c,SPTAN1,liver,hepatic stellate cells
13448,PFC1045c,SPTAN1,liver,cholangiocytes
13449,PFC1045c,SPTAN1,liver,t-cells
13450,PFC1045c,SPTAN1,liver,hepatocytes
...,...,...,...,...
1757555,PF11_0486,TLN1,skin,langerhans cells
1757556,PF11_0486,TLN1,skin,t-cells
1757557,PF11_0486,TLN1,skin,basal keratinocytes
1757558,PF11_0486,TLN1,skin,melanocytes


In [30]:
# Number of rows (edges) per parasite and tissue, counted on the non-null 'taxid2'
# values. The column is selected before counting so that only 'taxid2' is
# aggregated instead of every column of tissue_df.
counts_tissues = (
    tissue_df
    .groupby(['taxid1', 'Tissue'])['taxid2']
    .count()
    .reset_index(name='edges_tissue')
)
counts_tissues.head()

Unnamed: 0,taxid1,Tissue,edges_tissue
0,5664,blood,279
1,5664,mouth,180
2,5664,skin,3814
3,5671,blood,243
4,5671,bone marrow,629


In [31]:
# Number of rows (edges) per parasite, tissue and cell type, counted on the non-null
# 'taxid2' values. The column is selected before counting so that only 'taxid2' is
# aggregated instead of every column of tissue_df.
counts_cells = (
    tissue_df
    .groupby(['taxid1', 'Tissue', 'Cell type'])['taxid2']
    .count()
    .reset_index(name='edges_cell_type')
)
counts_cells.head()

Unnamed: 0,taxid1,Tissue,Cell type,edges_cell_type
0,5664,blood,Not available,279
1,5664,mouth,Not available,180
2,5664,skin,Not available,5
3,5664,skin,basal keratinocytes,480
4,5664,skin,endothelial cells,479


In [32]:
# Attach both edge counts to every row of tissue_df.
# Any count columns left over from a previous run of this cell are dropped first,
# so re-running it does not produce suffixed duplicates such as 'edges_tissue_x'
# and 'edges_tissue_y'.
count_columns = ['edges_tissue', 'edges_cell_type']
tissue_df = (
    tissue_df
    .drop(columns=count_columns, errors='ignore')
    .merge(counts_tissues, on=['taxid1', 'Tissue'], how='left')
    .merge(counts_cells, on=['taxid1', 'Tissue', 'Cell type'], how='left')
)
tissue_df.head()

Unnamed: 0,taxid1,taxid1_label,source_color,source_shape,source,source_name,taxid2,taxid2_label,target_color,target_shape,...,group2,edge_type,Tissue,Gene name,Cluster,Cell type,Read count,pTPM,edges_tissue,edges_cell_type
0,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-1,muller glia cells,3048953.0,112.2,2372,409
1,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-3,rod photoreceptor cells,2023533.0,105.3,2372,409
2,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-13,endothelial cells,747331.0,97.7,2372,405
3,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-9,bipolar cells,1134365.0,93.4,2372,406
4,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-11,cone photoreceptor cells,181322.0,60.7,2372,379


In [33]:
# Display tissue_df ordered from the largest to the smallest per-tissue edge count.
tissue_df.sort_values(by="edges_tissue", ascending=False)

Unnamed: 0,taxid1,taxid1_label,source_color,source_shape,source,source_name,taxid2,taxid2_label,target_color,target_shape,...,group2,edge_type,Tissue,Gene name,Cluster,Cell type,Read count,pTPM,edges_tissue,edges_cell_type
492499,6334,Trichinella spiralis,#fdb462,diamond,6334.EFV51622,WNT4,9606,Homo sapiens,#525252,dot,...,KOG3589,inter-species,brain,RGS2,c-42,oligodendrocyte precursor cells,1265946.0,5.5,83231,13633
400660,6334,Trichinella spiralis,#fdb462,diamond,6334.EFV49264,EFV49264,9606,Homo sapiens,#525252,dot,...,KOG1215,inter-species,brain,LRP3,c-42,oligodendrocyte precursor cells,1265946.0,23.7,83231,13633
400662,6334,Trichinella spiralis,#fdb462,diamond,6334.EFV49264,EFV49264,9606,Homo sapiens,#525252,dot,...,KOG1215,inter-species,brain,LRP3,c-12,oligodendrocytes,8026745.0,2.4,83231,14003
400663,6334,Trichinella spiralis,#fdb462,diamond,6334.EFV48077,EFV48077,9606,Homo sapiens,#525252,dot,...,KOG1215,inter-species,brain,LRP3,c-35,astrocytes,1538564.0,37.7,83231,13526
400664,6334,Trichinella spiralis,#fdb462,diamond,6334.EFV48077,EFV48077,9606,Homo sapiens,#525252,dot,...,KOG1215,inter-species,brain,LRP3,c-41,inhibitory neurons,3515201.0,34.1,83231,14068
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287133,5865,Babesia bovis,#c51b7d,diamond,5865.XP_001610061.1,18.m06451,9606,Homo sapiens,#525252,dot,...,KOG1654,inter-species,blood,,,Not available,,,28,28
287134,5865,Babesia bovis,#c51b7d,diamond,5865.XP_001611086.1,BBOV_IV011660,9606,Homo sapiens,#525252,dot,...,KOG3637,inter-species,blood,,,Not available,,,28,28
287135,5865,Babesia bovis,#c51b7d,diamond,5865.XP_001611086.1,BBOV_IV011660,9606,Homo sapiens,#525252,dot,...,KOG4571,inter-species,blood,,,Not available,,,28,28
287136,5865,Babesia bovis,#c51b7d,diamond,5865.XP_001608923.1,19.m02079,9606,Homo sapiens,#525252,dot,...,KOG3972,inter-species,blood,,,Not available,,,28,28


In [34]:
# Preview the first rows of tissue_df.
tissue_df.head()

Unnamed: 0,taxid1,taxid1_label,source_color,source_shape,source,source_name,taxid2,taxid2_label,target_color,target_shape,...,group2,edge_type,Tissue,Gene name,Cluster,Cell type,Read count,pTPM,edges_tissue,edges_cell_type
0,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-1,muller glia cells,3048953.0,112.2,2372,409
1,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-3,rod photoreceptor cells,2023533.0,105.3,2372,409
2,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-13,endothelial cells,747331.0,97.7,2372,405
3,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-9,bipolar cells,1134365.0,93.4,2372,406
4,5693,Trypanosoma cruzi,#01665e,diamond,5693.XP_808251.1,XP_808251.1,9606,Homo sapiens,#525252,dot,...,KOG0660,inter-species,eye,MAPK3,c-11,cone photoreceptor cells,181322.0,60.7,2372,379


In [35]:
# List the distinct tissue names present in tissue_df.
tissue_df['Tissue'].unique().tolist()

['eye',
 'skin',
 'blood',
 'heart',
 'mouth',
 'bone marrow',
 'spleen',
 'liver',
 'brain',
 'spinal cord',
 'lymph node',
 'urogenital system',
 'intestine',
 'muscle',
 'placenta',
 'lung',
 'gastrointestinal tract']

In [37]:
# Collapse tissue_df to one row per (parasite, tissue, cell type) leaf before plotting.
# px.icicle sums `values` over all rows that share a path, and tissue_df repeats the
# leaf's edge count on every one of its rows, so passing tissue_df directly would
# size each leaf by count * count instead of count.
icicle_df = tissue_df[
    ['taxid1', 'taxid1_label', 'Tissue', 'Cell type', 'edges_tissue', 'edges_cell_type']
].drop_duplicates()

fig = px.icicle(
    icicle_df,
    path=[px.Constant("Parasites"), 'taxid1_label', 'Tissue', 'Cell type'],
    values='edges_cell_type',
    color='edges_cell_type',
    # 'pTPM' is a per-row value and is not part of the one-row-per-leaf frame,
    # so it is not offered as hover data.
    hover_data=['edges_tissue', 'edges_cell_type', 'taxid1', 'taxid1_label'],
    color_continuous_scale='Burgyl',
    height=900,
    width=1200,
    maxdepth=-1,
)
fig.show()