# 3 ReactomeSuperPathwaysDrivers - Plotting

In [42]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import glob
import numpy as np
import pickle 
import statistics as st
import seaborn as sb
import random as rd

In [43]:
class SuperPathwayClass:
    """Bundle of one super pathway: identity, gene set, networks and drivers.

    Attributes
    ----------
    name, idPathway : pathway label and identifier, stored as given.
    geneSet : sized collection of the pathway's genes.
    networkCCs : network object exposing ``.nodes`` (built from ``networkWithCCs``).
    networkLCC : network object exposing ``.nodes`` for the largest connected component.
    drivers : sized collection of driver genes.
    topologicalFeaturesLCC, layout_LCC_kamada_kawai, layout_CCs_spring :
        initialised to ``None``; this class never fills them in itself.
    """

    def __init__(self, name,idPathway,geneSet,networkWithCCs,networkLCC,drivers):
        """Store the constructor arguments and reset the derived attributes to None."""
        self.name, self.idPathway = name, idPathway
        self.geneSet = geneSet
        self.networkCCs, self.networkLCC = networkWithCCs, networkLCC
        self.drivers = drivers
        # Derived data, populated outside this class.
        self.topologicalFeaturesLCC = None
        self.layout_LCC_kamada_kawai = None
        self.layout_CCs_spring = None

    def info(self):
        """Return a summary dict describing the sizes of this pathway.

        Keys, in order: ``Name``, ``idPathway``, ``lenSet``, ``lenCCs``,
        ``lenLCC``, ``driversLCC`` and ``drivers %``. The two network sizes
        are strings of the form ``"<nodes> (<share of gene set>%)"``;
        ``drivers %`` is the driver count relative to the LCC node count,
        rounded to an integer.
        """
        geneSetSize = len(self.geneSet)
        lccSize = len(self.networkLCC.nodes)
        nDrivers = len(self.drivers)

        def _sizeWithShare(nNodes):
            # "<count> (<rounded percentage of the gene set>%)"
            return f"{nNodes} ({round(nNodes / geneSetSize * 100)}%)"

        return {
            'Name': self.name,
            'idPathway': self.idPathway,
            'lenSet': geneSetSize,
            'lenCCs': _sizeWithShare(len(self.networkCCs.nodes)),
            'lenLCC': _sizeWithShare(lccSize),
            'driversLCC': nDrivers,
            'drivers %': round(nDrivers / lccSize * 100),
        }

# Target Pathways
Analyzing all 26 Super Pathways is not feasible, so only the 7 Super Pathways associated with driver genes were selected.
To represent the other pathways, the combined network `ReactomeFI_LCC_AllSuperPathways` was created.

In [44]:
# Pathways analysed in this notebook; the last entry is the combined network.
targetPathways = [
    'Chromatin organization',
    'Circadian Clock',
    'DNA Repair',
    'Reproduction',
    'Gene expression (Transcription)',
    'Developmental Biology',
    'Programmed Cell Death',
    'ReactomeFI_LCC_AllSuperPathways',
]

In [45]:
# Load the pickled pathway objects and build an overview table for the target pathways.
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only
# load pickles produced by this project's own pipeline.
# The context manager closes the file handle even if unpickling fails.
with open("2 output/superPathwaysObjs.pkl", "rb") as file:
    superPathwaysObjs = pickle.load(file)

# One info() summary per pathway, keyed by the (sorted) dictionary key.
overView = {k: superPathwaysObjs[k].info() for k in sorted(superPathwaysObjs)}

overViewDf = pd.DataFrame.from_dict(overView, orient='index')
# The first column of the info() summary becomes the row index.
overViewDf = overViewDf.set_index(overViewDf.columns[0])
# Keep only the target pathways, in the order given by targetPathways.
overViewDf = overViewDf.filter(targetPathways, axis=0)
overViewDf

Unnamed: 0_level_0,idPathway,lenSet,lenCCs,lenLCC,driversLCC,drivers %
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Chromatin organization,R-HSA-4839726,240,218 (91%),206 (86%),45,22
Circadian Clock,R-HSA-400253,70,69 (99%),64 (91%),12,19
DNA Repair,R-HSA-73894,312,290 (93%),284 (91%),48,17
Reproduction,R-HSA-1474165,114,95 (83%),81 (71%),14,17
Gene expression (Transcription),R-HSA-74160,1536,1392 (91%),1367 (89%),194,14
Developmental Biology,R-HSA-1266738,1097,972 (89%),962 (88%),137,14
Programmed Cell Death,R-HSA-5357801,216,208 (96%),201 (93%),27,13
ReactomeFI_LCC_AllSuperPathways,,11375,9472 (83%),9399 (83%),649,7


# Rename the column names

In [40]:
# Display names for the topological-feature columns (raw name -> label used
# in the tables and figures below). Defined once instead of once per pathway.
featureDisplayNames = {
    'degree': 'Degree',
    'kcore': 'Kcore',
    'eigenvector': 'Eigenvector',
    'averageNeighbor': 'AvgNeighbors',
    'clusteringcoeff': 'Clustering',
    'leverage': 'Leverage',
    'eccentricity': 'Eccentricity',
    'betweenness': 'Betweenness',
    'closeness': 'Closeness',
    'bridging': 'Bridging',
}

for name in targetPathways:
    pathwayObj = superPathwaysObjs[name]
    # rename() ignores keys that are not present, so re-running this cell on
    # already-renamed columns leaves them unchanged.
    pathwayObj.topologicalFeaturesLCC = pathwayObj.topologicalFeaturesLCC.rename(
        columns=featureDisplayNames)

In [11]:
def plotWithDrivers(obj):
    """Draw and save two network figures for a pathway, highlighting its drivers.

    The first figure shows ``obj.networkLCC`` positioned with
    ``obj.layout_LCC_kamada_kawai`` and is saved as PNG and SVG under
    ``./3 imgs/LCC/``. The second shows ``obj.networkCCs`` positioned with
    ``obj.layout_CCs_spring`` and is saved as PNG under ``./3 imgs/CCs/``.
    Driver nodes are drawn larger, in red, and are the only labelled nodes.

    Parameters
    ----------
    obj : object exposing ``name``, ``drivers``, ``networkLCC``, ``networkCCs``,
        ``layout_LCC_kamada_kawai`` and ``layout_CCs_spring``. Both layouts
        must already be populated (node -> position mappings).

    Returns
    -------
    None. The figures are written to disk and closed; the output directories
    are not created here.
    """
    drivers=obj.drivers
    # Set for O(1) membership tests; the node lists below can be large.
    driverSet=set(drivers)

    views=[
        (obj.networkLCC, obj.layout_LCC_kamada_kawai, '_kamada'),
        (obj.networkCCs, obj.layout_CCs_spring, '_spring'),
    ]
    for G,pos,layout in views:
        isLCC = layout=='_kamada'

        name=obj.name+' | N:'+str(G.number_of_nodes()) + ' | L:'+str(G.number_of_edges()) + ' | Drivers:'+str(len(drivers))
        if not isLCC:
            # Only the all-components view reports how many components it has.
            name+=' | nCCs:'+str(nx.number_connected_components(G))

        others = [node for node in G.nodes() if node not in driverSet]

        fig=plt.figure(figsize=(16,8),dpi=550)
        #node_shapes = so^>v<dph8
        nx.draw_networkx_nodes(G, pos=pos, node_size=20, nodelist=others, node_color='#1f78b4', label='Other Genes',node_shape='o',alpha=1)
        nx.draw_networkx_nodes(G, pos=pos, node_size=300, nodelist=drivers, node_color='#FF2000', label='drivers',node_shape='o')
        nx.draw_networkx_edges(G, pos=pos, alpha=1, edge_color='gray',width=0.1)

        #Show the label only for the driver nodes
        posFiltered={node: xy for node,xy in pos.items() if node in driverSet}
        nx.draw_networkx_labels(nx.subgraph(G,drivers), pos=posFiltered,font_size=7)

        plt.title(name,fontsize=16)
        plt.tight_layout()
        if isLCC:
            plt.savefig("./3 imgs/LCC/"+obj.name+layout+".png", format='png', dpi=400)
            plt.savefig("./3 imgs/LCC/"+obj.name+layout+".svg", format='svg')
        else:
            plt.savefig("./3 imgs/CCs/"+obj.name+layout+".png", dpi=400)
        # Close this specific figure so repeated calls do not accumulate open figures.
        plt.close(fig)


### Plotting SuperPathways

In [5]:
%%time
# Render and save the network figures for every target pathway
# (one plotWithDrivers call per entry of targetPathways).
for name in targetPathways:
    plotWithDrivers(superPathwaysObjs[name])

In [6]:
# Manual tuning of node positions in the Programmed Cell Death LCC layout,
# followed by a re-plot of that pathway with the adjusted coordinates.
pcdObj = superPathwaysObjs['Programmed Cell Death']

manualPositions = {
    'CASP5': np.array([-0.7     , -0.23]),
    'PAK2': np.array( [0.0836245 , 0.09]),
    'BIRC3': np.array([0.10587831, -0.09]),
}
for gene, xy in manualPositions.items():
    pcdObj.layout_LCC_kamada_kawai[gene] = xy

plotWithDrivers(pcdObj)

### Plotting Centrality

#### Correlation

In [None]:
%%time
plt.figure(figsize=(12,6),dpi=250) 
degreeCorrOrderList=[]
for i,name in enumerate(targetPathways):
    ax=plt.subplot(2,4,i+1)
    if(name == 'ReactomeFI_LCC_AllSuperPathways'):
        plt.title('All Super Pathways',fontsize=7)
    else:    
        plt.title(name,fontsize=7)
    df = superPathwaysObjs[name].topologicalFeaturesLCC.copy()
    #normalize 0~1
    df=(df-df.min())/(df.max()-df.min())
    df_corr = df.corr()
    #Reorder based on degree correlation
    degreeCorrOrder=list(dict(sorted(dict(df_corr['Degree']).items(),key=lambda x:x[1],reverse=True)).keys())
    df = df[degreeCorrOrder]
    degreeCorrOrderList.append(degreeCorrOrder)
    #Recalculte the correlation Matriz. Seens better/easier than shape the original df_corr
    df_corr = df.corr()
    # color map
    cmap = sb.diverging_palette(10,240,90, as_cmap=True)
    ax=sb.heatmap(df_corr,annot=True, fmt=".1f", 
               linewidths=0.5, cmap=cmap, vmin=-1, vmax=1, 
               cbar=False, square=True,annot_kws={"size": 5})
   
    plt.yticks(rotation=0,fontsize=4)
    plt.xticks(rotation=90,fontsize=4)
    plt.tight_layout()  

#plt.title('Centrality Correlation')
plt.tight_layout()
plt.savefig('./3 imgs/Correlation.png', format='png', dpi=800)
plt.savefig('./3 imgs/Correlation.svg', format='svg')
plt.show()
plt.close()

In [None]:
%%time
plt.figure(figsize=(10,10),dpi=500) 
degreeCorrOrderList=[]

name = 'ReactomeFI_LCC_AllSuperPathways'
plt.title('All Super Pathways',fontsize=16)
df = superPathwaysObjs[name].topologicalFeaturesLCC.copy()

#normalize 0~1
df=(df-df.min())/(df.max()-df.min())
df_corr = df.corr()

#Reorder based on total correlation
absoluteCorrelation = dict(df_corr.abs().sum())
absoluteCorrelation = dict(sorted(absoluteCorrelation.items(), key=lambda x:x[1],reverse=True))
degreeCorrOrder=list(dict(sorted(dict(df_corr['Degree']).items(),key=lambda x:x[1],reverse=True)).keys())
df = df[list(absoluteCorrelation.keys())]

#Recalculte the correlation Matriz. Seens better/easier than shape the original df_corr
df_corr = df.corr()

#Add the new colum
df_corr['Abs Value']=absoluteCorrelation_df

#Reajust Values
df_corr=df_corr.replace(1, np.nan)
df_corr['Abs Value']=df_corr['Abs Value']-1

# color map
cmap = sb.diverging_palette(10,240,90, as_cmap=True)
ax=sb.heatmap(df_corr,annot=True, fmt=".2f", 
           linewidths=0.5, cmap=cmap, vmin=-1, vmax=1, 
           cbar=False, square=True,annot_kws={"size": 12})

plt.yticks(rotation=0,fontsize=12)
plt.xticks(rotation=90,fontsize=12)


plt.tight_layout()
plt.savefig('./3 imgs/Correlation All Super Pathways.png', format='png', dpi=300)
plt.savefig('./3 imgs/Correlation All Super Pathways.svg', format='svg')
plt.show()
plt.close()

#### Driver Vs No-Drivers

In [12]:
# Driver vs. non-driver centrality per pathway. Each cell of the resulting
# table reads "<mean ratio> - <median ratio>", where a ratio is the driver
# statistic divided by the non-driver statistic, rounded to two decimals.
cols = ['Degree','Clustering' ,'Betweenness' , 'Closeness' ]
driversCentrality={}
for name in targetPathways:
    pathwayObj = superPathwaysObjs[name]
    features = pathwayObj.topologicalFeaturesLCC
    # Split the rows once per pathway instead of once per statistic.
    isDriver = features.index.isin(pathwayObj.drivers)
    driverRows = features[isDriver]
    otherRows = features[~isDriver]

    ratios = {}
    for col in cols:
        meanRatio = driverRows[col].mean()/otherRows[col].mean()
        medianRatio = driverRows[col].median()/otherRows[col].median()
        ratios[col] = str(round(meanRatio,2))+' - '+str(round(medianRatio,2))
    driversCentrality[name] = ratios

pd.DataFrame.from_dict(driversCentrality,orient='index')

Unnamed: 0,Degree,Clustering,Betweenness,Closeness
Chromatin organization,1.09 - 1.12,0.89 - 0.84,1.05 - 2.57,1.02 - 1.07
Circadian Clock,1.5 - 1.78,0.89 - 0.89,2.89 - 4.86,1.11 - 1.14
DNA Repair,1.09 - 1.31,0.91 - 0.95,1.1 - 0.97,1.02 - 1.03
Reproduction,1.55 - 1.56,0.88 - 0.81,1.47 - 6.65,1.16 - 1.15
Gene expression (Transcription),1.55 - 2.24,0.84 - 0.71,2.96 - 11.53,1.1 - 1.1
Developmental Biology,1.55 - 1.79,0.76 - 0.64,3.86 - 5.83,1.14 - 1.12
Programmed Cell Death,1.36 - 2.07,0.61 - 0.6,7.74 - 18.1,1.13 - 1.12
ReactomeFI_LCC_AllSuperPathways,2.3 - 3.61,0.73 - 0.69,5.29 - 12.37,1.12 - 1.1


### Box Plot

In [None]:
# Box plots per pathway: for each centrality measure, five boxes side by side:
#   non-drivers (blue), drivers (red) and three random baselines (orange)
#   averaged over 1, 10 and 100 random gene samples of the same size as the
#   driver list.
cols = ['Degree','Clustering','Betweenness','Closeness']
colors = ['blue','red','orange']
# Fraction of the highest Betweenness values treated as outliers and dropped.
betweennessTrimFraction = 0.05
# Local, seeded generator so the random baselines are reproducible on re-run
# without touching the global `random` state.
randomSeed = 42
rng = rd.Random(randomSeed)


def _consensusRandomSample(values, sampleSize, repetitions, rng):
    """Average `repetitions` random samples of `sampleSize` entries of `values`.

    Each sample keeps the order in which the genes appear in `values`, and the
    samples are averaged position by position. Assumes the index of `values`
    is unique, so every sample has exactly `sampleSize` entries.
    """
    genes = list(values.index)
    total = np.zeros(sampleSize)
    for _ in range(repetitions):
        sampled = rng.sample(genes, sampleSize)
        total += values[values.index.isin(sampled)].to_numpy()
    return total/repetitions


def _drawBox(ax, values, position, color):
    """Draw one box at `position` with every element in the same colour."""
    lineStyle = dict(color=color)
    ax.boxplot(values, positions=[position], widths = 0.6, boxprops=lineStyle, medianprops=lineStyle,
               whiskerprops=lineStyle, capprops=lineStyle, flierprops=dict(markeredgecolor=color))


for name in targetPathways:
    features = superPathwaysObjs[name].topologicalFeaturesLCC
    #normalize 0~1
    df=(features-features.min())/(features.max()-features.min())
    drivers=superPathwaysObjs[name].drivers

    #Remove the outliers from Betweenness: ignore the top 5% of values.
    #Slice by the number of rows to keep; a negative-stop slice would return
    #nothing when the number of rows to drop rounds down to zero.
    nTrimmed=int(len(df)*betweennessTrimFraction)
    keptGenes=df['Betweenness'].sort_values().index[:len(df)-nTrimmed]
    dfBet=df.loc[df.index.isin(keptGenes), ['Betweenness']]
    #Re-normalize the remaining values to 0~1
    dfBet=(dfBet-dfBet.min())/(dfBet.max()-dfBet.min())

    #Plotting
    fig, ax = plt.subplots(figsize=(12, 2.75))
    # Remove top, right and left border
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    # Remove y-axis tick marks
    ax.yaxis.set_ticks_position('none')

    # Set plot title
    if(name == 'ReactomeFI_LCC_AllSuperPathways'):
        ax.set_title('All Super Pathways',fontsize=13)
    else:
        ax.set_title(name,fontsize=13)

    # Add major gridlines in the y-axis
    ax.grid(color='grey', axis='y', linestyle='-', linewidth=0.25, alpha=0.5)

    for group,col in enumerate(cols):
        # Betweenness uses the trimmed, re-normalized frame for ALL of its
        # boxes, including the random baselines, so they share one scale.
        values = dfBet[col] if col=='Betweenness' else df[col]
        isDriver = values.index.isin(drivers)
        sampleSize = min(len(drivers), len(values))

        # Each measure occupies positions 6*group+1 .. 6*group+5; the sixth
        # position is left empty as a visual gap between measures.
        base = 6*group
        _drawBox(ax, values[~isDriver], base+1, colors[0])
        _drawBox(ax, values[isDriver], base+2, colors[1])
        for offset,repetitions in enumerate((1,10,100), start=3):
            randomValues = _consensusRandomSample(values, sampleSize, repetitions, rng)
            _drawBox(ax, randomValues, base+offset, colors[2])

    # One tick per measure, centred under its five boxes.
    ax.set_xticks([6*group+3 for group in range(len(cols))])
    ax.set_xticklabels(cols,fontsize=12)
    plt.yticks(fontsize=10)
    plt.tight_layout()
    plt.savefig('./3 imgs/measures/Measure Distribution v2 '+name+' 5 Plots threshold.png', format='png', dpi=800)
    plt.savefig('./3 imgs/measures/Measure Distribution v2 '+name+' 5 Plots threshold.svg', format='svg')
    plt.show()
    plt.close(fig)