In [None]:
from benchmarkers import IPRG2015Benchmarker, UPSBenchmarker
from sklearn.metrics import r2_score
import pandas as pd
import matplotlib.pyplot as plt


def compare_preds(bm1, bm2):
    """Draw a hexbin plot comparing the estimated log fold changes of two benchmarkers.

    Each benchmarker's ``get_estimated_logFCs(protein_table)`` result is
    flattened with ``reset_index`` and the two results are inner-joined on
    ``ProteinId``. The merged frame is expected to have exactly three
    columns (id, first estimate, second estimate).

    Parameters
    ----------
    bm1, bm2 : benchmarker objects
        Must expose ``protein_table`` and ``get_estimated_logFCs``.
        ``bm1`` is drawn on the x axis and ``bm2`` on the y axis; the axis
        labels are fixed to "Perseus" and "MD+ Discovery BYO" respectively.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    first = bm1.get_estimated_logFCs(bm1.protein_table).reset_index()
    second = bm2.get_estimated_logFCs(bm2.protein_table).reset_index()

    paired = first.merge(second, on="ProteinId")
    paired.columns = ["id", "x", "y"]
    x_vals = paired["x"]
    y_vals = paired["y"]

    fig, axs = plt.subplots(1, 1, constrained_layout=True)
    density = axs.hexbin(x_vals, y_vals, gridsize=15, cmap="PuRd")

    # R^2 is computed with the x-axis estimates as the first argument.
    r_squared = round(r2_score(x_vals, y_vals), 3)
    axs.set_title('Predictions Hexbin: R2 = ' + str(r_squared))
    axs.set_xlabel('Log_2 fold change Perseus')
    axs.set_ylabel('Log_2 fold change MD+ Discovery BYO')

    fig.colorbar(density, ax=axs)
    plt.show()
    return

# Build one UPS benchmarker per result directory. `mq_home` is reused for
# the first two paths, so after this cell it holds the BYO directory.
# NOTE(review): these are absolute, machine-specific paths.
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups_perseus/"
bm1 = UPSBenchmarker(mq_home, mode = "Perseus")
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups_byo/"
bm2 = UPSBenchmarker(mq_home, mode = "BYO")
experiment_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups/clf_prediction"
# bm3 is created without a `mode` argument, so it uses whatever default
# UPSBenchmarker defines.
bm3 = UPSBenchmarker(experiment_home)


# Only the Perseus-vs-BYO comparison is active; the other pairings are
# disabled. compare_preds hard-codes its axis labels to Perseus / BYO, so
# enabling them as-is would mislabel the axes.
compare_preds(bm1, bm2)
#compare_preds(bm1, bm3)
#compare_preds(bm2, bm3)

In [None]:
import itertools
import plotly.graph_objects as go
def compare_preds_iPRG(bm1, bm2):
    """Scatter-plot the estimated log fold changes of two benchmarkers.

    Both ``get_estimated_logFCs(protein_table)`` results are treated as
    tables with one row per protein and one column per comparison. Every
    (protein, comparison) cell becomes one point, with ``bm1`` on the x
    axis and ``bm2`` on the y axis. The axis titles are fixed to
    "Perseus LogFC" and "MD+ Discovery BYO LogFC".

    Only proteins present in *both* tables are plotted. Previously every
    protein of the first table was looked up in the second one, so a single
    protein missing from the second table raised ``KeyError``.

    Columns are paired by position, not by name.
    NOTE(review): this assumes both tables list the comparisons in the same
    order -- confirm against the benchmarker implementation.

    Parameters
    ----------
    bm1, bm2 : benchmarker objects
        Must expose ``protein_table`` and ``get_estimated_logFCs``.

    Returns
    -------
    None
        The plotly figure is displayed with ``fig.show()``.
    """
    estLogFCs1 = bm1.get_estimated_logFCs(bm1.protein_table)
    estLogFCs2 = bm2.get_estimated_logFCs(bm2.protein_table)

    # Keep the row order of the first table but drop proteins that the
    # second table does not contain, so the two value lists stay paired.
    shared = estLogFCs1.index[estLogFCs1.index.isin(estLogFCs2.index)]

    # Flatten row by row into 1-d lists, building the hover labels in step.
    ests1, ests2, labels = [], [], []
    for protein in shared:
        row1 = estLogFCs1.loc[protein]
        row2 = estLogFCs2.loc[protein]
        ests1.extend(row1)
        ests2.extend(row2)
        labels.extend(protein + " " + comparison for comparison in row1.index)

    fig = go.Figure()
    # Identity line as a visual reference for perfect agreement.
    fig.add_trace(go.Scatter(x=[-15, 15], y=[-15, 15], mode="lines",
                             line=go.scatter.Line(
                                 color="gray", dash="dashdot"),
                             showlegend=False))

    fig.add_trace(go.Scatter(x=ests1, y=ests2,
                             mode='markers',
                             hovertext=labels))
    score = r2_score(ests1, ests2)
    title = "LogFC: R^2 = " + str(round(score, 3))
    fig.update_layout(title=title,
                      xaxis_title="Perseus LogFC",
                      yaxis_title="MD+ Discovery BYO LogFC")

    fig.show()
    return #fig

# Rebinds bm1/bm2 to the iPRG2015 benchmarkers, replacing any objects
# bound to those names by earlier cells.
# NOTE(review): these are absolute, machine-specific paths.
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/IPRG2015_MQ_BYO/"
bm1 = IPRG2015Benchmarker(mq_home, "BYO")
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/IPRG2015_MQ_Perseus/"
bm2 = IPRG2015Benchmarker(mq_home, "Perseus")
# Arguments are deliberately passed as (bm2, bm1): compare_preds_iPRG labels
# its x axis "Perseus" and its y axis "BYO", and here bm2 is the Perseus run.
compare_preds_iPRG(bm2, bm1)


In [None]:
#Import libraries
from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
from matplotlib_venn import venn3, venn3_circles
from matplotlib import pyplot as plt

def get_venn_diagram_all_detections(bm1, bm2):
    """Draw a two-set Venn diagram of the proteins detected by each benchmarker.

    A protein counts as detected when its ``"Q-value-ident"`` entry in the
    benchmarker's ``protein_table`` is below 0.01; proteins are identified
    by the table's index labels.

    Parameters
    ----------
    bm1, bm2 : benchmarker objects
        Must expose a ``protein_table`` with a ``"Q-value-ident"`` column.
        ``bm1`` is drawn as 'Group A' and ``bm2`` as 'Group B'.

    Returns
    -------
    None
    """
    detected = []
    for benchmarker in (bm1, bm2):
        table = benchmarker.protein_table
        passes_cutoff = table["Q-value-ident"] < 0.01
        detected.append(set(table.index[passes_cutoff].to_list()))
    first, second = detected

    # venn2 takes the region sizes as (only first, only second, both).
    region_sizes = (len(first - second), len(second - first), len(first & second))
    venn2(subsets=region_sizes, set_labels=('Group A', 'Group B'))
    return

# Uses whichever bm1/bm2 are currently bound in the kernel; when the
# notebook is run top to bottom these are the iPRG2015 benchmarkers
# (bm1 = BYO, bm2 = Perseus).
get_venn_diagram_all_detections(bm1, bm2)

In [None]:
def get_venn_diagram_true_positives(bm1, bm2):
    """Draw a two-set Venn diagram of the proteins detected by each benchmarker.

    A protein counts as detected when its ``"Q-value-ident"`` entry in the
    benchmarker's ``protein_table`` is below 0.01; proteins are identified
    by the table's index labels.

    A stray bare name (``true_positives``) used to sit in the body and made
    every call fail with ``NameError``; it has been removed.

    NOTE(review): despite the function name, nothing here restricts the
    sets to true positives -- the counts cover every protein passing the
    Q-value cut-off. The intended filter still needs to be implemented.

    Parameters
    ----------
    bm1, bm2 : benchmarker objects
        Must expose a ``protein_table`` with a ``"Q-value-ident"`` column.
        ``bm1`` is drawn as 'Group A' and ``bm2`` as 'Group B'.

    Returns
    -------
    None
    """
    dets_1 = set(bm1.protein_table.index[bm1.protein_table["Q-value-ident"] < 0.01].to_list())
    dets_2 = set(bm2.protein_table.index[bm2.protein_table["Q-value-ident"] < 0.01].to_list())

    # venn2 takes the region sizes as (only first, only second, both).
    venn2(subsets=(len(dets_1 - dets_2), len(dets_2 - dets_1), len(dets_1 & dets_2)),
          set_labels=('Group A', 'Group B'))
    return

In [None]:
from benchmarkers import IPRG2015Benchmarker, UPSBenchmarker
from confusion_matrix_calculator import ConfusionMatrixCalculator

# Rebuild the three UPS benchmarkers (bm1/bm2/bm3 are rebound here,
# replacing whatever earlier cells left in those names).
# NOTE(review): these are absolute, machine-specific paths.
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups_perseus/"
bm1 = UPSBenchmarker(mq_home, mode = "Perseus")
mq_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups_byo/"
bm2 = UPSBenchmarker(mq_home, mode = "BYO")
experiment_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/ups_old/ups/clf_prediction"
bm3 = UPSBenchmarker(experiment_home)

# One comparison table per benchmarker; a fresh ConfusionMatrixCalculator
# is instantiated for each call.
comparison_1 = ConfusionMatrixCalculator().get_comparison_table(bm1)
comparison_2 = ConfusionMatrixCalculator().get_comparison_table(bm2)
comparison_3 = ConfusionMatrixCalculator().get_comparison_table(bm3)

def get_venn_true_proteins(comparison_1, comparison_2):
    """Draw a two-set Venn diagram of positive calls among the "real" proteins.

    The two comparison tables are inner-joined on ``ProteinId`` (columns
    shared by both get the suffixes ``_1`` / ``_2``). Only rows whose
    ``Real_1`` flag is true are kept, and the rows flagged ``Positive_1``
    are compared with the rows flagged ``Positive_2``. Rows are identified
    by the index labels of the merged frame.

    Parameters
    ----------
    comparison_1, comparison_2 : pandas.DataFrame
        Tables providing ``ProteinId``, ``Real`` and ``Positive``.
        ``comparison_1`` is drawn as 'Group A', ``comparison_2`` as 'Group B'.

    Returns
    -------
    None
    """
    merged = pd.merge(comparison_1, comparison_2,
                      on="ProteinId", suffixes=("_1", "_2"))
    real_rows = merged[merged["Real_1"]]

    called_1 = set(real_rows.index[real_rows["Positive_1"]])
    called_2 = set(real_rows.index[real_rows["Positive_2"]])

    # venn2 takes the region sizes as (only first, only second, both).
    only_1 = len(called_1.difference(called_2))
    only_2 = len(called_2.difference(called_1))
    in_both = len(called_1 & called_2)
    venn2(subsets=(only_1, only_2, in_both), set_labels=('Group A', 'Group B'))

    return #comparison[comparison.Real_1]

# Perseus (comparison_1) vs BYO (comparison_2); comparison_3 is computed
# above but not plotted here.
get_venn_true_proteins(comparison_1, comparison_2)

# Volcano Plotter

In [1]:
import re
import numpy as np
from benchmarkers import IPRG2015Benchmarker, UPSBenchmarker
from benchmarkers import HerStudyBenchmarker
from confusion_matrix_calculator import ConfusionMatrixCalculator

# Load the HER study results for both pipelines. bm1/bm2 are rebound here,
# replacing whatever earlier cells left in those names.
# NOTE(review): these are absolute, machine-specific paths.
experiment_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/her_study/perseus"
bm1 = HerStudyBenchmarker(experiment_home, mode = "Perseus")
experiment_home = "/mnt/d/Dropbox/MassDynamics_local/experiments/her_study/byo"
# NOTE(review): "BYO" is passed positionally here but as mode= above;
# presumably the same parameter -- confirm against HerStudyBenchmarker.
bm2 = HerStudyBenchmarker(experiment_home, "BYO")


flipped


In [2]:
# Display the Perseus protein table (rendered as the cell output).
bm1.protein_table

Unnamed: 0_level_0,Q-value-ident,adj.P.Val,logFC,ProteinId
ProteinId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A0AV96,0.0,0.806795,-0.138375,A0AV96
A0FGR8,0.0,0.007590,-5.049960,A0FGR8
A4D1S0,0.0,0.159633,-1.609570,A4D1S0
A6NDG6,0.0,0.408110,-0.787791,A6NDG6
Q99613,0.0,0.094087,-3.154830,Q99613
...,...,...,...,...
Q9Y679,0.0,0.392580,-0.743411,Q9Y679
Q9Y696,0.0,0.589874,1.116480,Q9Y696
Q9Y6E2,0.0,0.062678,-3.443650,Q9Y6E2
Q9Y6I9,0.0,0.006222,-3.538570,Q9Y6I9


In [12]:


# NOTE(review): volcano_plot is not defined in any cell shown in this
# notebook section; it must come from an earlier or removed cell, so this
# call fails on a fresh kernel unless that definition is restored.
volcano_plot(bm1.protein_table)

In [8]:
# NOTE(review): volcano_plot is not defined in any cell shown in this
# notebook section; it must come from an earlier or removed cell, so this
# call fails on a fresh kernel unless that definition is restored.
volcano_plot(bm2.protein_table)

            Q-value-ident     logFC  adj.P.Val   ProteinId
ProteinId                                                 
P0DPI2                0.0 -1.713604   0.019740      P0DPI2
A0A0U1RRL7            0.0 -2.544723   0.005627  A0A0U1RRL7
A0AV96                0.0 -1.135442   0.048432      A0AV96


In [25]:
def get_sets(pt1, pt2):
    """Split the protein ids of ``pt1`` by whether ``pt2`` also contains them.

    Parameters
    ----------
    pt1, pt2 : objects with a ``ProteinId`` attribute
        ``ProteinId`` must be an iterable of hashable ids (for example a
        DataFrame column).

    Returns
    -------
    tuple of (set, set)
        ``(a_not_b, both)``: ids found only in ``pt1``, and ids found in
        both tables.
    """
    ids_first = set(pt1.ProteinId)
    ids_second = set(pt2.ProteinId)
    return ids_first - ids_second, ids_first & ids_second

# Proteins only in the Perseus table (a_not_b) vs. in both tables (both).
# `both` is read later by other cells as a notebook-level variable.
a_not_b, both = get_sets(bm1.protein_table,bm2.protein_table)
len(a_not_b)

0

In [None]:
def get_sets(pt1, pt2):
    """Split the protein ids of ``pt1`` by whether ``pt2`` also contains them.

    Parameters
    ----------
    pt1, pt2 : objects with a ``ProteinId`` attribute
        ``ProteinId`` must be an iterable of hashable ids (for example a
        DataFrame column).

    Returns
    -------
    tuple of (set, set)
        ``(a_not_b, both)``: ids found only in ``pt1``, and ids found in
        both tables.
    """
    ids_first = set(pt1.ProteinId)
    ids_second = set(pt2.ProteinId)
    return ids_first - ids_second, ids_first & ids_second

# Proteins only in the Perseus table (a_not_b) vs. in both tables (both).
# `both` is read later by other cells as a notebook-level variable.
a_not_b, both = get_sets(bm1.protein_table,bm2.protein_table)
len(a_not_b)

In [19]:
# Rows of the Perseus table for proteins present in both tables. `both` is
# a set, so the row order of the result is not fixed.
bm1.protein_table.loc[list(both),:]

Unnamed: 0_level_0,Q-value-ident,adj.P.Val,logFC,ProteinId
ProteinId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
P10768,0.0,0.190222,1.317910,P10768
Q8NBN7,0.0,0.017181,-1.949400,Q8NBN7
O14776,0.0,0.007227,-3.818050,O14776
P62851,0.0,0.230914,-3.190410,P62851
Q99880,0.0,0.004255,-7.938260,Q99880
...,...,...,...,...
Q86V81,0.0,0.164530,-2.969710,Q86V81
Q8WVM8,0.0,0.005378,-4.280530,Q8WVM8
Q9BY44,0.0,0.797782,-0.449605,Q9BY44
Q9UKY7,0.0,0.044626,1.144800,Q9UKY7


In [27]:
def overlap_volcano_plot(protein_table,
                        protein_table_2,
                        comparison = None):
    """Volcano plot of ``protein_table``, coloured by overlap with ``protein_table_2``.

    Proteins whose ``ProteinId`` also occurs in ``protein_table_2`` are
    added as the "blue" series; proteins found only in ``protein_table``
    are added as the "red" series.

    Parameters
    ----------
    protein_table : pandas.DataFrame
        Table to plot. Needs a ``ProteinId`` column, must be indexed by
        protein id, and (after the optional ``comparison`` selection) must
        provide ``logFC`` and ``adj.P.Val`` columns.
    protein_table_2 : pandas.DataFrame
        Table used only to decide which proteins are shared; needs a
        ``ProteinId`` column.
    comparison : str, optional
        Regular expression selecting the columns of one comparison. The
        matched text is stripped from the column names, so for example
        ``"A logFC"`` becomes ``"logFC"`` for ``comparison="A "``.

    Returns
    -------
    None
        The plotly figure is displayed with ``fig.show()``.
    """
    # Must happen before the column filter below, which drops ProteinId.
    a_not_b, both = get_sets(protein_table, protein_table_2)

    if comparison:
        # Filter the table that was passed in. The previous code read the
        # notebook-global bm1.protein_table here, which plotted the wrong
        # data (or raised NameError) whenever `comparison` was given.
        protein_table = protein_table.filter(regex = comparison)
        protein_table = protein_table.rename(columns=lambda x: re.sub(comparison,'',x))

    #create figure
    fig = go.Figure()

    fig = add_volcano_series(fig, protein_table.loc[list(both),:], "blue")
    fig = add_volcano_series(fig, protein_table.loc[list(a_not_b),:], "red")

    # Largest absolute fold change; Series.max skips NaN, whereas the
    # builtin max() gave order-dependent results when NaN was present.
    width = protein_table["logFC"].abs().max()
    fig = format_volcano_plot(fig, width)
    fig.show()

def add_volcano_series(fig, protein_table, colour):
    """Append one marker series of volcano-plot points to ``fig``.

    Parameters
    ----------
    fig : figure object with an ``add_trace`` method
        Figure the series is added to.
    protein_table : pandas.DataFrame
        Supplies ``logFC`` for the x values and ``adj.P.Val`` for the y
        values (plotted as ``-log10``); its index is used as hover text.
    colour : str
        Colour applied to the marker outline (``marker.line.color``).

    Returns
    -------
    The same ``fig`` object, to allow chained calls.
    """
    marker_style = {"size": 5, "line": {"width": 1, "color": colour}}
    trace = go.Scatter(
        x=protein_table["logFC"],
        y=-1 * np.log10(protein_table["adj.P.Val"]),
        marker=marker_style,
        mode='markers',
        hovertext=protein_table.index,
        showlegend=True,
    )
    fig.add_trace(trace)
    return fig

# Plot the BYO table (bm2), using the Perseus table (bm1) only to decide
# which proteins are shared.
overlap_volcano_plot(bm2.protein_table,bm1.protein_table)