# Analysis of ChIP-seq simulation

In [1]:
# Notebook-wide setup: plotting backend, imports and seaborn theme.

# Render matplotlib figures inline in the notebook, at high-DPI ("retina") resolution.
%matplotlib inline
%config InlineBackend.figure_format='retina'

from IPython.display import display
import pandas as pd

import seaborn as sns
# Apply the seaborn "whitegrid" theme to matplotlib/seaborn figures created after this point.
sns.set_style("whitegrid")
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
from collections import OrderedDict, Counter
# NOTE(review): display, plt, PdfPages, OrderedDict and Counter are not referenced in the
# cells visible here — presumably used further down the notebook; confirm before pruning.

In [2]:
# Load the report table (tab-separated; anything after a '#' is treated as a comment)
# and order the rows by modification, then tool, then FDR threshold.
# NOTE(review): the path is an absolute location on one machine — consider moving it
# to a configurable constant.
df = (
    pd.read_csv('/mnt/stripe/shpynov/2021_noise2/report.tsv', sep='\t', comment='#')
    .sort_values(by=['Modification', 'Tool', 'Fdr'])
)
df

Unnamed: 0,Modification,TruePeaks,Tool,Fdr,Peaks,Sensitivity,Specificity
6,H3K27ac,4755,Macs2,0.000001,2099,2014,2099
5,H3K27ac,4755,Macs2,0.000010,2253,2193,2253
4,H3K27ac,4755,Macs2,0.000100,2374,2352,2374
3,H3K27ac,4755,Macs2,0.001000,2564,2584,2564
1,H3K27ac,4755,Macs2,0.010000,2829,2846,2817
...,...,...,...,...,...,...,...
103,H3K4me3,1012,SPAN,0.000100,710,976,710
102,H3K4me3,1012,SPAN,0.001000,709,976,709
101,H3K4me3,1012,SPAN,0.010000,710,977,710
100,H3K4me3,1012,SPAN,0.050000,710,979,708


In [3]:
# Turn the raw 'Sensitivity' / 'Specificity' counts into fractions:
#   SensitivityF = Sensitivity / TruePeaks
#   SpecificityF = Specificity / Peaks
# NOTE(review): presumably 'Sensitivity' counts true peaks that were recovered and
# 'Specificity' counts called peaks that hit a true peak — confirm against the report generator.
df = df.assign(
    SensitivityF=df['Sensitivity'] / df['TruePeaks'],
    SpecificityF=df['Specificity'] / df['Peaks'],
)
df

Unnamed: 0,Modification,TruePeaks,Tool,Fdr,Peaks,Sensitivity,Specificity,SensitivityF,SpecificityF
6,H3K27ac,4755,Macs2,0.000001,2099,2014,2099,0.423554,1.000000
5,H3K27ac,4755,Macs2,0.000010,2253,2193,2253,0.461199,1.000000
4,H3K27ac,4755,Macs2,0.000100,2374,2352,2374,0.494637,1.000000
3,H3K27ac,4755,Macs2,0.001000,2564,2584,2564,0.543428,1.000000
1,H3K27ac,4755,Macs2,0.010000,2829,2846,2817,0.598528,0.995758
...,...,...,...,...,...,...,...,...,...
103,H3K4me3,1012,SPAN,0.000100,710,976,710,0.964427,1.000000
102,H3K4me3,1012,SPAN,0.001000,709,976,709,0.964427,1.000000
101,H3K4me3,1012,SPAN,0.010000,710,977,710,0.965415,1.000000
100,H3K4me3,1012,SPAN,0.050000,710,979,708,0.967391,0.997183


In [19]:
import plotly.graph_objects as go
import plotly.express as px

# One interactive figure per modification; within a figure, one line per tool whose
# points are the (SpecificityF, SensitivityF) pairs ordered by increasing Fdr.

# The tool list is taken from the whole table, so every figure iterates the same tools
# in the same (alphabetical) order, even if a tool has no rows for a given modification.
tool_names = sorted(df['Tool'].unique())

for modification in sorted(df['Modification'].unique()):
    fig = go.Figure(layout_title_text=modification)
    modification_rows = df[df['Modification'] == modification]

    for tool_name in tool_names:
        curve = modification_rows[modification_rows['Tool'] == tool_name].sort_values(by='Fdr')
        trace = go.Scatter(
            x=curve['SpecificityF'],
            y=curve['SensitivityF'],
            mode='lines+markers',
            name=tool_name,
        )
        fig.add_trace(trace)

    # Both axes show fractions; the range is padded slightly beyond [0, 1]
    # so markers at the extremes are not clipped.
    fig.update_xaxes(range=[-0.1, 1.1], title='Specificity')
    fig.update_yaxes(range=[-0.1, 1.1], title='Sensitivity')
    fig.show()