# Analysis of ChIP-seq simulation

In [None]:
import glob
%matplotlib inline
%config InlineBackend.figure_format='retina'
import matplotlib.pyplot as plt

import os

import pandas as pd
import seaborn as sns
from tqdm.auto import tqdm
sns.set_style("whitegrid")
import tempfile
import numpy as np

In [None]:
# Root folder holding the simulation inputs and the peak callers' outputs.
PATH = os.path.expanduser('~/data/2023_chips')
# Number of simulated replicates per modification.
N = 5
# Multipliers used to label runs as Good (1.0), Average (0.5) and Bad (0.2).
MULTIPLIERS = [1.0, 0.5, 0.2]

# Every figure produced below is written to this folder with this extension.
FIGURES_DIR = f'{PATH}/pics'
EXT = 'png'
# Start from an empty figures folder. Pure Python instead of `rm dir/*`, which
# prints an error when the folder is already empty and depends on the shell.
os.makedirs(FIGURES_DIR, exist_ok=True)
for stale_figure in glob.glob(os.path.join(FIGURES_DIR, '*')):
    # Like plain `rm`, only files (and links) are removed; sub-folders stay.
    if os.path.isfile(stale_figure) or os.path.islink(stale_figure):
        os.remove(stale_figure)

# Chips models analysis

In [None]:
import json

# Collect the parameters of each modification's model JSON into one table:
# fragment-length gamma parameters (k, theta), PCR rate and pulldown f / s.
model_records = []
for modification in ['H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3']:
    path = os.path.join(PATH, 'models', f'{modification}.json')
    # The handle gets its own name so it is not rebound to the pulldown `f` value.
    with open(path) as model_file:
        m = json.load(model_file)
    model_records.append((
        modification, path,
        m['frag']['k'], m['frag']['theta'], m['pcr_rate'],
        m['pulldown']['f'], m['pulldown']['s'],
    ))
# Build the frame once instead of growing it row by row.
# The 'prc_rate' column name is kept as is because it is written to models.tsv.
models_df = pd.DataFrame(model_records,
                         columns=['Modification', 'Model', 'k', 'theta', 'prc_rate', 'f', 's'])
models_df.to_csv(os.path.join(PATH, 'models.tsv'), sep='\t', index=False)
models_df

In [None]:
# Gamma parameters (k vs theta) of the fragment length distribution, one point per modification.
fig, ax = plt.subplots(figsize=(4, 4))
sns.scatterplot(data=models_df, x='k', y='theta', hue='Modification', markers=True, ax=ax)
ax.set_title('Fragment lengths gamma distribution')
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'models_fragments_gamma.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Pulldown parameters of every model: `s` on the x axis against `f` on the y axis.
fig, ax = plt.subplots(figsize=(4, 4))
sns.scatterplot(data=models_df, x='s', y='f', hue='Modification', markers=True, ax=ax)
ax.set_title('Fraction of Reads in Peaks vs Bound Genome Fraction')
ax.set_xlabel('Fraction of Reads in Peaks')
ax.set_ylabel('Bound Genome Fraction')
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'models_frip_vs_bound_genome.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

# Report analysis

In [None]:
def bedl(file):
    """Return the length (end - start) of every interval in a BED-like file.

    Args:
        file: path to a tab-separated file without a header row whose second
            and third columns hold the interval start and end.

    Returns:
        A pandas Series with one length per row, or an empty numpy array when
        the file has no content.

    Raises:
        Anything other than an empty file (missing path, malformed rows, ...)
        propagates, so a broken input is not silently counted as "no intervals".
    """
    try:
        tf = pd.read_csv(file, sep='\t', header=None)
    except pd.errors.EmptyDataError:
        return np.zeros(0)  # Empty file
    return tf[2] - tf[1]


def lines(file):
    """Count the rows of a headerless tab-separated file.

    Args:
        file: path to the file.

    Returns:
        Number of rows pandas parses from the file; 0 when the file has no content.

    Raises:
        Anything other than an empty file (missing path, malformed rows, ...)
        propagates instead of being reported as zero rows.
    """
    try:
        tf = pd.read_csv(file, sep='\t', header=None)
    except pd.errors.EmptyDataError:
        return 0  # Empty file
    return len(tf)

def d(a, b):
    """Divide a by b, returning 0 instead of failing when b is zero."""
    if b == 0:
        return 0
    return a / b

def last_col(file):
    """Read only the last column of a headerless tab-separated file.

    The first row is parsed on its own to find the column count, so the second
    read can load just the final column.

    Args:
        file: path to the file.

    Returns:
        A pandas Series with the last column, or an empty numpy array when the
        file has no content.

    Raises:
        Anything other than an empty file (missing path, malformed rows, ...)
        propagates instead of being reported as an empty column.
    """
    try:
        cols = len(pd.read_csv(file, sep='\t', nrows=1, header=None).columns)
        return pd.read_csv(file, sep='\t', header=None, usecols=[cols - 1])[cols - 1]
    except pd.errors.EmptyDataError:
        return np.zeros(0)  # Empty file


def sorted_file(file):
    """Write a copy of `file` sorted by column 1, then numerically by column 2.

    Args:
        file: path to the file to sort.

    Returns:
        Path of a new temporary file with the sorted content. The caller owns
        that file and should delete it when done.

    Raises:
        subprocess.CalledProcessError: if `sort` exits with an error, for
            example because `file` does not exist.
    """
    import subprocess

    # mkstemp creates the file atomically, unlike the race-prone mktemp.
    fd, ts = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'wb') as out:
            # An argument list keeps paths with spaces intact (no shell involved).
            subprocess.run(['sort', '-k1,1', '-k2,2n', file], stdout=out, check=True)
    except BaseException:
        # Do not leave a half-written temp file behind when sorting fails.
        if os.path.exists(ts):
            os.remove(ts)
        raise
    return ts

In [None]:
# Original peak file of each modification, stored in the models folder.
original_peaks = {
    modification: f'{PATH}/models/{modification}_100_0.05_3.peak'
    for modification in ('H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me3')
}
original_peaks

In [None]:
import glob
# Map (modification, replicate) to the BED file of peaks sampled for that simulation.
sampled_peaks = {}
for m in ['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed']:
    for n in range(1, N + 1):
        pattern = PATH + f'/fastq/{m}_*_{n}.bed'
        # glob returns matches in arbitrary order; sort so the pick is deterministic.
        matches = sorted(glob.glob(pattern))
        if not matches:
            # Name the missing input instead of failing with a bare IndexError.
            raise FileNotFoundError(f'No sampled peaks file matches {pattern}')
        sampled_peaks[(m, n)] = matches[0]
sampled_peaks

In [None]:
from itertools import product
# Modification Mult Library I TruePeaksFile TruePeaks TrueLength Tool PeaksFile Fdr Peaks Length PrecisionP RecallP Intersection
def load_peaks(path, suffix):
    """Locate one peak file per (modification, replicate, multiplier) combination.

    Args:
        path: folder containing a peak caller's output files.
        suffix: trailing part of the file name that selects the wanted files
            (for example '.narrowPeak').

    Returns:
        DataFrame with columns 'file', 'modification', 'n' and 'multiplier',
        one row per combination of the six modifications, replicates 1..N and
        MULTIPLIERS.

    Raises:
        FileNotFoundError: if no file matches one of the combinations.
    """
    modifications = ['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed']
    # Materialise the combinations so tqdm knows the total and can show progress.
    combinations = list(product(modifications, range(1, N + 1), MULTIPLIERS))
    records = []
    for m, n, mult in tqdm(combinations):
        pattern = f'{path}/{m}*_{n}_{mult}*{suffix}'
        # Sort because glob order is arbitrary; the first match is used.
        matches = sorted(glob.glob(pattern))
        if not matches:
            raise FileNotFoundError(f'No peaks file matches {pattern}')
        records.append((matches[0], m, n, mult))
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(records, columns=['file', 'modification', 'n', 'multiplier'])

In [None]:
# MACS2 narrow peak files, tagged with the tool name.
df_macs2 = load_peaks(f'{PATH}/macs2', '.narrowPeak').assign(tool='MACS2')
df_macs2

In [None]:
# MACS2 broad peak files (same folder as the narrow ones), tagged with the tool name.
df_macs2broad = load_peaks(f'{PATH}/macs2', '.broadPeak').assign(tool='MACS2 broad')
df_macs2broad

In [None]:
# SPAN peak files, tagged with the tool name.
df_span = load_peaks(f'{PATH}/span', '.peak').assign(tool='SPAN')
df_span

In [None]:
# SICER result files whose names end with 'FDR0.01', tagged with the tool name.
df_sicer = load_peaks(f'{PATH}/sicer', 'FDR0.01').assign(tool='SICER')
df_sicer

In [None]:
# Stack the per-tool tables into one frame with a fresh 0..n-1 index.
per_tool_frames = [df_macs2, df_macs2broad, df_span, df_sicer]
df = pd.concat(per_tool_frames, ignore_index=True)
df.sample(n=5)

In [None]:
tf = tempfile.mktemp()
t = []
for _, (file, m, n) in tqdm(df[['file', 'modification', 'n']].iterrows()):
    true_peaks_file = sampled_peaks[(m, n)]
    true_peaks = lines(true_peaks_file)
    true_length = bedl(true_peaks_file).sum()
    peaks = lines(file)
    length = bedl(file).sum()
    true_peaks_file_s = sorted_file(true_peaks_file)
    file_s = sorted_file(file)
    !bedtools intersect -a {true_peaks_file_s} -b {file_s} -wa -u > {tf}
    recall = lines(tf)
    recall_len = bedl(tf).sum()
    !bedtools intersect -b {true_peaks_file_s} -a {file_s} -wa -u > {tf}
    precision = lines(tf)
    precision_len = bedl(tf).sum()
    !bedtools intersect -a {true_peaks_file_s} -b {file_s} -wo > {tf}
    overlap_len = int(last_col(tf).sum())
    t.append((true_peaks_file, true_peaks, true_length, peaks, length, precision, recall, precision_len, recall_len, overlap_len))
t = pd.DataFrame(t, columns=['true_peaks_file', 'true_peaks', 'true_length',
                             'peaks', 'length', 'precision', 'recall', 'precision_len', 'recall_len', 'overlap_len'])
t

In [None]:
# Copy every metric column of `t` onto `df` by position (both were built row by row in the same order).
for metric_name, metric_values in t.items():
    df[metric_name] = metric_values.tolist()
del t
df.sample(n=5)

In [None]:
# Peak-count ratios and their length-based ("L") counterparts.
# The safe-division helper `d` returns 0 for an empty denominator (for example a
# tool that called no peaks), so these columns never hold NaN. Plain division
# gave NaN for Precision while the F1 below silently fell back to 0.
df['Precision'] = [d(hit, total) for hit, total in zip(df['precision'], df['peaks'])]
df['Recall'] = [d(hit, total) for hit, total in zip(df['recall'], df['true_peaks'])]
df['PrecisionL'] = [d(hit, total) for hit, total in zip(df['precision_len'], df['length'])]
df['RecallL'] = [d(hit, total) for hit, total in zip(df['recall_len'], df['true_length'])]

# Harmonic mean of precision and recall; defined as 0 when either of them is 0.
df['F1'] = [2 / (1 / p + 1 / r) if min(p, r) > 0 else 0 for p, r in zip(df['Precision'], df['Recall'])]
df['F1L'] = [2 / (1 / p + 1 / r) if min(p, r) > 0 else 0 for p, r in zip(df['PrecisionL'], df['RecallL'])]
# Jaccard index by length: shared bases over the union of true and called bases.
df['Jaccard'] = [d(shared, true_len + called_len - shared)
                 for shared, true_len, called_len in zip(df['overlap_len'], df['true_length'], df['length'])]
df.sample(5)

In [None]:
def good(df):
    """Boolean mask of the rows whose multiplier is 1.0."""
    return df['multiplier'].eq(1.0)

def average(df):
    """Boolean mask of the rows whose multiplier is 0.5."""
    return df['multiplier'].eq(0.5)

def bad(df):
    """Boolean mask of the rows whose multiplier is 0.2."""
    return df['multiplier'].eq(0.2)

def with_quality(df, add_all=False):
    """Return a copy of `df` with a 'quality' column derived from the multiplier.

    Rows are emitted grouped as 'Good', 'Average', 'Bad' (in that order) and keep
    their original index labels; rows matching none of the three masks are left out.

    Args:
        df: frame with a 'multiplier' column.
        add_all: when True, a full copy of `df` labelled 'All' is appended as well.

    Returns:
        The concatenated, labelled frame.
    """
    tiers = ((good, 'Good'), (average, 'Average'), (bad, 'Bad'))
    labelled = [df.loc[select(df)].assign(quality=label) for select, label in tiers]
    if add_all:
        labelled.append(df.assign(quality='All'))
    return pd.concat(labelled)

# Label every row with its quality tier; rows come back grouped as Good, Average, Bad.
df = with_quality(df, add_all=False)
df.sample(n=5)

# Peaks number and lengths

In [None]:
# Number of sampled (ground truth) peaks per modification.
fig, ax = plt.subplots(figsize=(3, 3))
sns.barplot(data=df, x='modification', y='true_peaks',
            order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'],
            capsize=.1, errwidth=2, edgecolor='black', ax=ax)
plt.xticks(rotation=45, ha='right')
ax.set_title('Sampled peaks')
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'true_peaks_number.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Lengths of the original peaks, capped at 20,000 randomly sampled peaks per modification.
ts = []
for m, file in original_peaks.items():
    lengths_df = pd.DataFrame({'length': bedl(file)})
    sample_size = min(len(lengths_df), 20_000)
    ts.append(lengths_df.sample(sample_size).assign(modification=m))
t = pd.concat(ts, ignore_index=True)
del ts
t

In [None]:
# Length distribution of the original peaks.
# Plot `t` (per-peak lengths read from the original peak files just above);
# `df['length']` is the total called-peak length per result file, which is not
# what an "Original length" figure should show.
fig = plt.figure(figsize=(3, 3))
sns.boxplot(data=t, x="modification", y="length", showfliers=False,
            order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'])
plt.xticks(rotation=45, ha='right')
plt.title('Original length')
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_DIR, f'true_peaks_lengths.{EXT}'), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Lengths of the sampled (ground truth) peaks, capped at 20,000 peaks per file.
ts = []
# `df` repeats each truth file once per tool and multiplier; read every file only once.
unique_truth_files = df[['modification', 'true_peaks_file']].drop_duplicates()
for _, (m, file) in unique_truth_files.iterrows():
    lengths = bedl(file)
    t = pd.DataFrame(dict(length=lengths))
    t = t.sample(min(len(t), 20_000))
    t['modification'] = m
    ts.append(t)
t = pd.concat(ts).reset_index(drop=True)
del ts
t

In [None]:
# Length distribution of the sampled (ground truth) peaks.
# Plot `t` (per-peak lengths read from the sampled peak files just above);
# `df['length']` is the total called-peak length per result file, which is not
# what a "Sampled length" figure should show.
fig = plt.figure(figsize=(3, 3))
sns.boxplot(data=t, x="modification", y="length", showfliers=False,
            order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'])
plt.xticks(rotation=45, ha='right')
plt.title('Sampled length')
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_DIR, f'sampled_peaks_lengths.{EXT}'), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Number of called peaks per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='peaks', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'peaks.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Lengths of the called peaks of every result file (at most 20,000 per file),
# labelled with the file's modification, quality tier and tool.
ts = []
for _, (m, q, tool, file) in df[['modification', 'quality', 'tool', 'file']].iterrows():
    lengths_df = pd.DataFrame({'length': bedl(file)})
    sample_size = min(len(lengths_df), 20_000)
    ts.append(lengths_df.sample(sample_size).assign(modification=m, quality=q, tool=tool))
t = pd.concat(ts, ignore_index=True)
del ts
t

In [None]:
# Called peak length distributions per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.boxplot(data=t.loc[t['quality'] == q], x='modification', y='length', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'],
                showfliers=False, ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'lengths.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

# Jaccard, precision, recall, F1

In [None]:
# Jaccard index per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='Jaccard', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'jaccard.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Jaccard index against quality tier, one panel per modification and one line per tool
# (error bars show the standard deviation).
fig, axs = plt.subplots(nrows=1, ncols=6, figsize=(15, 3))
for i, m in enumerate(['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed']):
    ax = axs[i]
    sns.lineplot(data=df.loc[df['modification'] == m], x='quality', y='Jaccard', hue='tool',
                 hue_order=['MACS2', 'MACS2 broad', 'SPAN', 'SICER'],
                 errorbar='sd', ax=ax)
    ax.tick_params(axis='x', rotation=45)
    ax.set_title(str(m))
    ax.set_ylim(0, 1.0)
    if i != 5:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'jaccard2.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Peak-count precision per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='Precision', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'precision.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Length-based precision per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='PrecisionL', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'precision_len.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Peak-count recall per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='Recall', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'recall.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Length-based recall per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='RecallL', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'recall_len.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Peak-count F1 score per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='F1', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'f1.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Length-based F1 score per modification and tool, one panel per quality tier.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
for i, q in enumerate(df['quality'].unique()):
    ax = axs[i]
    sns.barplot(data=df.loc[df['quality'] == q], x='modification', y='F1L', hue='tool',
                order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'], ax=ax)
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(str(q))
    if i != 2:
        # Only the rightmost panel keeps a visible legend.
        ax.legend().set_visible(False)
        continue
    # Put the legend to the right of the last panel.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'f1l.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

# Average precision

AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight:

$$\mathrm{AP} = \sum_n (R_n - R_{n-1}) \, P_n,$$ where $P_n$ and $R_n$ are the precision and recall at the $n$-th threshold.

This implementation is not interpolated and is different from computing the area under the precision-recall curve with the trapezoidal rule, which uses linear interpolation and can be too optimistic.

In [None]:
TOOLS = ['MACS2', 'MACS2 broad', 'SPAN', 'SICER']
# plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap is available in old and new versions alike.
palette = plt.get_cmap('tab10')
TOOLS_PALETTE = {t: palette(i) for i, t in enumerate(TOOLS)}

# One panel per modification; each line joins the three multiplier runs of one
# (tool, replicate) pair, ordered by recall.
plt.figure(figsize=(14, 3))
axs = [plt.subplot(1, 6, i + 1) for i in range(6)]
for i, m in enumerate(tqdm(['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'])):
    ax = axs[i]
    for tool, n in product(df['tool'].unique(), range(1, N + 1)):
        dft = df[(df['modification'] == m) & (df['n'] == n) & (df['tool'] == tool)]
        # Drop runs where both precision and recall are zero, then order by recall.
        dft = dft[dft['Recall'] + dft['Precision'] > 0].sort_values(by=['Recall', 'multiplier'])
        ax.plot(dft['Precision'], dft['Recall'], marker='o', label=tool, color=TOOLS_PALETTE[tool], alpha=0.8)
    # Panel decorations are set once per panel, not once per plotted line.
    ax.set_xlabel('Precision')
    ax.set_ylabel('Recall')
    ax.set_title(f'{m}')
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_DIR, f'auc.{EXT}'), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
TOOLS = ['MACS2', 'MACS2 broad', 'SPAN', 'SICER']
# plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap is available in old and new versions alike.
palette = plt.get_cmap('tab10')
TOOLS_PALETTE = {t: palette(i) for i, t in enumerate(TOOLS)}

# Length-based variant: one panel per modification; each line joins the three
# multiplier runs of one (tool, replicate) pair, ordered by length-based recall.
plt.figure(figsize=(14, 3))
axs = [plt.subplot(1, 6, i + 1) for i in range(6)]
for i, m in enumerate(tqdm(['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'])):
    ax = axs[i]
    for tool, n in product(df['tool'].unique(), range(1, N + 1)):
        dft = df[(df['modification'] == m) & (df['n'] == n) & (df['tool'] == tool)]
        # Drop runs where both precision and recall are zero, then order by recall.
        dft = dft[dft['RecallL'] + dft['PrecisionL'] > 0].sort_values(by=['RecallL', 'multiplier'])
        ax.plot(dft['PrecisionL'], dft['RecallL'], marker='o', label=tool, color=TOOLS_PALETTE[tool], alpha=0.8)
    # Panel decorations are set once per panel, not once per plotted line.
    ax.set_xlabel('PrecisionL')
    ax.set_ylabel('RecallL')
    ax.set_title(f'{m}')
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_DIR, f'auc_len.{EXT}'), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Average precision per (modification, replicate, tool):
# AP = sum over runs ordered by recall of (R_n - R_{n-1}) * P_n.
t = []
# Materialise the combinations so tqdm can show the total.
combinations = list(product(df['modification'].unique(), df['n'].unique(), df['tool'].unique()))
for m, n, tool in tqdm(combinations):
    dft = df[(df['modification'] == m) & (df['n'] == n) & (df['tool'] == tool)]
    # Drop runs where both precision and recall are zero, then order by recall.
    dft = dft[dft['Recall'] + dft['Precision'] > 0].sort_values(by=['Recall', 'multiplier'])
    ap = 0
    rprev = 0
    for _, row in dft.iterrows():
        ap += (row['Recall'] - rprev) * row['Precision']
        rprev = row['Recall']
    # Record the final AP once per combination. Appending inside the loop above
    # stored every partial sum as a separate AP value and skewed the averages.
    # A combination with no usable run contributes AP = 0.
    t.append((m, n, tool, ap))
dfap = pd.DataFrame(data=t, columns=['modification', 'n', 'tool', 'AP'])
dfap

In [None]:
# Mean average precision (peak-count based) per modification and tool.
fig, ax = plt.subplots(figsize=(7, 4))
g_result = sns.barplot(data=dfap, x='modification', y='AP', hue='tool',
                       order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'],
                       capsize=.05, errwidth=2, ax=ax)
g_result.tick_params(axis='x', rotation=45)
# Legend goes to the right of the axes.
g_result.legend(loc='center left', bbox_to_anchor=(1, 0.5))
g_result.set_title('Overlap vs ground truth peaks AUC')
g_result.set_ylabel('AUC')
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'ap.' + EXT), bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Length-based average precision per (modification, replicate, tool):
# APL = sum over runs ordered by RecallL of (R_n - R_{n-1}) * P_n.
t = []
# Materialise the combinations so tqdm can show the total.
combinations = list(product(['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'],
                            df['n'].unique(), df['tool'].unique()))
for m, n, tool in tqdm(combinations):
    dft = df[(df['modification'] == m) & (df['n'] == n) & (df['tool'] == tool)]
    # Drop runs where both precision and recall are zero, then order by recall.
    dft = dft[dft['RecallL'] + dft['PrecisionL'] > 0].sort_values(by=['RecallL', 'multiplier'])
    ap = 0
    rprev = 0
    for _, row in dft.iterrows():
        ap += (row['RecallL'] - rprev) * row['PrecisionL']
        rprev = row['RecallL']
    # Record the final value once per combination. Appending inside the loop above
    # stored every partial sum as a separate value and skewed the averages.
    # A combination with no usable run contributes APL = 0.
    t.append((m, n, tool, ap))
dfapl = pd.DataFrame(data=t, columns=['modification', 'n', 'tool', 'APL'])
dfapl

In [None]:
# Mean average precision (length based) per modification and tool.
fig, ax = plt.subplots(figsize=(7, 4))
g_result = sns.barplot(data=dfapl, x='modification', y='APL', hue='tool',
                       order=['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3', 'mixed'],
                       capsize=.05, errwidth=2, ax=ax)
g_result.tick_params(axis='x', rotation=45)
# Legend goes to the right of the axes.
g_result.legend(loc='center left', bbox_to_anchor=(1, 0.5))
g_result.set_title('Overlap vs ground truth peaks AUC')
g_result.set_ylabel('AUC')
fig.tight_layout()
fig.savefig(os.path.join(FIGURES_DIR, 'apl.' + EXT), bbox_inches='tight', dpi=300)
plt.show()