# GSE26320 Control
Before running this notebook, please run GSE26320-* for all modifications, both with and without control.

In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

import pandas as pd
import seaborn as sns

sns.set_style("whitegrid")
import matplotlib.pyplot as plt
import numpy as np
import os
import tempfile


In [None]:
def bedl(file):
    """Return the interval lengths stored in a headerless tab-separated file.

    The file is parsed with pandas without a header row; the length of each
    row is computed as ``column 2 - column 1`` (0-based column labels).

    :param file: path (or anything ``pd.read_csv`` accepts) of the table.
    :return: a pandas Series of lengths, or an empty NumPy array when the
        table cannot be read or lacks the required columns (e.g. the file
        is empty).
    """
    try:
        tf = pd.read_csv(file, sep='\t', header=None)
        return tf[2] - tf[1]
    except Exception:
        # Best effort on purpose: an empty file makes pandas raise, and the
        # caller just wants "no intervals". Catching Exception rather than
        # using a bare except keeps KeyboardInterrupt / SystemExit working,
        # so a long-running notebook cell can still be interrupted.
        return np.zeros(0)  # Empty file


def lines(file):
    """Return the number of rows in a headerless tab-separated file.

    The count is the length of the DataFrame pandas parses from the file,
    so it follows ``pd.read_csv`` defaults for what counts as a row.

    :param file: path (or anything ``pd.read_csv`` accepts) of the table.
    :return: the row count, or 0 when the file cannot be read as a table
        (e.g. it is empty).
    """
    try:
        tf = pd.read_csv(file, sep='\t', header=None)
        return len(tf)
    except Exception:
        # Best effort on purpose (pandas raises on an empty file), but do not
        # use a bare except: that would also swallow KeyboardInterrupt and
        # SystemExit and make the notebook impossible to interrupt here.
        return 0  # Empty file

def d(a, b):
    """Safe division: return ``a / b``, or 0 when ``b`` equals zero."""
    if b == 0:
        return 0
    return a / b

def last_col(file):
    """Return the last column of a headerless tab-separated file.

    The file is read twice: first a single row to find out how many columns
    there are, then the whole file restricted to the last column only, so
    the other columns are never loaded in full.

    :param file: path (or anything ``pd.read_csv`` accepts) of the table.
    :return: a pandas Series holding the last column, or an empty NumPy
        array when the file cannot be read as a table (e.g. it is empty).
    """
    try:
        cols = len(pd.read_csv(file, sep='\t', nrows=1, header=None).columns)
        return pd.read_csv(file, sep='\t', header=None, usecols=[cols - 1])[cols - 1]
    except Exception:
        # Best effort on purpose (pandas raises on an empty file). Exception
        # instead of a bare except lets KeyboardInterrupt / SystemExit
        # propagate, so the cell can still be interrupted.
        return np.zeros(0)  # Empty file

def sorted_file(file):
    """Write a sorted copy of ``file`` to a new temporary file and return its path.

    Sorting is delegated to the external ``sort`` utility with the keys
    ``-k1,1 -k2,2n``: first field as text, then second field numerically.

    :param file: path of a single input file.
    :return: path of the temporary file holding the sorted rows. The file is
        not removed automatically; the caller owns it.

    If ``sort`` fails (for instance because ``file`` does not exist), no
    exception is raised and the returned file is left empty or partial.
    """
    # Local import: subprocess is not among the notebook's top-level imports.
    import subprocess

    # mkstemp creates the file atomically; tempfile.mktemp only invents a
    # name, is deprecated and is open to a race with other processes.
    fd, ts = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as out:
        # Argument list instead of a shell string: the path is passed
        # verbatim, so spaces or shell metacharacters in it are harmless.
        subprocess.run(['sort', '-k1,1', '-k2,2n', file], stdout=out)
    return ts


In [None]:
# Data roots under the user's home directory. The loading cell below tags
# tables read from the first root as control=True and tables read from the
# second root as control=False.
GSE26320_PATH_HG38 = os.path.expanduser('~/data/2023_GSE26320')
GSE26320_PATH_HG38_NO_CONTROL = os.path.expanduser('~/data/2023_GSE26320_no_control')
# Cell names of the dataset (presumably cell lines - TODO confirm).
GSE26320_CELLS = ['GM12878', 'HMEC', 'HSMM', 'K562', 'NHEK', 'NHLF', 'H1', 'Huvec', 'HepG2']
# Commented-out alternatives: a smaller cell subset and modification lists.
# GSE26320_CELLS = ['GM12878',  'K562', 'H1']
# GSE26320_MODIFICATIONS = ['CTCF', 'H3K27ac', 'H3K27me3', 'H3K36me3', 'H3K4me1', 'H3K4me2', 'H3K4me3', 'H3K9ac', 'H4K20me1']
# GSE26320_MODIFICATIONS = ['H3K4me1']
# Replicate labels.
GSE26320_REPS = ['rep1', 'rep2']

# IPython shell escape: make sure the folder the figures are saved into
# exists (-p creates parents and does not fail if it is already there).
! mkdir -p {GSE26320_PATH_HG38}/pics

In [None]:
# Gather the per-modification benchmark tables from both data roots into one
# frame. Every table is tagged with its modification and with whether it came
# from the run with control (True) or without control (False).
frames = []
for m in ['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3']:
    # Same order as before: the with-control table first, then no-control.
    for root, with_control in ((GSE26320_PATH_HG38, True),
                               (GSE26320_PATH_HG38_NO_CONTROL, False)):
        frame = pd.read_csv(root + f'/{m}_full_bench_df.csv.gz', compression='gzip')
        frame['modification'] = m
        frame['control'] = with_control
        frames.append(frame)

full_bench_df_all = pd.concat(frames).reset_index(drop=True)
# Drop the temporaries so only the combined frame stays in the namespace.
del frames, frame, root, with_control
full_bench_df_all.sample(5)

In [None]:
# One bar-plot panel per modification: number of peaks per tool, split by
# whether control was used. The figure is saved as a PDF and then shown.
modifications = ['H3K4me3', 'H3K27ac', 'H3K4me1', 'H3K27me3', 'H3K36me3']
tools = ['MACS2', 'MACS2 broad', 'SPAN', 'SICER',
         # 'SPAN - MACS2', 'SPAN - MACS2 broad', 'SPAN - SICER',
         # 'MACS2 - SPAN', 'MACS2 broad - SPAN', 'SICER - SPAN',
         ]

plt.figure(figsize=(12, 3))
axs = [plt.subplot(1, 5, k) for k in range(1, 6)]
for i, (m, ax) in enumerate(zip(modifications, axs)):
    subset = full_bench_df_all[full_bench_df_all['modification'] == m]
    g_results = sns.barplot(data=subset,
                            x='name', y='peaks', hue='control',
                            order=tools,
                            hue_order=[True, False],
                            capsize=.2, errwidth=2,
                            ax=ax)
    ax.set_title(m)
    ax.xaxis.set_tick_params(rotation=90)
    # Only the leftmost panel carries the y-axis label.
    ax.set_ylabel('Peaks number' if i == 0 else None)
    if i != 4:
        ax.legend().set_visible(False)
    else:
        # Last panel: put the legend to the right of the axis.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.savefig(f'{GSE26320_PATH_HG38}/pics/full_bench_df_all.pdf', bbox_inches='tight', dpi=300)
plt.show()