# HiC Loop Overlap with PsychENCODE HiC Data
- **Author** - Frank Grenn
- **Date Started** - March 2020
- **Quick Description:** Code to process the PsychENCODE loop data for comparison with our Hi-C data. This involves mapping their hg19 positions to hg38 and using bedtools to find overlapping regions.
- **Data:**  
[PsychENCODE Data](http://resource.psychencode.org/)  
[PsychENCODE Paper](https://science.sciencemag.org/content/362/6420/eaat8464)

In [None]:
import pandas as pd
import os

In [None]:
# Working directories for the liftover step.
TEMP = '/path/to/juicer/overlap_analysis/misc'
# PsychENCODE downloads and derived files live in a sub-folder of TEMP.
PE = f"{TEMP}/psychencode"
SCRIPT = '/path/to/juicer/overlap_analysis/scripts'

In [None]:
!mkdir {TEMP}
!mkdir {PE}

## 1) Liftover PsychENCODE Data from hg19 to hg38

### a) download the data, the liftover chain file

#### HiC Contact Matrices (Probably don't need but good to check)

In [None]:
!curl -o {PE}/PIP-01_DLPFC.10kb.txt.tar.gz http://resource.psychencode.org/Datasets/Pipeline/HiC_matrices/PIP-01_DLPFC.10kb.txt.tar.gz
!curl -o {PE}/PIP-02_DLPFC.40kb.txt.tar.gz http://resource.psychencode.org/Datasets/Pipeline/HiC_matrices/PIP-02_DLPFC.40kb.txt.tar.gz

In [None]:
!tar -xvf {PE}/PIP-01_DLPFC.10kb.txt.tar.gz --verbose -C {PE}
!tar -xvf {PE}/PIP-02_DLPFC.40kb.txt.tar.gz --verbose -C {PE}

#### The Loop Interaction Regions file 
will need to convert to bedpe format later  
also will need to convert from hg19 to hg38

In [None]:
!curl -o {PE}/Promoter-anchored_chromatin_loops.bed http://resource.psychencode.org/Datasets/Integrative/Promoter-anchored_chromatin_loops.bed

#### The Promoter Enhancer Region Files


In [None]:
!curl -o {PE}/INT-16_HiC_EP_linkages.csv http://resource.psychencode.org/Datasets/Integrative/INT-16_HiC_EP_linkages.csv
!curl -o {PE}/INT-16_HiC_EP_linkages_cross_assembly.csv http://resource.psychencode.org/Datasets/Integrative/INT-16_HiC_EP_linkages_cross_assembly.csv

#### The Liftover chain file


In [None]:
!curl -o {TEMP}/hg19ToHg38.over.chain.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz

### b) format the PsychENCODE loop file to a bedpe

In [None]:
# List the PsychENCODE folder to check which files are present.
!ls {PE}

expand the bed to a bedpe with ids for the rows

In [None]:
# Expand the 5-column loop BED (chr, x1, x2, y1, y2) into a 10-column BEDPE:
# chr, x1, x2, chr, y1, y2, loop_<row number>, ".", "+", "-".
# Done in Python rather than `!... | awk '{...}'` because IPython treats {...}
# inside `!` commands as Python expressions to expand, which clashes with
# awk's braces and with the {PE} expansion the command relies on.
with open(f"{PE}/Promoter-anchored_chromatin_loops.bed") as _bed_in, \
        open(f"{PE}/Promoter-anchored_chromatin_loops.bedpe", "w") as _bedpe_out:
    next(_bed_in, None)  # skip the header line
    for _row_number, _line in enumerate(_bed_in, start=1):
        _fields = _line.split()  # whitespace split also drops a trailing \r
        if not _fields:
            continue  # ignore blank lines
        _chrom, _x1, _x2, _y1, _y2 = _fields[:5]
        _bedpe_out.write(
            "\t".join(
                [_chrom, _x1, _x2, _chrom, _y1, _y2,
                 f"loop_{_row_number}", ".", "+", "-"]
            )
            + "\n"
        )

In [None]:
# Show the first lines of the generated BEDPE as a quick sanity check.
!head {PE}/Promoter-anchored_chromatin_loops.bedpe

### c) now split into two separate bed files to liftover separately and merge together later

In [None]:
# Load the header-less BEDPE written above and label its ten columns.
bedpe = pd.read_csv(
    f"{PE}/Promoter-anchored_chromatin_loops.bedpe",
    header=None,
    sep="\t",
)
bedpe.columns = "chr1 x1 x2 chr2 y1 y2 loop_id score strand1 strand2".split()
for _summary in (bedpe.shape, bedpe.head()):
    print(_summary)

In [None]:
# First anchor of every loop as a 4-column BED (chrom, start, end, loop id).
# .copy() makes this an independent frame, so adding the 'chr' prefix below does
# not assign into a slice of `bedpe` (pandas chained-assignment warning).
bed1 = bedpe[['chr1', 'x1', 'x2', 'loop_id']].copy()
bed1.columns = ['chrom', 'chromStart', 'chromEnd', 'name']
bed1['chrom'] = 'chr' + bed1['chrom'].astype(str)
print(bed1.shape)
print(bed1.tail())
# NOTE(review): the column header row is written to the file; the UCSC web tool
# instructions in this notebook ask for input without headers - confirm.
bed1.to_csv(f"{PE}/PE1.bed", sep="\t", index=False)

In [None]:
# Second anchor of every loop as a 4-column BED (chrom, start, end, loop id).
# .copy() makes this an independent frame, so adding the 'chr' prefix below does
# not assign into a slice of `bedpe` (pandas chained-assignment warning).
bed2 = bedpe[['chr2', 'y1', 'y2', 'loop_id']].copy()
bed2.columns = ['chrom', 'chromStart', 'chromEnd', 'name']
bed2['chrom'] = 'chr' + bed2['chrom'].astype(str)
print(bed2.shape)
print(bed2.tail())
# NOTE(review): the column header row is written to the file; the UCSC web tool
# instructions in this notebook ask for input without headers - confirm.
bed2.to_csv(f"{PE}/PE2.bed", sep="\t", index=False)

### d) Compare the original `Promoter-anchored_chromatin_loops.bed` to the `INT-16_HiC_EP_linkages_cross_assembly.csv`


In [None]:
# Reload the original hg19 loop BED (first line is treated as the header),
# rename its five columns and prefix the chromosome with 'chr'.
bed = pd.read_csv(f"{PE}/Promoter-anchored_chromatin_loops.bed", sep="\t")
bed.columns = "chr x1 x2 y1 y2".split()
bed['chr'] = bed['chr'].astype(str).map(lambda chrom: 'chr' + chrom)
print("cols:")
for _summary in (bed.columns, bed.shape, bed.head()):
    print(_summary)

In [None]:
# Enhancer-promoter linkage table (comma-separated, header in the first row).
_ep_csv = f"{PE}/INT-16_HiC_EP_linkages_cross_assembly.csv"
EP = pd.read_csv(_ep_csv)
for _summary in (EP.shape, EP.head()):
    print(_summary)

now merge on the hg19 col to see if there is anything in common with the original `Promoter-anchored_chromatin_loops.bed` 

In [None]:
# Inner join: keep loop rows whose (chr, x1) pair matches the linkage table's
# hg19 chromosome / transcription-start-site columns.
hg19_merge = bed.merge(
    EP,
    how="inner",
    left_on=["chr", "x1"],
    right_on=["Enhancer_Chromosome_hg19", "Transcription_Start_Site_hg19"],
)
for _summary in (hg19_merge.shape, hg19_merge.tail()):
    print(_summary)

### e) Liftover the two bed files from hg19 to hg38 using i) the UCSC Liftover tool or ii) the CrossMap tool
probably use UCSC liftover since CrossMap seems to give duplicates


#### i) UCSC Liftover
here: https://genome.ucsc.edu/cgi-bin/hgLiftOver  
submit the `PE1.bed` and `PE2.bed`
* need to paste in the chr, x1, x2 (optional name col) columns without headers

#### ii) Use CrossMap to do the conversion
available on biowulf  
also requires the chain file downloaded previously

In [None]:
# Print (not run) the shell commands that lift both anchor BED files over
# with CrossMap.
print("module load crossmap")
for _anchor in ("PE1", "PE2"):
    print(f"crossmap bed {TEMP}/hg19ToHg38.over.chain.gz {PE}/{_anchor}.bed > {PE}/{_anchor}_crossmap_lift.bed")

In [None]:
# CrossMap result for the first anchor. Columns are labelled as the original
# fields (_o), an arrow column, then the lifted fields (_l).
_bed_fields = ("chr", "x1", "x2", "loop")
PE1L = pd.read_csv(f"{PE}/PE1_crossmap_lift.bed", header=None, sep="\t")
PE1L.columns = (
    [f"{_f}_o" for _f in _bed_fields]
    + ["arrow"]
    + [f"{_f}_l" for _f in _bed_fields]
)
# Compare row counts before and after the liftover.
for _summary in (bed1.shape, PE1L.shape, PE1L.head()):
    print(_summary)

In [None]:
# CrossMap result for the second anchor. Columns are labelled as the original
# fields (_o), an arrow column, then the lifted fields (_l).
# Fixed: this previously re-read PE1_crossmap_lift.bed, so the second-anchor
# liftover was never inspected.
PE2L = pd.read_csv(f"{PE}/PE2_crossmap_lift.bed", sep="\t", header=None)
PE2L.columns = ["chr_o", "x1_o", "x2_o", "loop_o", "arrow", "chr_l", "x1_l", "x2_l", "loop_l"]
# Compare row counts before and after the liftover.
print(bed2.shape)
print(PE2L.shape)
print(PE2L.head())

### f) Merge the two bed files from the liftover

In [None]:
# UCSC liftOver output for the first anchor (header-less, 4 columns).
PE1_UCSC = pd.read_csv(f"{PE}/PE1_ucsc_liftover_hg38.bed", header=None, sep="\t")
PE1_UCSC.columns = "chr1 x1 x2 loop_id".split()
for _summary in (PE1_UCSC.shape, PE1_UCSC.head()):
    print(_summary)

# UCSC liftOver output for the second anchor (header-less, 4 columns).
PE2_UCSC = pd.read_csv(f"{PE}/PE2_ucsc_liftover_hg38.bed", header=None, sep="\t")
PE2_UCSC.columns = "chr2 y1 y2 loop_id".split()
for _summary in (PE2_UCSC.shape, PE2_UCSC.head()):
    print(_summary)

now inner join by loop_id

In [None]:
# Re-pair the two lifted anchors; the inner join keeps only loops whose id is
# present in both liftover outputs.
_anchor_cols = ['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2']
lift_bedpe = PE1_UCSC.merge(PE2_UCSC, how="inner", on="loop_id")[_anchor_cols + ['loop_id']]
print(lift_bedpe.shape)
print(lift_bedpe.head())
# Version with header and loop ids.
lift_bedpe.to_csv(f"{PE}/Promoter-anchored_chromatin_loops_hg38.bedpe", sep="\t", index=False)
# Header-less, six-column version used for the bedtools overlap below.
lift_nh = lift_bedpe[_anchor_cols]
lift_nh.to_csv(f"{PE}/Promoter-anchored_chromatin_loops_hg38_nh.bedpe", sep="\t", index=False, header=False)

## 2) Check for overlap between our bedpes and PsychENCODE

constants for directories

In [None]:
# Directory containing all the sample folders; each sample folder should be
# named after the sample.
JUICER_DIR = "/path/to/juicer"

# Everything produced by the overlap analysis lives under ANALYSIS_DIR.
ANALYSIS_DIR = f"{JUICER_DIR}/overlap_analysis"
SAMPLES_DIR = f"{ANALYSIS_DIR}/samples"
SAMPLES_NH_DIR = f"{ANALYSIS_DIR}/samples_no_header"
SCRIPT_DIR = f"{ANALYSIS_DIR}/scripts"
OVERLAP_DIR = f"{ANALYSIS_DIR}/overlap"
SHUFFLE_DIR = f"{ANALYSIS_DIR}/shuffle"
MISC_DIR = f"{ANALYSIS_DIR}/misc"
RESULTS_DIR = f"{ANALYSIS_DIR}/results"

load the functions: `%load path/to/overlapBedpeAndBed.py`

In [None]:
# %load new/overlapBedpeAndBed.py
import os
import pandas as pd


#swarm sample Bedpe and Single Bedpe Overlap
def generate_bedpe_bedpe_overlap_swarm(sample_list, sample_nh_dir, bedpe, otype, overlap_dir, script_dir):
    """Write and submit a swarm file overlapping every sample BEDPE with one BEDPE.

    One ``bedtools pairtopair`` command is written per sample to
    ``{script_dir}/all_{bedpe_name}_overlap.swarm``; each command redirects its
    output to ``{overlap_dir}/{sample}_{bedpe_name}_overlap.txt``. The swarm file
    is then submitted with ``swarm``.

    Args:
        sample_list: sample names; ``{sample_nh_dir}/{sample}.bedpe`` is the ``-a`` input.
        sample_nh_dir: directory holding the per-sample BEDPE files.
        bedpe: path of the BEDPE passed as ``-b``; its base name (without
            extension) is used in the output file names.
        otype: value passed to bedtools ``-type``.
        overlap_dir: directory the overlap outputs are redirected to.
        script_dir: directory the swarm file is written to.
    """
    bedpe_name = os.path.splitext(os.path.basename(bedpe))[0]
    swarm_file = f"{script_dir}/all_{bedpe_name}_overlap.swarm"

    commands = [
        f"bedtools pairtopair -a {sample_nh_dir}/{sample}.bedpe -b {bedpe} -type {otype} > "
        f"{overlap_dir}/{sample}_{bedpe_name}_overlap.txt\n"
        for sample in sample_list
    ]

    # The with-block closes the file, so no explicit close() is needed.
    with open(swarm_file, 'w') as file_handler:
        file_handler.writelines(commands)

    os.system(f"swarm -f {swarm_file} --module bedtools --g 50")
    
#single Bedpe and Bed Overlap
def run_bedpe_bed_overlap(sample, sample_nh_dir, bed, otype, overlap_dir):
    """Run ``bedtools pairtobed`` for a single sample BEDPE against one BED file.

    The shell command loads the bedtools module, then redirects the overlap
    output to ``{overlap_dir}/{sample}_{bed_name}_overlap.txt``, where
    ``bed_name`` is the base name of ``bed`` without its extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(bed))
    loops_path = f"{sample_nh_dir}/{sample}.bedpe"
    result_path = f"{overlap_dir}/{sample}_{stem}_overlap.txt"
    command = (
        "module load bedtools; "
        "              "  # padding kept so the command text is unchanged
        f"bedtools pairtobed -a {loops_path} -b {bed} -type {otype} > {result_path}"
    )
    os.system(command)
    
#copy the loop files and rename
def get_sample_loop_files(juicer_dir, sample_dir, sample_list):
    """Copy each sample's merged loop file into ``sample_dir`` as ``<sample>.bedpe``.

    The source is ``{juicer_dir}/{sample}/aligned/inter_30_loops/merged_loops.bedpe``.
    The file is copied straight to its final name instead of shelling out to
    ``cp`` followed by ``mv``: with the two-step version a failed ``cp`` could
    rename a stale ``merged_loops.bedpe`` left in ``sample_dir`` to the wrong sample.

    A sample whose loop file (or the destination directory) is missing is
    reported and skipped, so one bad sample does not stop the others.

    Args:
        juicer_dir: directory containing one folder per sample.
        sample_dir: destination directory for the renamed copies.
        sample_list: sample names to copy.
    """
    import shutil  # local import: only this function needs it

    for sample in sample_list:
        source = f"{juicer_dir}/{sample}/aligned/inter_30_loops/merged_loops.bedpe"
        target = f"{sample_dir}/{sample}.bedpe"
        try:
            shutil.copy(source, target)
        except FileNotFoundError as err:
            print(f"could not copy loop file for {sample}: {err}")

#reformat the loop files and relocate them
def get_no_header_loop_files(sample_dir, sample_nh_dir, sample_list):
    """Write a header-less, six-column copy of every sample BEDPE.

    For each sample, ``{sample_dir}/{sample}.bedpe`` is read (first line is the
    header), ``'chr'`` is prepended to both chromosome columns, only
    ``chr1, x1, x2, chr2, y1, y2`` are kept, and the result is written without
    header or index to ``{sample_nh_dir}/{sample}.bedpe``.
    """
    kept_columns = ['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2']
    for sample in sample_list:
        loops = pd.read_csv(f"{sample_dir}/{sample}.bedpe", sep='\t')
        for chrom_col in ('chr1', 'chr2'):
            loops[chrom_col] = 'chr' + loops[chrom_col].astype(str)
        trimmed = loops[kept_columns]
        destination = f"{sample_nh_dir}/{sample}.bedpe"
        trimmed.to_csv(destination, sep='\t', header=False, index=None, mode='w+')

#get a df of overlap data between a bedpe and bed 
def get_bedpe_bed_overlap_data(overlap_file, sample_file):
    """Summarise one bedtools overlap output as a one-row DataFrame.

    Args:
        overlap_file: tab-separated, header-less overlap output; its first six
            columns identify the sample loop of each reported overlap.
        sample_file: tab-separated, header-less sample BEDPE; its row count is
            the total number of loops.

    Returns:
        DataFrame with a single row and the columns ``sample`` (base name of
        ``sample_file`` without extension), ``counts`` (unique loops present in
        the overlap output), ``total`` (rows in ``sample_file``) and
        ``percent`` (``counts / total * 100``, or 0 when there are no overlaps).
    """
    sample_name = os.path.splitext(os.path.basename(sample_file))[0]

    # read_csv raises EmptyDataError on a zero-byte file, so treat an empty
    # sample file as having no loops instead of crashing.
    if os.path.getsize(sample_file) == 0:
        sample_lines = 0
    else:
        sample_lines = len(pd.read_csv(sample_file, sep="\t", header=None).index)

    if os.path.getsize(overlap_file) == 0:
        overlap_counts = 0
    else:
        overlap_data = pd.read_csv(overlap_file, sep="\t", header=None)
        # Only the first 6 columns describe the sample loop; a loop that hits
        # several features appears on several rows, so de-duplicate.
        overlap_counts = len(overlap_data.iloc[:, :6].drop_duplicates().index)

    # Guard against dividing by zero when the sample has no loops.
    if overlap_counts and sample_lines:
        percent = overlap_counts / sample_lines * 100
    else:
        percent = 0

    return pd.DataFrame(data={
        'sample': [sample_name],
        'counts': [overlap_counts],
        'total': [sample_lines],
        'percent': [percent],
    })

#All sample bedpe overlap with one bed file
def generate_bedpe_bed_overlap_swarm(sample_list, sample_nh_dir , bed, otype, overlap_dir, script_dir):
    """Write and submit a swarm file overlapping every sample BEDPE with one BED file.

    One ``bedtools pairtobed`` command is written per sample to
    ``{script_dir}/all_{bed_name}_overlap.swarm``; each command redirects its
    output to ``{overlap_dir}/{sample}_{bed_name}_overlap.txt``. The swarm file
    is then submitted with ``swarm``.

    Args:
        sample_list: sample names; ``{sample_nh_dir}/{sample}.bedpe`` is the ``-a`` input.
        sample_nh_dir: directory holding the per-sample BEDPE files.
        bed: path of the BED file passed as ``-b``; its base name (without
            extension) is used in the output file names.
        otype: value passed to bedtools ``-type``.
        overlap_dir: directory the overlap outputs are redirected to.
        script_dir: directory the swarm file is written to.
    """
    bed_name = os.path.splitext(os.path.basename(bed))[0]
    swarm_file = f"{script_dir}/all_{bed_name}_overlap.swarm"

    commands = [
        f"bedtools pairtobed -a {sample_nh_dir}/{sample}.bedpe -b {bed} -type {otype} > "
        f"{overlap_dir}/{sample}_{bed_name}_overlap.txt\n"
        for sample in sample_list
    ]

    # The with-block closes the file, so no explicit close() is needed.
    with open(swarm_file, 'w') as file_handler:
        file_handler.writelines(commands)

    # os.system (as in the other swarm helpers) instead of the IPython-only
    # `!swarm ...` syntax, so this also runs as plain Python.
    os.system(f"swarm -f {swarm_file} --module bedtools --g 50")
    
    
    
#get a df of overlap data between a bedpe and bedpe
def get_bedpe_bedpe_overlap_data(overlap_file, sample_file):
    """Summarise one bedtools BEDPE-vs-BEDPE overlap output as a one-row DataFrame.

    Args:
        overlap_file: tab-separated, header-less overlap output; its first six
            columns identify the sample loop of each reported overlap.
        sample_file: tab-separated, header-less sample BEDPE; its row count is
            the total number of loops.

    Returns:
        DataFrame with a single row and the columns ``sample`` (base name of
        ``sample_file`` without extension), ``counts`` (unique loops present in
        the overlap output), ``total`` (rows in ``sample_file``) and
        ``percent`` (``counts / total * 100``, or 0 when there are no overlaps).
    """
    sample_name = os.path.splitext(os.path.basename(sample_file))[0]

    # read_csv raises EmptyDataError on a zero-byte file, so treat an empty
    # sample file as having no loops instead of crashing.
    if os.path.getsize(sample_file) == 0:
        sample_lines = 0
    else:
        sample_lines = len(pd.read_csv(sample_file, sep="\t", header=None).index)

    if os.path.getsize(overlap_file) == 0:
        overlap_counts = 0
    else:
        overlap_data = pd.read_csv(overlap_file, sep="\t", header=None)
        # Only the first 6 columns describe the sample loop; a loop that hits
        # several partner loops appears on several rows, so de-duplicate.
        overlap_counts = len(overlap_data.iloc[:, :6].drop_duplicates().index)

    # Guard against dividing by zero when the sample has no loops.
    if overlap_counts and sample_lines:
        percent = overlap_counts / sample_lines * 100
    else:
        percent = 0

    return pd.DataFrame(data={
        'sample': [sample_name],
        'counts': [overlap_counts],
        'total': [sample_lines],
        'percent': [percent],
    })

#get a df for all sample bedpe overlap with another bedpe
def get_bedpe_list_bedpe_overlap_data(sample_list, sample_nh_dir , bedpe, overlap_dir, delete = False):
    """Collect the BEDPE-vs-BEDPE overlap summary of every sample into one DataFrame.

    For each sample the overlap output
    ``{overlap_dir}/{sample}_{bedpe_name}_overlap.txt`` and the sample file
    ``{sample_nh_dir}/{sample}.bedpe`` are summarised with
    ``get_bedpe_bedpe_overlap_data``; the sample name is printed as progress.

    Args:
        sample_list: sample names to collect.
        sample_nh_dir: directory holding the header-less sample BEDPE files.
        bedpe: path of the compared BEDPE; only its base name is used.
        overlap_dir: directory holding the overlap outputs.
        delete: when true, each overlap output is removed after it is read.

    Returns:
        DataFrame with one row per sample (empty when ``sample_list`` is empty).
    """
    bedpe_name = os.path.splitext(os.path.basename(bedpe))[0]

    # Rows are gathered in a list and concatenated once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    per_sample = []
    for sample in sample_list:
        overlap_file = overlap_dir + '/' + sample + '_' + bedpe_name + '_overlap.txt'
        sample_file = sample_nh_dir + '/' + sample + '.bedpe'
        per_sample.append(
            get_bedpe_bedpe_overlap_data(overlap_file=overlap_file, sample_file=sample_file)
        )
        print(sample)
        if delete:
            os.remove(overlap_file)

    if not per_sample:
        return pd.DataFrame()  # pd.concat refuses an empty list
    return pd.concat(per_sample, ignore_index=True)

#get a df for all sample bedpe overlap with a bed file
def get_bedpe_list_bed_overlap_data(sample_list, sample_nh_dir , bed, overlap_dir, delete = False):
    """Collect the BEDPE-vs-BED overlap summary of every sample into one DataFrame.

    For each sample the overlap output
    ``{overlap_dir}/{sample}_{bed_name}_overlap.txt`` and the sample file
    ``{sample_nh_dir}/{sample}.bedpe`` are summarised with
    ``get_bedpe_bed_overlap_data``; the sample name is printed as progress.

    Args:
        sample_list: sample names to collect.
        sample_nh_dir: directory holding the header-less sample BEDPE files.
        bed: path of the compared BED file; only its base name is used.
        overlap_dir: directory holding the overlap outputs.
        delete: when true, each overlap output is removed after it is read.

    Returns:
        DataFrame with one row per sample (empty when ``sample_list`` is empty).
    """
    bed_name = os.path.splitext(os.path.basename(bed))[0]

    # Rows are gathered in a list and concatenated once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    per_sample = []
    for sample in sample_list:
        overlap_file = overlap_dir + '/' + sample + '_' + bed_name + '_overlap.txt'
        sample_file = sample_nh_dir + '/' + sample + '.bedpe'
        per_sample.append(
            get_bedpe_bed_overlap_data(overlap_file=overlap_file, sample_file=sample_file)
        )
        print(sample)
        if delete:
            os.remove(overlap_file)

    if not per_sample:
        return pd.DataFrame()  # pd.concat refuses an empty list
    return pd.concat(per_sample, ignore_index=True)


#overlap samples with each other
def generate_bedpe_between_overlap_swarm(sample_list, sample_nh_dir, otype, overlap_dir, script_dir):
    """Write and submit a swarm file overlapping every sample BEDPE with every other.

    For each ordered pair of different sample names a ``bedtools pairtopair``
    command is written to ``{script_dir}/all_samples_between_overlap.swarm``,
    redirecting its output to ``{overlap_dir}/{sample1}_{sample2}_overlap.txt``.
    The swarm file is then submitted with ``swarm``.

    Args:
        sample_list: sample names; ``{sample_nh_dir}/{sample}.bedpe`` are the inputs.
        sample_nh_dir: directory holding the per-sample BEDPE files.
        otype: value passed to bedtools ``-type``.
        overlap_dir: directory the overlap outputs are redirected to.
        script_dir: directory the swarm file is written to.
    """
    swarm_file = f"{script_dir}/all_samples_between_overlap.swarm"

    commands = [
        f"bedtools pairtopair -a {sample_nh_dir}/{sample1}.bedpe -b {sample_nh_dir}/{sample2}.bedpe -type {otype} > "
        f"{overlap_dir}/{sample1}_{sample2}_overlap.txt\n"
        for sample1 in sample_list
        for sample2 in sample_list
        if sample1 != sample2
    ]

    # The with-block closes the file, so no explicit close() is needed.
    with open(swarm_file, 'w') as file_handler:
        file_handler.writelines(commands)

    os.system(f"swarm -f {swarm_file} --module bedtools --g 50")

#get df/matrix of between sample overlap data
def get_bedpe_between_overlap_data(sample_list, sample_nh_dir, overlap_dir, delete = False):
    """Build sample-by-sample matrices of between-sample overlap counts and percents.

    For every ordered pair of different samples the overlap output
    ``{overlap_dir}/{sample}_{comp_sample}_overlap.txt`` is summarised with
    ``get_bedpe_bedpe_overlap_data`` against ``{sample_nh_dir}/{sample}.bedpe``.
    Empty overlap outputs are skipped, leaving that cell missing; the diagonal
    (a sample against itself) is ``None``.

    Args:
        sample_list: sample names; processed in sorted order.
        sample_nh_dir: directory holding the header-less sample BEDPE files.
        overlap_dir: directory holding the pairwise overlap outputs.
        delete: when true, each pairwise overlap output is removed after it has
            been processed (previously this argument was accepted but ignored).

    Returns:
        ``(count_df, percent_df)``: in both frames each column is a sample and
        each row is the sample it was compared with.
    """
    sample_list = sorted(sample_list)
    all_percent_data = {}
    all_count_data = {}

    for sample in sample_list:
        sample_file = f"{sample_nh_dir}/{sample}.bedpe"
        comp_percent_data = {}
        comp_count_data = {}

        for comp_sample in sample_list:
            if sample == comp_sample:
                # No self-comparison is run; mark the diagonal explicitly.
                comp_percent_data[comp_sample] = None
                comp_count_data[comp_sample] = None
                continue

            overlap_file = f"{overlap_dir}/{sample}_{comp_sample}_overlap.txt"
            if os.path.getsize(overlap_file) != 0:
                df = get_bedpe_bedpe_overlap_data(overlap_file, sample_file)
                comp_count_data[comp_sample] = df.iloc[0]['counts']
                comp_percent_data[comp_sample] = df.iloc[0]['percent']
            if delete:
                os.remove(overlap_file)

        all_percent_data[sample] = comp_percent_data
        all_count_data[sample] = comp_count_data

    percent_df = pd.DataFrame(data=all_percent_data)
    count_df = pd.DataFrame(data=all_count_data)
    return count_df, percent_df

#shuffle a bedpe file n times
def shuffle_bedpe(sample, n, sample_nh_dir, shuffle_dir, sizes):
    """Run ``bedtools shuffle -bedpe`` on one sample BEDPE ``n`` times.

    Args:
        sample: sample name; the input is ``{sample_nh_dir}/{sample}.bedpe``.
        n: number of shuffles. With ``n == 1`` the output is
            ``{shuffle_dir}/{sample}_shuffle.bedpe``; otherwise one file
            ``{shuffle_dir}/{sample}_shuffle_{i}.bedpe`` is written for each
            ``i`` in ``range(n)``.
        sample_nh_dir: directory holding the header-less sample BEDPE files.
        shuffle_dir: directory the shuffled files are written to.
        sizes: value passed to bedtools via ``-g``.
    """
    if n == 1:
        outputs = [f"{shuffle_dir}/{sample}_shuffle.bedpe"]
    else:
        outputs = [f"{shuffle_dir}/{sample}_shuffle_{i}.bedpe" for i in range(n)]

    # os.system (as in run_bedpe_bed_overlap) instead of the IPython-only
    # `!(...)` syntax, so this also runs as plain Python.
    for output in outputs:
        os.system(
            "module load bedtools; "
            f"bedtools shuffle -i {sample_nh_dir}/{sample}.bedpe -g {sizes} -bedpe > {output}"
        )

#shuffle all the sample bedpes n times
def shuffle_bedpe_list(sample_list, n, sample_nh_dir, shuffle_dir, sizes):
    """Call ``shuffle_bedpe`` once per sample, forwarding all other arguments unchanged."""
    for current in sample_list:
        shuffle_bedpe(current, n, sample_nh_dir, shuffle_dir, sizes)

#check for overlap with a bed file in all shuffled sample bedpes
def shuffle_swarm_overlap(sample_list, n, sample_nh_dir , bed, otype, shuffle_dir, overlap_dir, script_dir):
    """Write and submit a swarm file overlapping every shuffled BEDPE with one BED file.

    Commands are written to ``{script_dir}/all_{bed_name}_shuffle_overlap.swarm``.
    With ``n == 1`` each sample contributes one ``bedtools pairtobed`` command on
    ``{shuffle_dir}/{sample}_shuffle.bedpe``; with ``n > 1`` one command per
    ``{shuffle_dir}/{sample}_shuffle_{i}.bedpe`` for ``i`` in ``range(n)``.
    For ``n < 1`` no command is written, but the (empty) file is still submitted.

    Args:
        sample_list: sample names.
        n: number of shuffles that were generated per sample.
        sample_nh_dir: not used by this function.
        bed: path of the BED file passed as ``-b``; its base name (without
            extension) is used in the output file names.
        otype: value passed to bedtools ``-type``.
        shuffle_dir: directory holding the shuffled BEDPE files.
        overlap_dir: directory the overlap outputs are redirected to.
        script_dir: directory the swarm file is written to.
    """
    bed_name = os.path.splitext(os.path.basename(bed))[0]
    swarm_file = f"{script_dir}/all_{bed_name}_shuffle_overlap.swarm"

    # File-name tags of the shuffles to process for each sample.
    if n == 1:
        tags = ["shuffle"]
    elif n > 1:
        tags = [f"shuffle_{i}" for i in range(n)]
    else:
        tags = []

    commands = [
        f"bedtools pairtobed -a {shuffle_dir}/{sample}_{tag}.bedpe -b {bed} -type {otype} > "
        f"{overlap_dir}/{sample}_{tag}_{bed_name}_overlap.txt\n"
        for sample in sample_list
        for tag in tags
    ]

    # The with-block closes the file, so no explicit close() is needed.
    with open(swarm_file, 'w') as file_handler:
        file_handler.writelines(commands)

    # os.system (as in the other swarm helpers) instead of the IPython-only
    # `!swarm ...` syntax, so this also runs as plain Python.
    os.system(f"swarm -f {swarm_file} --module bedtools --g 50")
    
#get overlap data for all shuffled bedpe files
def get_bedpe_list_bed_shuffle_overlap_data(sample_list, shuffle_dir, n, bed, overlap_dir, delete = False):
    """Collect the BED overlap summary of every shuffled sample BEDPE into one DataFrame.

    With ``n == 1`` each sample contributes one row, read from
    ``{overlap_dir}/{sample}_shuffle_{bed_name}_overlap.txt`` and
    ``{shuffle_dir}/{sample}_shuffle.bedpe``. With ``n > 1`` each sample
    contributes ``n`` rows, read from the ``_shuffle_{i}`` variants of those
    files. Each pair is summarised with ``get_bedpe_bed_overlap_data`` and a
    progress line is printed.

    Args:
        sample_list: sample names to collect.
        shuffle_dir: directory holding the shuffled BEDPE files.
        n: number of shuffles that were generated per sample.
        bed: path of the compared BED file; only its base name is used.
        overlap_dir: directory holding the overlap outputs.
        delete: when true, both the overlap output and the shuffled BEDPE are
            removed after they are read.

    Returns:
        DataFrame with one row per processed shuffle (empty when nothing was
        processed, e.g. ``n < 1`` or an empty ``sample_list``).
    """
    bed_name = os.path.splitext(os.path.basename(bed))[0]

    # None stands for the single, un-numbered shuffle produced when n == 1.
    if n == 1:
        shuffle_ids = [None]
    elif n > 1:
        shuffle_ids = list(range(n))
    else:
        shuffle_ids = []

    # Rows are gathered in a list and concatenated once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for sample in sample_list:
        for shuffle_id in shuffle_ids:
            if shuffle_id is None:
                tag = '_shuffle'
                progress = sample
            else:
                tag = '_shuffle_' + str(shuffle_id)
                progress = sample + ' ' + str(shuffle_id)
            overlap_file = overlap_dir + '/' + sample + tag + '_' + bed_name + '_overlap.txt'
            sample_file = shuffle_dir + '/' + sample + tag + '.bedpe'
            rows.append(
                get_bedpe_bed_overlap_data(overlap_file=overlap_file, sample_file=sample_file)
            )
            print(progress)
            if delete:
                os.remove(overlap_file)
                os.remove(sample_file)

    if not rows:
        return pd.DataFrame()  # pd.concat refuses an empty list
    return pd.concat(rows, ignore_index=True)


make directories and run the setup functions

In [None]:
!mkdir {ANALYSIS_DIR}
!mkdir {SAMPLES_DIR}
!mkdir {SAMPLES_NH_DIR}
!mkdir {SCRIPT_DIR}
!mkdir {OVERLAP_DIR}
!mkdir {SHUFFLE_DIR}
!mkdir {MISC_DIR}
!mkdir {RESULTS_DIR}

In [None]:
# Sample names: every sub-directory of the juicer directory whose name
# contains 'HICS'.
SAMPLES = [
    name
    for name in os.listdir(JUICER_DIR)
    if 'HICS' in name and os.path.isdir(os.path.join(JUICER_DIR, name))
]
print(len(SAMPLES))
print(SAMPLES)
# Copy the per-sample loop files, then write their header-less versions.
get_sample_loop_files(JUICER_DIR, SAMPLES_DIR, SAMPLES)
get_no_header_loop_files(SAMPLES_DIR, SAMPLES_NH_DIR, SAMPLES)

run the overlap function

In [None]:
# Value handed to bedtools -type for the overlap run.
TYPE = 'both'
# Header-less hg38 PsychENCODE loops written by the liftover section.
BEDPE_FILE = PE + "/Promoter-anchored_chromatin_loops_hg38_nh.bedpe"

In [None]:
# Write and submit one overlap job per sample against the PsychENCODE loops.
generate_bedpe_bedpe_overlap_swarm(
    sample_list=SAMPLES,
    sample_nh_dir=SAMPLES_NH_DIR,
    bedpe=BEDPE_FILE,
    otype=TYPE,
    overlap_dir=OVERLAP_DIR,
    script_dir=SCRIPT_DIR,
)

collect the overlap data

In [None]:
# Gather the per-sample overlap summaries; the overlap outputs are kept on disk.
data = get_bedpe_list_bedpe_overlap_data(
    sample_list=SAMPLES,
    sample_nh_dir=SAMPLES_NH_DIR,
    bedpe=BEDPE_FILE,
    overlap_dir=OVERLAP_DIR,
    delete=False,
)


In [None]:
# Bare expression: shows the overlap table as this cell's output.
data

In [None]:
# Save the overlap summary table to the results folder.
_results_csv = f"{RESULTS_DIR}/psychencode_loop_overlap.csv"
data.to_csv(_results_csv)

## Validate

In [None]:
# Load every loop of one sample (file with header) as the denominator for the
# manual check.
_sample1_path = f"{SAMPLES_DIR}/HICS_CS25i_d0_S9.bedpe"
sample1_loops = pd.read_csv(_sample1_path, sep='\t')
print(sample1_loops.shape)
#print(sample1_loops.head())

In [None]:
# Load all lifted PsychENCODE loops (header-less file).
PE_loops = pd.read_csv(f"{BEDPE_FILE}", header=None, sep='\t')
print(PE_loops.shape)
#print(PE_loops.head())

In [None]:
# Read the bedtools overlap output for the sample; the first six columns are
# the sample's own loop, so de-duplicating them gives the unique overlapping loops.
_overlap_path = f"{OVERLAP_DIR}/HICS_CS25i_d0_S9_Promoter-anchored_chromatin_loops_hg38_nh_overlap.txt"
sample1_pe_overlap = pd.read_csv(_overlap_path, header=None, sep='\t')
for _summary in (sample1_pe_overlap.shape, sample1_pe_overlap.head()):
    print(_summary)

print("deduplicated")
s1_pe_overlap_s1_loops = sample1_pe_overlap.iloc[:, :6]
s1_pe_overlap_s1_loops_dedup = s1_pe_overlap_s1_loops.drop_duplicates()
print(s1_pe_overlap_s1_loops_dedup.shape)

In [None]:
# Recompute the percent overlap for the sample by hand: unique overlapping
# loops divided by all loops of the sample.
_n_overlapping = len(s1_pe_overlap_s1_loops_dedup.index)
_n_total = len(sample1_loops.index)
print(_n_overlapping)
print(_n_total)
print(str(_n_overlapping / _n_total * 100))