# Use the Hiccups Diff from Juicer to look for Differential Loops
This is different from the Compare Lists tool because it reruns HiCCUPS on both samples and only counts a loop as unique if 1) it shows up in only one of the samples and 2) it has zero enrichment in the other sample (i.e. it is not in the generated `enriched` bedpe file).  
This makes it a more restrictive way of checking for unique loops between two files.

In [None]:
import pandas as pd
import os

In [None]:
# Root juicer directory: it holds one folder per sample, each named after its sample.
JUICER_DIR = "/path/to/juicer"

# Every analysis folder hangs off <JUICER_DIR>/overlap_analysis.
ANALYSIS_DIR = f"{JUICER_DIR}/overlap_analysis"
SAMPLES_DIR = f"{ANALYSIS_DIR}/samples"
SAMPLES_NH_DIR = f"{ANALYSIS_DIR}/samples_no_header"
SCRIPT_DIR = f"{ANALYSIS_DIR}/scripts"
OVERLAP_DIR = f"{ANALYSIS_DIR}/overlap"
SHUFFLE_DIR = f"{ANALYSIS_DIR}/shuffle"
MISC_DIR = f"{ANALYSIS_DIR}/misc"
HICCUPS_DIFF_DIR = f"{ANALYSIS_DIR}/hiccups_diff"
RESULTS_DIR = f"{ANALYSIS_DIR}/results"

In [None]:
# Create the output folders. exist_ok keeps this cell re-runnable, and
# makedirs also creates any missing parent folder (a plain `mkdir` does neither).
os.makedirs(HICCUPS_DIFF_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

In [None]:
# Sample names are the sub-directories of the juicer directory whose name contains 'HICS'.
SAMPLES = sorted(
    entry
    for entry in os.listdir(JUICER_DIR)
    if 'HICS' in entry and os.path.isdir(os.path.join(JUICER_DIR, entry))
)
print(len(SAMPLES))
print(SAMPLES)


In [None]:
# Keep only the day 65 samples, i.e. the names containing 'da65'.
SAMPLES_65 = list(filter(lambda name: 'da65' in name, SAMPLES))
print(SAMPLES_65)

### For now, only run this for samples that have both day 0 and day 65 data (i.e. only look at sample progression)

##### Swarm

In [None]:
# Write one hiccupsdiff command per day 0 / day 65 pair into the swarm file.
with open(SCRIPT_DIR+"/hiccups_diff.swarm","w") as swarm_file:
    for sample in SAMPLES_65:
        # Pair on the full sample ID (everything before 'da65') instead of a
        # fixed 13-character prefix: that prefix cuts longer IDs short and can
        # pair a day 65 sample with a different sample's day 0.
        sample_id = sample.partition('da65')[0]
        da0_matches = [s for s in SAMPLES if s.startswith(sample_id) and 'da0' in s]
        if not da0_matches:
            # No day 0 partner: skip this sample rather than abort with an IndexError.
            print(f"skipping {sample}: no matching day 0 sample found")
            continue
        sample_da0 = da0_matches[0]

        out_dir = f"{HICCUPS_DIFF_DIR}/{sample_da0}_{sample}"

        swarm_file.write(f"/path/to/juicer_tools hiccupsdiff {JUICER_DIR}/{sample_da0}/aligned/inter_30.hic {JUICER_DIR}/{sample}/aligned/inter_30.hic {SAMPLES_DIR}/{sample_da0}.bedpe {SAMPLES_DIR}/{sample}.bedpe {out_dir}\n")
# No explicit close(): leaving the `with` block closes the file.


In [None]:
# Show the swarm submission command for the swarm file; it is only printed, not executed.
swarm_command = f"swarm -f {SCRIPT_DIR}/hiccups_diff.swarm --module=juicer,CUDA/8.0  --partition gpu --g 18 --gres=gpu:k80:1 --time=2-0"
print(swarm_command)

##### Independent Jobs

In [None]:
# Build a stand-alone batch script that runs hiccupsdiff for a single
# day 0 / day 65 pair (the first day 65 sample).
sample = SAMPLES_65[0]

# Pair on the full sample ID (everything before 'da65') instead of a fixed
# 13-character prefix: that prefix cuts longer IDs short and can pick a
# different sample's day 0.
sample_id = sample.partition('da65')[0]
da0_matches = [s for s in SAMPLES if s.startswith(sample_id) and 'da0' in s]
if not da0_matches:
    raise ValueError(f"no matching day 0 sample found for {sample}")
sample_da0 = da0_matches[0]

out_dir = f"{HICCUPS_DIFF_DIR}/{sample_da0}_{sample}"
# exist_ok keeps the cell re-runnable; a plain `mkdir` errors on an existing folder.
os.makedirs(out_dir, exist_ok=True)
with open(f"{SCRIPT_DIR}/{sample_da0}_{sample}_hiccups_diff.sh","w") as bash_file:
    bash_file.write(
        "#!/bin/bash \n"
        "module load juicer \n"
        "module load CUDA/8.0\n"
        f"/path/to/juicer_tools hiccupsdiff {JUICER_DIR}/{sample_da0}/aligned/inter_30.hic {JUICER_DIR}/{sample}/aligned/inter_30.hic {SAMPLES_DIR}/{sample_da0}.bedpe {SAMPLES_DIR}/{sample}.bedpe {out_dir}"
    )
# No explicit close(): leaving the `with` block closes the file.
        


In [None]:
# Show the sbatch command for the script written for the current pair. The path
# is built from `sample_da0` and `sample` (set in the independent-job cell)
# instead of a hard-coded pair, so the command always points at the script
# that was actually generated. The command is only printed, not executed.
print(f"sbatch -p gpu --mem=18g --gres=gpu:k80:1 --mail-type=ALL --time=2-0 {SCRIPT_DIR}/{sample_da0}_{sample}_hiccups_diff.sh \n")

## collect data

In [None]:
# Summarise the HiCCUPS Diff output: for every day 0 / day 65 pair that has
# result files, count the differential loops of each day and express them as a
# percentage of that sample's total loop count. The table is printed and saved
# to <RESULTS_DIR>/hiccups_diff_progression.csv.
progression_columns = [
    'Sample',
    'Day 0 Differential Loops',
    'Day 0 total Loops',
    'Day 0 Differential Loops Percent',
    'Day 65 Differential Loops',
    'Day 65 total Loops',
    'Day 65 Differential Loops Percent',
]
progression_rows = []
for sample_65 in SAMPLES_65:
    # Pair on the full sample ID (everything before 'da65') instead of a fixed
    # 13-character prefix: that prefix cuts longer IDs short and can pick a
    # different sample's day 0.
    sample_id = sample_65.partition('da65')[0]
    da0_matches = [s for s in SAMPLES if s.startswith(sample_id) and 'da0' in s]
    if not da0_matches:
        # No day 0 partner: skip this sample rather than abort with an IndexError.
        print(f"skipping {sample_65}: no matching day 0 sample found")
        continue
    sample_da0 = da0_matches[0]

    diff_dir = f"{HICCUPS_DIFF_DIR}/{sample_da0}_{sample_65}"
    da0_diff_path = f"{diff_dir}/differential_loops1.bedpe"
    da65_diff_path = f"{diff_dir}/differential_loops2.bedpe"
    # Both files are read below, so both must exist (pairs without output are skipped).
    if not (os.path.exists(da0_diff_path) and os.path.exists(da65_diff_path)):
        continue

    da0_diff_count = len(pd.read_csv(da0_diff_path, sep='\t').index)
    da65_diff_count = len(pd.read_csv(da65_diff_path, sep='\t').index)

    da0_loop_count = len(pd.read_csv(f"{SAMPLES_DIR}/{sample_da0}.bedpe", sep='\t').index)
    da65_loop_count = len(pd.read_csv(f"{SAMPLES_DIR}/{sample_65}.bedpe", sep='\t').index)

    # A sample with zero loops has no meaningful percentage: record NaN
    # instead of failing with ZeroDivisionError.
    da0_diff_percent = da0_diff_count / da0_loop_count * 100 if da0_loop_count else float('nan')
    da65_diff_percent = da65_diff_count / da65_loop_count * 100 if da65_loop_count else float('nan')

    progression_rows.append({
        # Sample ID = the name up to (not including) the separator before 'da'.
        'Sample': sample_65[0:sample_65.find('da') - 1],
        'Day 0 Differential Loops': da0_diff_count,
        'Day 0 total Loops': da0_loop_count,
        'Day 0 Differential Loops Percent': da0_diff_percent,
        'Day 65 Differential Loops': da65_diff_count,
        'Day 65 total Loops': da65_loop_count,
        'Day 65 Differential Loops Percent': da65_diff_percent,
    })

# DataFrame.append was removed in pandas 2.0, so build the frame once from the
# collected rows; the explicit columns keep the header even when no pair qualified.
progression_df = pd.DataFrame(progression_rows, columns=progression_columns)
print(progression_df)
progression_df.to_csv(f"{RESULTS_DIR}/hiccups_diff_progression.csv", index=False)
    

## Validate
Check one sample pair by hand to make sure it worked.

In [None]:
# List every sample whose name contains the validation sample ID.
list(filter(lambda name: 'HICS_PPMI3666_3014' in name, SAMPLES))

In [None]:
# Show the collected progression row(s) for the validation sample.
progression_df.loc[progression_df['Sample'] == 'HICS_PPMI3666_3014']

In [None]:
# Day 0 differential loops (differential_loops1) of the validation pair.
val_d0_dc = pd.read_csv(
    f"{HICCUPS_DIFF_DIR}/HICS_PPMI3666_3014_da0_v1_S9_HICS_PPMI3666_3014_da65_v1_S4/differential_loops1.bedpe",
    sep='\t',
)
print(val_d0_dc.shape)
# To preview the rows: print(val_d0_dc.head())

In [None]:
# All loops of the day 0 validation sample.
val_d0_l = pd.read_csv(
    f"{SAMPLES_DIR}/HICS_PPMI3666_3014_da0_v1_S9.bedpe",
    sep='\t',
)
print(val_d0_l.shape)
# To preview the rows: print(val_d0_l.head())

In [None]:
# Recompute the day 0 percentage by hand: differential count, total count, ratio * 100.
print("day 0 percent:")
print(val_d0_dc.shape[0])
print(val_d0_l.shape[0])
print(val_d0_dc.shape[0] / val_d0_l.shape[0] * 100)

In [None]:
# Day 65 differential loops (differential_loops2) of the validation pair.
val_d65_dc = pd.read_csv(
    f"{HICCUPS_DIFF_DIR}/HICS_PPMI3666_3014_da0_v1_S9_HICS_PPMI3666_3014_da65_v1_S4/differential_loops2.bedpe",
    sep='\t',
)
print(val_d65_dc.shape)
# To preview the rows: print(val_d65_dc.head())

In [None]:
# All loops of the day 65 validation sample.
val_d65_l = pd.read_csv(
    f"{SAMPLES_DIR}/HICS_PPMI3666_3014_da65_v1_S4.bedpe",
    sep='\t',
)
print(val_d65_l.shape)
# To preview the rows: print(val_d65_l.head())

In [None]:
# Recompute the day 65 percentage by hand: differential count, total count, ratio * 100.
print("day 65 percent:")
print(val_d65_dc.shape[0])
print(val_d65_l.shape[0])
print(val_d65_dc.shape[0] / val_d65_l.shape[0] * 100)

Make sure all the differential loops are present in their own sample's original .bedpe and absent from the other sample's .bedpe.

In [None]:
# Day 0 differential loops vs. the day 0 sample bedpe: print both shapes,
# then the shape of their inner join on the loop coordinates.
print(val_d0_dc.shape)
print(val_d0_l.shape)

print(
    val_d0_dc.merge(
        val_d0_l,
        on=['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2'],
        how='inner',
    ).shape
)

In [None]:
# Day 65 differential loops vs. the day 65 sample bedpe: print both shapes,
# then the shape of their inner join on the loop coordinates.
print(val_d65_dc.shape)
print(val_d65_l.shape)

print(
    val_d65_dc.merge(
        val_d65_l,
        on=['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2'],
        how='inner',
    ).shape
)

In [None]:
# Day 0 differential loops vs. the day 65 sample bedpe (the check is that
# none are shared): print both shapes, then the shape of their inner join.
print(val_d0_dc.shape)
print(val_d65_l.shape)

print(
    val_d0_dc.merge(
        val_d65_l,
        on=['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2'],
        how='inner',
    ).shape
)

In [None]:
# Day 65 differential loops vs. the day 0 sample bedpe (the check is that
# none are shared): print both shapes, then the shape of their inner join.
print(val_d65_dc.shape)
print(val_d0_l.shape)

print(
    val_d65_dc.merge(
        val_d0_l,
        on=['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2'],
        how='inner',
    ).shape
)

In [None]:
# Day 0 differential loops vs. day 65 differential loops (the check is that
# none are shared): print both shapes, then the shape of their inner join.
print(val_d0_dc.shape)
print(val_d65_dc.shape)

print(
    val_d0_dc.merge(
        val_d65_dc,
        on=['chr1', 'x1', 'x2', 'chr2', 'y1', 'y2'],
        how='inner',
    ).shape
)