#### Summary:
In this notebook I'll create importance score plots for peaks at the Lepr locus. For now, I'll just use previously developed methods to find all peaks in my previously generated importance scores at a particular loci. However, I want to extract them for all 4 models I made, so this might get a bit complicated bc I have 2 sets of importance score peaks which may or may not line up (will have to see).

In [15]:
from __future__ import print_function
import numpy as np
import pandas as pd
from collections import Counter
import sys
import random
import os
import fnmatch
import sys
from Bio import SeqIO
from Bio.Seq import Seq
import seaborn as sns
import matplotlib.pyplot as plt
import glob

In [2]:
dataDirectory = '/home/h1bennet/strains/data/ATAC/control_cohort2/'
workingDirectory = '/home/h1bennet//strains_machinelearning/results/00_New_ATAC_H3K27Ac_Model/'
if not os.path.isdir(workingDirectory):
    os.mkdir(workingDirectory)
os.chdir(workingDirectory)

In [3]:
if not os.path.isdir('./deeplift_plots/'):
    os.mkdir('./deeplift_plots/')

# Define functions

In [21]:
def load_genome(ref_path):
    ref_dict = {}
    for seq in SeqIO.parse(ref_path, "fasta"):
        chromID = seq.id
        chromSeq = (str(seq.seq)).upper()
        ref_dict[chromID] = chromSeq
    return ref_dict

ref_path = "/home/zes017/genomes/mm10/mm10.fa"
genomes = load_genome(ref_path)

def data_prep(path, genomes, label, strain, size=500, rep=1, shift=None):
    print ("Scaled to %d" % (size))
    if shift is None:
        shifts = size//rep*np.arange(-rep//2+1,rep//2+1)
    elif type(shift) is int:
        shifts = np.abs(shift)*np.arange(-rep//2+1,rep//2+1)
    elif type(shift) is list:
        if len(shift) != rep:
            raise TypeError("Number of shift unequal to replicates")
        shifts = np.array(shift)
    if rep == 1 and shift is not None:
        random_shift = True
        print("Random shifting within %s" % (str(-np.abs(shift))+'~'+str(np.abs(shift))))
    else:
        random_shift = False
        print("Generate replicates by shifting %s bp" % (','.join(shifts.astype(str))))
    
    data_list = []
    for line in open(path):
        elems = line.split()
        chromID = elems[0]
        start, end = int(elems[1]), int(elems[2])
        length = end-start
        mid = (start+end)//2
        for s in shifts:
            if random_shift:
                new_mid = mid + np.random.choice(np.arange(-np.abs(shift), np.abs(shift)+1))
            else:
                new_mid = mid + s
            start = new_mid - size//2
            end = new_mid + size//2
            seq = genomes[chromID][start:end]
            if len(seq) != size:
                continue
                
            #created combined name term, ex: chr5_140648065_140648415_C57
            k = chromID + "_" + str(start) + "_" + str(end) + "_" + strain

            data_point = (seq, label, chromID, start, end, k)
            data_list.append(data_point)
    return data_list

def findOverlapPeaks(peak, pos_data):
    '''Function to find overlapping regions of the genome. Written by H.
    Mummey and edited by H. Bennett. Main edit by H. Bennett was to use
    integer ranges as a lazy way of determining overlap between peaks.
    Accepts:
    - peak (list): list of position data for target peak containing
    [chr (str), start (int), end (int)]
    - pos_data (list): list of positional data with each entry holding sequence,
    chr, start, end, and an ID.
    
    Returns:
    overlapping_peaks (list): list of entries in pos_data overlapping
    target peak'''
    overlapping_peaks = []
    
    for i in range(len(pos_data)):
        # for each iteration make a peak list
        data_peak = [pos_data[i][2], pos_data[i][3], pos_data[i][4]]
        
        # check whether on same chromosome
        if pos_data[i][2] == peak[0]:
            
            # generate ranges 
            ref_peak = set(np.arange(pos_data[i][3], pos_data[i][4]+1))
            test_peak = set(np.arange(peak[1], peak[2]+1))
            
            if len(ref_peak.intersection(test_peak)) > 0:
                print("overlap found", pos_data[i][-1])
                overlapping_peaks.append(pos_data[i])
            
    return overlapping_peaks

def FindVariants(variant_df, peak):

    '''Accepts:
    - variant_df (pd.DataFrame) produced by MMARGE annotate mutations.
    should be a peak file with the peakIDs absent or set as index so that 
    - peak (list) list produced by Hannah's wrapping programs
    with the following items: [chr (string), start (int), end (int)]
    

    returns:
    - balb_vars (list): list of positions within given region in peak
    containing a genomic mutation
    '''
    
    #find variants that overlap with a given peak
    balb_vars = []
    
    #first record all rows of variants_df that have ranges that overlap with chr_pos
    overlapping_peaks = []
    
    # filter based on chr membership to save time
    variant_df_same_chr = variant_df.loc[variant_df.loc[:, 'chr']==peak[0], :]
    
    for row in variant_df_same_chr.iterrows(): #manual search
        # var_peak = [row[1].chr, row[1].start, row[1].end]
        var_peak_range = set(np.arange(row[1].start, row[1].end)+1)
        test_peak_range = set(np.arange(peak[1], peak[2])+1)
        
        if len(var_peak_range.intersection(test_peak_range)) > 0:
            # print("overlap found", row[0])
            overlapping_peaks.append(row[0])
            
    #then record which variants are in the chr_pos range
    for overlap in overlapping_peaks:
        b_vars = variant_df.loc[overlap, 'BALBCJ - 1']        
        if (type(b_vars)) == str:
            parse1 = str(b_vars).split(",")
            for var in parse1:
                parse2 = var.split(":")
                balb_vars.append(int(parse2[0]))


    return balb_vars, overlapping_peaks

def OverlayVariantsGen2(variant_df, smooth_weights, pos_data, chr_pos, model,
                        ylim=1, scatter_kws={'s':10, 'c':'#636363'},
                        hline_kws={'c':'#bdbdbd','linestyle':'--','linewidth':1},
                        vline_kws={'c':'#3182bd','linestyle':'--','linewidth':1},
                        ax=None):
    '''Function to overlay variants with the smoothed deep lift importance
    scores. Written by H. Mummey and modified by H. Bennett. One thing
    to fix is how to access columns of variant DF.
    Accepts:
    - variant_df (pandas DataFrame): peak file with annotations of mutations.
    Requires that either peakIDs are set as index or not included (chr must be
    in column 1)
    - smooth_wights (numpy array): array of deeplift weights(?)
    - pos_data (list): list of positional data with each entry holding sequence,
    chr, start, end, and an ID.
    - chr_pos: string of chr position, this is usually pulled from the pos_data list
    of ids which has the structure chr_start_end, so the chrom position can be pulled
    using a string split on "_"
    - model (str): model used, i think this is just for titling the plot
    - ylim (int): set limit of y axis manually
    
    Returns:
    - ax (matplotlib axes object): plot of deepbind score with overlying variants.
    '''
    
    #deconstruct chr_pos
    peak = chr_pos.split("_")
    peak = peak[:3]
    peak[1] = int(peak[1])
    peak[2] = int(peak[2])
    
    #find variants
    balb_vars, peaks = FindVariants(variant_df, peak)
    # print("variants", balb_vars)
    
    #now output a figure where we draw vertical lines for variants
    #reconstruct the pos_data style s
    s = chr_pos
    
    # if axis don't exist, create them
    if ax == None:
        fig, ax = plt.subplots(figsize=(17,5))

    # extract weights
    w = np.abs(smooth_weights[np.array(pos_data)[:,-1] == s][0])
    
    # plot weights
    ax.scatter(np.arange(len(w)), w, zorder=10, **scatter_kws)
    
    # plot reference line
    ax.axhline(np.sort(w)[240], **hline_kws)

    # plot verticle lines for mutations
    var_weights = [] # append variant weights
    for bvar in balb_vars:
        ax.axvline(bvar-peak[1], **vline_kws)
        var_weights.append(w[bvar-peak[1]])
        
    # adjust plotting parameters                
    ax.set_xlim(0,300)
    ax.set_ylim(0,ylim)
    ax.set_title(model + ": " + chr_pos)
    ax.set_xlabel('position')
    ax.set_ylabel('abs. DeepLIFT score')
    ax.spines['right'].set_visible(False) # hide right spine
    ax.spines['top'].set_visible(False) # hide top spine
    
    plt.tight_layout()

    return balb_vars, var_weights

def extractVariantWeights(variant_df, smooth_weights, pos_data, chr_pos):
    '''Function to overlay variants with the smoothed deep lift importance
    scores. Written by H. Mummey and modified by H. Bennett. One thing
    to fix is how to access columns of variant DF.
    Accepts:
    - variant_df (pandas DataFrame): peak file with annotations of mutations.
    Requires that either peakIDs are set as index or not included (chr must be
    in column 1)
    - smooth_wights (numpy array): array of deeplift weights(?)
    - pos_data (list): list of positional data with each entry holding sequence,
    chr, start, end, and an ID.
    - chr_pos: string of chr position, this is usually pulled from the pos_data list
    of ids which has the structure chr_start_end, so the chrom position can be pulled
    using a string split on "_"
    
    Returns tuple of two lists
    - balb_vars: loci of variants within peak.
    - var_weights: weight of variants according to model.
    '''
    
    #deconstruct chr_pos
    peak = chr_pos.split("_")
    peak = peak[:3]
    peak[1] = int(peak[1])
    peak[2] = int(peak[2])
    
    #find variants
    balb_vars, var_peaks = FindVariants(variant_df, peak)
    # print("variants", balb_vars)
    
    #now output a figure where we draw vertical lines for variants
    #reconstruct the pos_data style s
    s = chr_pos
    
    # extract weights
    w = np.abs(smooth_weights[np.array(pos_data)[:,-1] == s][0])

    # plot verticle lines for mutations
    var_weights = [] # append variant weights
    # print(peak[1], peak[2])
    for bvar in balb_vars:
        if (bvar >= peak[1]) & (bvar < peak[2]): # make sure variant is within scored region
            # print(bvar)
            var_weights.append(w[bvar-peak[1]])
        
    return balb_vars, var_weights

# Read in MMARGE Variants

In [5]:
mutfile = "/home/h1bennet/strains_f1/data/annotate_mutations_merge_f0_f1.idr"
control_vars = pd.read_table(mutfile, sep='\t', index_col=0)
print(len(control_vars))
control_vars.head()

140427


Unnamed: 0_level_0,chr,start,end,strand,Stat,Parent files,Total subpeaks,f0_balb.idr,f0_c57.idr,f1_balb.idr,f1_c57bl6.idr,BALBCJ - 1
#name (cmd = mergePeaks f0_balb.idr f0_c57.idr f1_balb.idr f1_c57bl6.idr),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Merged-chr11-98789029-4,chr11,98788880,98789152,+,1000.0,f0_balb.idr|f0_c57.idr|f1_balb.idr|f1_c57bl6.idr,4,Merged-chr11-98789041-6,Merged-chr11-98788992-6,Merged-chr11-98789035-6,Merged-chr11-98789035-6,
Merged-chr11-48775620-2,chr11,48775424,48775817,+,739.333374,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-48775642-3,Merged-chr11-48775652-2,
Merged-chr11-115858855-2,chr11,115858550,115859160,+,959.75,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-115858827-2,Merged-chr11-115858827-2,
Merged-chr11-95354785-2,chr11,95354597,95354974,+,1000.0,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-95354800-3,Merged-chr11-95354800-3,
Merged-chr11-75629486-2,chr11,75629264,75629708,+,950.75,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-75629518-2,Merged-chr11-75629518-2,75629685:T->C


In [6]:
aj_balb_mutfile = "/home/h1bennet/strains_f1/data/annotate_mutations_merge_f0_f1_balbcj_aj_snps.idr"
control_vars_aj_balb = pd.read_table(aj_balb_mutfile, sep='\t', index_col=0)
print(len(control_vars_aj_balb))
control_vars_aj_balb.head()

140427


Unnamed: 0_level_0,chr,start,end,strand,Stat,Parent files,Total subpeaks,f0_balb.idr,f0_c57.idr,f1_balb.idr,f1_c57bl6.idr,BALBCJ - 1,AJ - 1
#name (cmd = mergePeaks f0_balb.idr f0_c57.idr f1_balb.idr f1_c57bl6.idr),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Merged-chr11-115858855-2,chr11,115858550,115859160,+,959.75,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-115858827-2,Merged-chr11-115858827-2,,"115858608:A->C,115858667:A->G,115858753:C->T,1..."
Merged-chr11-48775620-2,chr11,48775424,48775817,+,739.333374,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-48775642-3,Merged-chr11-48775652-2,,
Merged-chr11-98789029-4,chr11,98788880,98789152,+,1000.0,f0_balb.idr|f0_c57.idr|f1_balb.idr|f1_c57bl6.idr,4,Merged-chr11-98789041-6,Merged-chr11-98788992-6,Merged-chr11-98789035-6,Merged-chr11-98789035-6,,
Merged-chr11-95354785-2,chr11,95354597,95354974,+,1000.0,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-95354800-3,Merged-chr11-95354800-3,,"95354711:A->G,95354714:T->A,95354755:T->C,9535..."
Merged-chr11-75629486-2,chr11,75629264,75629708,+,950.75,f1_balb.idr|f1_c57bl6.idr,2,,,Merged-chr11-75629518-2,Merged-chr11-75629518-2,75629685:T->C,75629685:T->C


# Reading in Importance Scores and Data


### Pos_data (overlap peaks predicted to be positive by BOTH models)

In [7]:
# poised set
data_path = "./bed_files/poised_active_merged_peaks_scored_poised_filtered.bed"
poised_data = data_prep(data_path, genomes, 1, "C57", size=300, rep=1, shift=None)
print(len(poised_data))

Scaled to 300
Generate replicates by shifting 0 bp
38482


In [8]:
# active set
data_path = "./bed_files/poised_active_merged_peaks_scored_active_filtered.bed"
active_data = data_prep(data_path, genomes, 1, "C57", size=300, rep=1, shift=None)
print(len(active_data))

Scaled to 300
Generate replicates by shifting 0 bp
43537


### Read in Weights

In [9]:
#saved weights from interpretation nb (will smooth here in case we need raw)
#read in raw weights
poised_weights = pd.read_csv("./region_scores/poised_model_poised_peak_weights.txt",
                                 sep='\t',
                                 index_col=0)
print(poised_weights.shape)
#print(poised_weights.head())

#now convert to a np array
poised_weights_array = np.array(poised_weights)
poised_weights_array[0,0:10]

(38482, 300)


array([-0.0001682 , -0.00011876, -0.0003104 , -0.00196401, -0.00297288,
       -0.00138692, -0.00246708,  0.00057517,  0.00171606, -0.00367359])

In [10]:
#saved weights from interpretation nb (will smooth here in case we need raw)
#read in raw weights
active_weights = pd.read_csv("./region_scores/active_model_active_peak_weights.txt",
                                 sep='\t',
                                 index_col=0)
print(active_weights.shape)
#print(active_weights.head())

#now convert to a np array
active_weights_array = np.array(active_weights)
active_weights_array[0,0:10]

(43537, 300)


array([-7.5058045e-04, -2.1890788e-03, -9.6546850e-05, -1.5507314e-03,
       -1.4194234e-04, -1.7118760e-03,  4.0145720e-04,  1.1002405e-03,
       -1.1146057e-03,  1.1454145e-03])

### Smooth Weights

In [11]:
proc_range = 1
poised_smooth_weights = np.array([[np.mean(np.abs(w[max(0, i-proc_range):min(len(w), i+proc_range+1)]))
                         for i in range(len(w))] for w in poised_weights_array])
print(poised_smooth_weights.shape)

(38482, 300)


In [12]:
proc_range = 1
active_smooth_weights = np.array([[np.mean(np.abs(w[max(0, i-proc_range):min(len(w), i+proc_range+1)]))
                         for i in range(len(w))] for w in active_weights_array])
print(active_smooth_weights.shape)

(43537, 300)


# Temporary section

## Extract smoothed weights of given locus

In [24]:
test_var, test_weight = extractVariantWeights(control_vars,
                                              poised_smooth_weights,
                                              poised_data,
                                              marco_pos[0][-1])

overlap found Merged-chr1-120528241-1


In [25]:
test_var

[120528132]

In [26]:
test_weight

[0.0030830305000000002]

### Can we iterate through a peak list

In [77]:
def extractVariantWeights(variant_df, smooth_weights, pos_data, chr_pos):
    '''Function to overlay variants with the smoothed deep lift importance
    scores. Written by H. Mummey and modified by H. Bennett. One thing
    to fix is how to access columns of variant DF.
    Accepts:
    - variant_df (pandas DataFrame): peak file with annotations of mutations.
    Requires that either peakIDs are set as index or not included (chr must be
    in column 1)
    - smooth_wights (numpy array): array of deeplift weights(?)
    - pos_data (list): list of positional data with each entry holding sequence,
    chr, start, end, and an ID.
    - chr_pos: string of chr position, this is usually pulled from the pos_data list
    of ids which has the structure chr_start_end, so the chrom position can be pulled
    using a string split on "_"
    
    Returns tuple of two lists
    - balb_vars: loci of variants within peak.
    - var_weights: weight of variants according to model.
    '''
    
    #deconstruct chr_pos
    peak = chr_pos.split("_")
    peak = peak[:3]
    peak[1] = int(peak[1])
    peak[2] = int(peak[2])
    
    #find variants
    balb_vars, var_peaks = FindVariants(variant_df, peak)
    # print("variants", balb_vars)
    
    #now output a figure where we draw vertical lines for variants
    #reconstruct the pos_data style s
    s = chr_pos
    
    # extract weights
    w = np.abs(smooth_weights[np.array(pos_data)[:,-1] == s][0])

    # plot verticle lines for mutations
    var_weights = [] # append variant weights
    # print(peak[1], peak[2])
    for bvar in balb_vars:
        if (bvar >= peak[1]) & (bvar < peak[2]): # make sure variant is within scored region
            # print(bvar)
            var_weights.append(w[bvar-peak[1]])
        
    return balb_vars, var_weights

In [18]:
np.sort(glob.glob('/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/*'))

array(['/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_cis_balb_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_cis_balb_hunterFCThenPadj_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_cis_c57_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_cis_c57_hunterFCThenPadj_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_mixed_balb_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_mixed_balb_hunterFCThenPadj_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_mixed_c57

In [None]:
atac_mixed_c57_hunterFCThenPadj_distal_resized

In [28]:
finished_list = ['/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_mixed_c57_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_mixed_c57_hunterFCThenPadj_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_trans_balb_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_trans_balb_hunterFCThenPadj_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_trans_c57_hunterFCThenPadj_distal_resized.txt',
       '/gpfs/data01/glasslab/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/atac_trans_c57_hunterFCThenPadj_resized.txt']

In [30]:
# peak_max_weights = []

# for peakfile in np.sort(glob.glob('/home/h1bennet/strains_f1/results/00_CB6F1_Control_ATAC/resized_peaks/*')):
for peakfile in finished_list:
    print('Working on:', peakfile.split('/')[-1], '...')
    df= pd.read_csv(peakfile, sep='\t', index_col=0)
    
    var_score_dict = {}
    max_score_dict = {}
    for row_id, row in df.iterrows():
        print(row_id)

        # find overlapping peaks
        peaks = findOverlapPeaks((row.chr, row.start, row.end),
                                 poised_data)

        

        var_loc, var_weight = ([], []) # initialize peak lists
        max_weight = np.nan # initialize max weight variable.
        if (len(peaks) > 0):
        # it shouldn't be possible for more than one peak to be found since we looked
        # for 300 bp peaks but they have to be separated by i think at least 300 bp?
            var_loc, var_weight = extractVariantWeights(control_vars,
                                                        poised_smooth_weights,
                                                        poised_data,
                                                        peaks[0][-1])
            # have to add this check because we filter variants that don't lie within
            # our peak of interest, so sometimes we can have overlap without a scoring
            # variant
            
            if len(var_weight) > 0:
                max_weight = max(var_weight)
            else:
                max_weight = np.nan
            
            if len(peaks) > 1:
                print("WARNING: More than 1 overlapping peak found")
            elif len(peaks) == 0:
                print("No overlap found with model peak set")
        
        var_score_dict[row_id] = (var_loc, var_weight)
        max_score_dict[row_id] = max_weight
    
    print('done!')
    peak_max_weights.append([max_score_dict, var_score_dict])

Working on: atac_mixed_c57_hunterFCThenPadj_distal_resized.txt ...
Merged-chr1-128729725-4
overlap found chr1_128729596_128729896_C57
Merged-chr1-55666411-4
overlap found chr1_55666207_55666507_C57
Merged-chr10-93124352-5
overlap found chr10_93124150_93124450_C57
Merged-chr11-106973203-4
overlap found chr11_106973043_106973343_C57
Merged-chr11-26604443-4
overlap found chr11_26604279_26604579_C57
Merged-chr11-32541486-4
Merged-chr11-44417616-4
overlap found chr11_44417452_44417752_C57
Merged-chr11-67047362-4
overlap found chr11_67047210_67047510_C57
Merged-chr11-8424416-4
Merged-chr12-35051182-4
overlap found chr12_35051024_35051324_C57
Merged-chr12-44919092-4
Merged-chr12-8384087-2
Merged-chr13-105266897-4
overlap found chr13_105266744_105267044_C57
Merged-chr13-116987434-4
overlap found chr13_116987325_116987625_C57
Merged-chr13-60674481-4
Merged-chr13-63877298-4
overlap found chr13_63877142_63877442_C57
Merged-chr13-95868990-4
overlap found chr13_95868859_95869159_C57
Merged-chr14-29

Merged-chr4-41503166-4
overlap found chr4_41503053_41503353_C57
Merged-chr4-45408788-4
overlap found chr4_45408632_45408932_C57
Merged-chr4-6227272-4
overlap found chr4_6227091_6227391_C57
Merged-chr4-88094584-4
overlap found chr4_88094452_88094752_C57
Merged-chr4-9657511-4
overlap found chr4_9657370_9657670_C57
Merged-chr5-100628154-4
Merged-chr5-123832474-4
overlap found chr5_123832361_123832661_C57
Merged-chr5-125394369-4
overlap found chr5_125394202_125394502_C57
Merged-chr5-146948648-4
overlap found chr5_146948484_146948784_C57
Merged-chr5-43808185-4
overlap found chr5_43808025_43808325_C57
Merged-chr5-4758136-4
Merged-chr5-52741590-4
overlap found chr5_52741431_52741731_C57
Merged-chr5-64298441-4
Merged-chr6-125313920-4
overlap found chr6_125313761_125314061_C57
Merged-chr6-135323573-4
Merged-chr7-104507895-4
overlap found chr7_104507758_104508058_C57
Merged-chr7-105628812-4
Merged-chr7-107044583-4
Merged-chr7-36720224-4
overlap found chr7_36720052_36720352_C57
Merged-chr7-985346

Merged-chr16-93817087-3
Merged-chr17-24463979-4
overlap found chr17_24463842_24464142_C57
Merged-chr17-29164582-4
Merged-chr17-29171259-4
Merged-chr17-31038577-4
overlap found chr17_31038429_31038729_C57
Merged-chr17-31075643-4
Merged-chr17-35353168-3
Merged-chr17-35359564-2
Merged-chr17-35780525-4
Merged-chr17-67317067-1
Merged-chr17-71264952-4
overlap found chr17_71264772_71265072_C57
Merged-chr17-71286131-4
overlap found chr17_71285974_71286274_C57
Merged-chr17-86445114-4
overlap found chr17_86444971_86445271_C57
Merged-chr17-86658039-3
Merged-chr17-86787992-2
Merged-chr17-87255858-4
overlap found chr17_87255710_87256010_C57
Merged-chr17-88524266-3
Merged-chr18-30087456-3
Merged-chr18-36713554-4
overlap found chr18_36713407_36713707_C57
Merged-chr18-5760478-3
Merged-chr18-62191683-4
overlap found chr18_62191513_62191813_C57
Merged-chr18-6531378-3
Merged-chr18-65495973-4
overlap found chr18_65495847_65496147_C57
Merged-chr18-65505996-4
Merged-chr18-65671160-3
Merged-chr18-68180936-1


Merged-chr7-118764159-4
overlap found chr7_118764006_118764306_C57
Merged-chr7-139219265-4
overlap found chr7_139219118_139219418_C57
Merged-chr7-19028277-3
Merged-chr7-27436924-2
Merged-chr7-29162995-3
Merged-chr7-31085306-3
Merged-chr7-35595048-3
Merged-chr7-38029412-3
Merged-chr7-45097684-4
overlap found chr7_45097567_45097867_C57
Merged-chr7-45098997-3
Merged-chr7-4950785-1
Merged-chr7-67526370-2
Merged-chr7-71873999-4
Merged-chr7-75118266-4
Merged-chr7-75638942-4
overlap found chr7_75638793_75639093_C57
Merged-chr7-75641959-4
Merged-chr7-80065509-3
Merged-chr7-80753901-1
Merged-chr7-81842048-1
Merged-chr7-82744535-3
Merged-chr7-88303697-4
overlap found chr7_88303542_88303842_C57
Merged-chr7-88310947-1
Merged-chr7-88327687-3
Merged-chr7-91716905-4
overlap found chr7_91716763_91717063_C57
Merged-chr7-96483064-4
overlap found chr7_96482915_96483215_C57
Merged-chr7-96946719-2
Merged-chr7-98793262-1
Merged-chr8-111546455-3
Merged-chr8-111596004-4
Merged-chr8-123849539-2
Merged-chr8-124

Merged-chr13-35953115-3
Merged-chr13-37797775-2
Merged-chr13-37965527-5
overlap found chr13_37965465_37965765_C57
Merged-chr13-44231310-4
Merged-chr13-44415305-4
overlap found chr13_44415210_44415510_C57
Merged-chr13-45267560-4
overlap found chr13_45267422_45267722_C57
Merged-chr13-45358403-3
Merged-chr13-45372406-2
Merged-chr13-45444896-4
overlap found chr13_45444695_45444995_C57
Merged-chr13-47231277-4
overlap found chr13_47231102_47231402_C57
Merged-chr13-56334998-4
overlap found chr13_56334905_56335205_C57
Merged-chr13-56626706-4
Merged-chr13-59836219-4
Merged-chr13-64040070-3
Merged-chr13-64307546-4
Merged-chr13-64398277-4
overlap found chr13_64398137_64398437_C57
Merged-chr13-75840266-2
Merged-chr13-76130093-3
Merged-chr13-76215614-3
Merged-chr13-76262637-3
Merged-chr13-76384644-5
Merged-chr13-76410833-1
Merged-chr13-83518414-3
Merged-chr13-85223370-3
Merged-chr13-91631569-3
Merged-chr13-91640423-4
Merged-chr13-93774002-3
Merged-chr13-93778349-1
Merged-chr13-95679308-3
Merged-chr

Merged-chr4-105640971-3
Merged-chr4-109136981-3
Merged-chr4-116777245-3
Merged-chr4-118106904-4
overlap found chr4_118106727_118107027_C57
Merged-chr4-118360572-3
overlap found chr4_118360388_118360688_C57
Merged-chr4-119965244-3
Merged-chr4-120501896-2
Merged-chr4-120872583-3
Merged-chr4-120934069-3
Merged-chr4-122840425-3
Merged-chr4-133586761-3
Merged-chr4-133613775-4
Merged-chr4-133659658-4
overlap found chr4_133659495_133659795_C57
Merged-chr4-133666362-3
Merged-chr4-133853687-3
Merged-chr4-133875290-3
Merged-chr4-137837990-4
Merged-chr4-138038620-4
Merged-chr4-139235729-3
overlap found chr4_139235584_139235884_C57
Merged-chr4-139238758-3
Merged-chr4-141713814-4
Merged-chr4-142094509-4
overlap found chr4_142094359_142094659_C57
Merged-chr4-148500344-3
Merged-chr4-150210547-1
Merged-chr4-150341012-3
overlap found chr4_150340858_150341158_C57
Merged-chr4-156255155-4
Merged-chr4-32249127-4
overlap found chr4_32249014_32249314_C57
Merged-chr4-32303858-3
Merged-chr4-32571660-4
overlap 

Merged-chr1-120528241-1
overlap found chr1_120528091_120528391_C57
Merged-chr1-127219357-4
overlap found chr1_127219194_127219494_C57
Merged-chr1-13295712-3
overlap found chr1_13295574_13295874_C57
Merged-chr1-133118357-3
Merged-chr1-156009175-3
Merged-chr1-156010220-3
Merged-chr1-156985922-2
Merged-chr1-161139690-4
overlap found chr1_161139528_161139828_C57
Merged-chr1-161728784-4
overlap found chr1_161728627_161728927_C57
Merged-chr1-171183685-3
Merged-chr1-172513123-4
overlap found chr1_172512960_172513260_C57
Merged-chr1-172526823-3
Merged-chr1-172586587-4
Merged-chr1-173412657-2
Merged-chr1-173682438-4
overlap found chr1_173682287_173682587_C57
Merged-chr1-191068600-4
overlap found chr1_191068433_191068733_C57
Merged-chr1-191599435-4
overlap found chr1_191599255_191599555_C57
Merged-chr1-192742097-1
Merged-chr1-192759633-3
overlap found chr1_192759477_192759777_C57
Merged-chr1-31050442-2
overlap found chr1_31050317_31050617_C57
Merged-chr1-51931511-3
Merged-chr1-77761598-1
overlap

Merged-chr18-39876020-4
Merged-chr18-46273765-4
overlap found chr18_46273592_46273892_C57
Merged-chr18-53153372-1
Merged-chr18-5764412-3
Merged-chr18-60789467-4
overlap found chr18_60789299_60789599_C57
Merged-chr18-61224466-4
overlap found chr18_61224325_61224625_C57
Merged-chr18-64954227-4
overlap found chr18_64954079_64954379_C57
Merged-chr18-64957673-4
overlap found chr18_64957540_64957840_C57
Merged-chr18-64958314-4
overlap found chr18_64958184_64958484_C57
Merged-chr18-70436023-3
overlap found chr18_70435859_70436159_C57
Merged-chr18-75839503-5
overlap found chr18_75839420_75839720_C57
Merged-chr18-75917925-3
Merged-chr18-80938238-4
overlap found chr18_80938087_80938387_C57
Merged-chr19-12475566-4
Merged-chr19-12591120-4
overlap found chr19_12590972_12591272_C57
Merged-chr19-15999487-3
Merged-chr19-16107890-3
Merged-chr19-16110791-4
overlap found chr19_16110666_16110966_C57
Merged-chr19-23038609-3
Merged-chr19-24432711-3
overlap found chr19_24432444_24432744_C57
Merged-chr19-2513

Merged-chr7-19202694-3
Merged-chr7-31105336-4
overlap found chr7_31105202_31105502_C57
Merged-chr7-44823594-2
Merged-chr7-4900312-4
Merged-chr7-63989909-4
overlap found chr7_63989768_63990068_C57
Merged-chr7-80777256-5
Merged-chr7-83720481-4
overlap found chr7_83720343_83720643_C57
Merged-chr7-97034387-3
Merged-chr7-97425017-4
Merged-chr7-97800331-3
overlap found chr7_97800217_97800517_C57
Merged-chr8-111526785-4
overlap found chr8_111526620_111526920_C57
Merged-chr8-112034876-4
Merged-chr8-122465148-4
Merged-chr8-123280768-5
overlap found chr8_123280751_123281051_C57
overlap found chr8_123280435_123280735_C57
Merged-chr8-125067613-9
overlap found chr8_125067531_125067831_C57
Merged-chr8-125462410-3
overlap found chr8_125462273_125462573_C57
Merged-chr8-126589838-2
Merged-chr8-126693803-3
Merged-chr8-14643246-4
Merged-chr8-26697480-1
overlap found chr8_26697330_26697630_C57
Merged-chr8-33698316-2
Merged-chr8-34743605-2
Merged-chr8-34744377-3
overlap found chr8_34744233_34744533_C57
Mer

Merged-chr13-20211957-4
overlap found chr13_20211802_20212102_C57
Merged-chr13-21549698-4
overlap found chr13_21549565_21549865_C57
Merged-chr13-23696317-3
Merged-chr13-23761987-2
Merged-chr13-33076334-3
Merged-chr13-33077304-6
overlap found chr13_33077190_33077490_C57
Merged-chr13-34216815-1
Merged-chr13-37537925-2
Merged-chr13-44480334-4
overlap found chr13_44480174_44480474_C57
Merged-chr13-46906141-4
overlap found chr13_46905975_46906275_C57
Merged-chr13-53377402-4
overlap found chr13_53377255_53377555_C57
Merged-chr13-54475929-4
overlap found chr13_54475764_54476064_C57
Merged-chr13-55182748-4
overlap found chr13_55182596_55182896_C57
Merged-chr13-59001404-3
Merged-chr13-59853787-2
Merged-chr13-59889913-3
Merged-chr13-60508464-3
Merged-chr13-60682951-3
overlap found chr13_60682774_60683074_C57
Merged-chr13-64126138-4
overlap found chr13_64125974_64126274_C57
Merged-chr13-64181123-3
Merged-chr13-67609691-4
overlap found chr13_67609556_67609856_C57
Merged-chr13-67797741-2
Merged-chr

Merged-chr2-155755922-4
overlap found chr2_155755775_155756075_C57
Merged-chr2-156316606-1
Merged-chr2-164877971-3
overlap found chr2_164877871_164878171_C57
Merged-chr2-164953418-3
Merged-chr2-165056611-5
overlap found chr2_165056387_165056687_C57
Merged-chr2-165947774-4
overlap found chr2_165947648_165947948_C57
Merged-chr2-165987141-1
Merged-chr2-168905004-2
Merged-chr2-169780131-1
Merged-chr2-180920471-2
Merged-chr2-181496266-8
overlap found chr2_181496118_181496418_C57
Merged-chr2-69803005-4
overlap found chr2_69802855_69803155_C57
Merged-chr2-71598771-7
overlap found chr2_71598423_71598723_C57
Merged-chr2-71843499-4
overlap found chr2_71843364_71843664_C57
Merged-chr2-72197234-3
Merged-chr2-83729540-4
overlap found chr2_83729409_83729709_C57
Merged-chr2-84504480-4
overlap found chr2_84504341_84504641_C57
Merged-chr2-84505227-3
overlap found chr2_84505053_84505353_C57
Merged-chr2-84734014-4
Merged-chr2-90533229-2
overlap found chr2_90533061_90533361_C57
Merged-chr2-91498430-4
Merg

Merged-chr7-142449357-4
overlap found chr7_142449208_142449508_C57
Merged-chr7-142458136-4
overlap found chr7_142457965_142458265_C57
Merged-chr7-142460743-4
overlap found chr7_142460578_142460878_C57
Merged-chr7-142462746-4
overlap found chr7_142462581_142462881_C57
Merged-chr7-143041290-3
overlap found chr7_143041118_143041418_C57
Merged-chr7-16296973-4
overlap found chr7_16296878_16297178_C57
Merged-chr7-19076077-4
overlap found chr7_19075912_19076212_C57
Merged-chr7-19202694-3
Merged-chr7-27604882-4
overlap found chr7_27604734_27605034_C57
Merged-chr7-29237720-4
Merged-chr7-29856499-2
Merged-chr7-30553566-4
overlap found chr7_30553042_30553342_C57
Merged-chr7-30860110-3
Merged-chr7-31105336-4
overlap found chr7_31105202_31105502_C57
Merged-chr7-3290418-4
overlap found chr7_3290280_3290580_C57
Merged-chr7-44823594-2
Merged-chr7-45053836-2
Merged-chr7-4866267-4
overlap found chr7_4866124_4866424_C57
Merged-chr7-4900312-4
Merged-chr7-51879362-3
Merged-chr7-55961676-4
overlap found chr

In [31]:
len(peak_max_weights)

12

In [23]:
extractVariantWeights(control_vars,
                                                        poised_smooth_weights,
                                                        poised_data,
                                                        peaks[0][-1])

([116987255, 116987319], [])