In [3]:
import scipy
import scipy.sparse, scipy.stats
import numpy
import os
import re
import multiprocessing
import subprocess
import pickle
import matplotlib.pyplot
import time
import statistics
import sklearn
import statsmodels , statsmodels.api
import itertools

In [4]:
def moving_integration (values, window):
    """Running sum of ``values`` over a sliding window of ``window`` samples.

    The sum is computed by convolving ``values`` with a box kernel of
    ``window`` ones using numpy's ``'same'`` mode, so positions near either
    end only sum the samples that actually overlap the kernel (the missing
    neighbours contribute zero).

    Parameters
    ----------
    values : array-like
        One-dimensional sequence of numbers.
    window : int
        Number of samples in the box kernel.

    Returns
    -------
    numpy.ndarray
        The convolved signal as returned by ``numpy.convolve(..., 'same')``.
    """
    box_kernel = numpy.ones(window)
    return numpy.convolve(values, box_kernel, mode='same')
def moving_average (values, window):
    """Running mean of ``values`` over a sliding window of ``window`` samples.

    The mean is computed by convolving ``values`` with a kernel of ``window``
    equal weights ``1/window`` using numpy's ``'same'`` mode. Near either end
    the kernel only partly overlaps the data, so those outputs are divided by
    ``window`` even though fewer samples contribute.

    Parameters
    ----------
    values : array-like
        One-dimensional sequence of numbers.
    window : int
        Number of samples in the averaging kernel.

    Returns
    -------
    numpy.ndarray
        The convolved signal as returned by ``numpy.convolve(..., 'same')``.
    """
    mean_kernel = numpy.ones(window) / window
    return numpy.convolve(values, mean_kernel, mode='same')

In [5]:
# Restore the five variables stored in ./testdata/variables.pi. Of these, the
# visible cells below only use frag_prop (indexed per fragment).
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("./testdata/variables.pi","rb") as picklefile:
    frag_index,frag_prop,frag_amount,valid_chroms,chroms_offsets = pickle.load(picklefile)

In [6]:
# Load the pickled (smoothed_diagonal, refined_peaks) pair. Later cells sum slices
# of smoothed_diagonal and treat refined_peaks[i] == 1 as "index i is inside a peak".
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("./testdata/peaks_chr1_mumbach.pi" ,"rb") as picklein:
    smoothed_diagonal,refined_peaks = pickle.load(picklein)

In [7]:
# Sparse matrix read from an .npz file; later cells slice it as [rows, cols] and sum
# the selected entries. The variable name suggests CSR format, but load_npz returns
# whatever sparse format the file was saved in -- TODO confirm.
CSR_mat_full = scipy.sparse.load_npz('./testdata/sparse_matrix_mumbach_non_reassigned_chr1.npz')

In [8]:
def find_peak_intervals(peak_mask, min_length=0):
    """Collapse consecutive runs of 1s in ``peak_mask`` into index intervals.

    Parameters
    ----------
    peak_mask : sequence
        Per-index flags; an index belongs to a peak when its value ``== 1``.
    min_length : int, optional
        Runs spanning fewer indices than this are discarded. The default of 0
        keeps every run.

    Returns
    -------
    list of (int, int)
        One ``(start, end)`` tuple per run, where ``start`` is the first index
        of the run and ``end`` is one past its last index (half-open).
    """
    intervals = []
    total = len(peak_mask)
    pos = 0
    while pos < total:
        if peak_mask[pos] != 1:
            pos += 1
            continue
        run_start = pos
        # Check the bound before indexing: a run that reaches the final element
        # would otherwise read one past the end and raise IndexError.
        while pos < total and peak_mask[pos] == 1:
            pos += 1
        if pos - run_start >= min_length:
            intervals.append((run_start, pos))
    return intervals


# List of (start, end) index tuples, one per run of 1s in refined_peaks.
peaks = find_peak_intervals(refined_peaks)

print(len(peaks))



3158


In [None]:
#interactive visualization of peaks viewpoints
%matplotlib qt
from ipywidgets import *
fig = matplotlib.pyplot.figure()
ax = fig.add_subplot(111)
matplotlib.pyplot.ion()
def interactplot(i):
    ax.clear()
    chip=smoothed_diagonal[peaks[i][0]-1500:peaks[i][1]+1500]
    bait=moving_integration(CSR_mat_full[peaks[i][0]:peaks[i][1],].toarray().sum(axis=0).tolist()[peaks[i][0]-1500:peaks[i][1]+1500],3)
    ax.plot(chip)
    ax.plot(bait)
    fig.canvas.draw()
    
    ratio = []
    for x, y in zip(bait, chip):
        ratio.append(x*100/(y+10))
    ax.plot(moving_integration(ratio,20))
    
    print(frag_prop[peaks[i][0]])
    print(frag_prop[peaks[i][0]-1500])
    print(frag_prop[peaks[i][1]+1500])
interact(interactplot,i=widgets.IntSlider(min=1,max=len(peaks)-1,step=1,value=1));

In [92]:
# For every ordered pair of peaks (i, k) whose distance lies strictly between
# the two thresholds, collect four parallel lists:
#   interactions  - sum of CSR_mat_full over rows of peak i and columns of peak k
#   chip_i/chip_k - sum of smoothed_diagonal over peak i / peak k
#   distance_data - the distance itself
# The distance is frag_prop[d][1] - frag_prop[a][2], where a is the first index
# of peak i and d is one past the last index of peak k.
# NOTE(review): fields 1 and 2 of a frag_prop entry are presumably the fragment
# start and end coordinates -- confirm against the code that builds frag_prop.
MIN_PAIR_DISTANCE = 40000
MAX_PAIR_DISTANCE = 500000

# Each peak's summed signal is needed once per accepted pair; compute it once
# per peak instead of re-summing the same slice inside the double loop.
peak_chip_sums = [smoothed_diagonal[start:end].sum() for start, end in peaks]

distance_data=[]
chip_i=[]
chip_k=[]
interactions=[]

for i, (a, b) in enumerate(peaks):
    anchor_coord = frag_prop[a][2]  # invariant across the inner loop
    for k, (c, d) in enumerate(peaks):
        distance = frag_prop[d][1] - anchor_coord
        if MIN_PAIR_DISTANCE < distance < MAX_PAIR_DISTANCE:
            interactions.append(CSR_mat_full[a:b,c:d].sum())
            chip_i.append(peak_chip_sums[i])
            chip_k.append(peak_chip_sums[k])
            distance_data.append(distance)

In [10]:
%matplotlib qt
from mpl_toolkits.mplot3d import Axes3D
fig = matplotlib.pyplot.figure(figsize=(15, 10), dpi=96)
ax = fig.add_subplot(111, projection='3d')



from matplotlib.animation import FuncAnimation, PillowWriter

def init():
    ax.scatter(distance_data,interactions,[a+b for a,b in zip(chip_i,chip_k)])
    ax.set_zlim3d((0,10000))
    ax.set_ylim3d((0,40))
    ax.set_xlim3d((0,350000))
    return fig,

def animate(i):
    ax.view_init(elev=1., azim=10*i)
    return fig,

# Animate




anim = FuncAnimation(fig, animate, init_func=init,
                               frames=36, interval=200)

anim.save("testdata/animations/additive_chip_on_z.gif", writer=PillowWriter(fps=5))
    

In [28]:
%matplotlib qt
from mpl_toolkits.mplot3d import Axes3D
fig = matplotlib.pyplot.figure(figsize=(15, 10), dpi=96)
ax = fig.add_subplot(111, projection='3d')



from matplotlib.animation import FuncAnimation, PillowWriter

def init():
    ax.scatter(distance_data,interactions,[a*b for a,b in zip(chip_i,chip_k)])
    ax.set_zlim3d((0,4000000))
    ax.set_ylim3d((0,40))
    ax.set_xlim3d((0,350000))
    return fig,

def animate(i):
    ax.view_init(elev=1., azim=10*i)
    return fig,

# Animate




anim = FuncAnimation(fig, animate, init_func=init,
                               frames=36, interval=200)

anim.save("testdata/animations/multiplicative_chip_on_z.gif", writer=PillowWriter(fps=5))
    

In [86]:
# Interactions against the product of the two peaks' ChIP sums, coloured by
# pair distance (colour scale fixed to 0..500000).
fig = matplotlib.pyplot.figure(figsize=(15, 10), dpi=96)
ax = fig.add_subplot(111)

sc = ax.scatter(
    [ci * ck for ci, ck in zip(chip_i, chip_k)],
    interactions,
    c=distance_data, cmap="RdYlBu", alpha=1, s=5, vmin=0, vmax=500000,
)
ax.set_xlabel("chip_i * chip_k")
ax.set_ylabel("interactions")
# Attach the colorbar to this figure/axes explicitly rather than to whatever
# pyplot currently considers the active figure.
cbar = fig.colorbar(sc, ax=ax)
cbar.set_label("distance")

fig.show()

AttributeError: 'numpy.ndarray' object has no attribute 'median'

In [52]:
# Product of the two peaks' ChIP sums (log scale) against pair distance,
# coloured by interactions (colour scale fixed to 0..200).
fig = matplotlib.pyplot.figure(figsize=(15, 10), dpi=96)
ax = fig.add_subplot(111)

sc = ax.scatter(
    distance_data,
    [ci * ck for ci, ck in zip(chip_i, chip_k)],
    c=interactions, cmap="inferno", alpha=1, s=5, vmin=0, vmax=200,
)
ax.set_xlabel("distance")
ax.set_ylabel("chip_i * chip_k")
cbar = fig.colorbar(sc, ax=ax)
cbar.set_label("interactions")
# Set the scale on the data axes directly instead of relying on pyplot's
# notion of the current axes after the colorbar has been added.
ax.set_yscale("log")

fig.show()

In [56]:
# Same x/y as the coloured scatter, but drawn nearly transparent (alpha 0.05)
# so that dense regions stand out.
fig = matplotlib.pyplot.figure(figsize=(15, 10), dpi=96)
ax = fig.add_subplot(111)

sc = ax.scatter(
    [ci * ck for ci, ck in zip(chip_i, chip_k)],
    interactions,
    alpha=0.05,
)
ax.set_xlabel("chip_i * chip_k")
ax.set_ylabel("interactions")

fig.show()

In [65]:
x=[a+b for a,b in zip(chip_i,chip_k)]
y = statsmodels.api.nonparametric.lowess(interactions, x,return_sorted=False,frac=0.2,delta=100)
%matplotlib qt
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x,y)
fig.show()

In [93]:
x=[a*b for a,b in zip(chip_i,chip_k)]
y = statsmodels.api.nonparametric.lowess(interactions, x,return_sorted=False,frac=0.2,delta=10000)
%matplotlib qt
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x,y)
fig.show()

interactions_folds = [a/b for a,b in zip(interactions,y)]

#seems that this is very good

In [96]:
x=distance_data
y = statsmodels.api.nonparametric.lowess(interactions_folds, x,return_sorted=False,frac=0.2,delta=1000)
%matplotlib qt
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x,y)
fig.show()

interactions_folds_distance = [a/b for a,b in zip(interactions_folds,y)]


In [98]:
# Overlay the doubly normalised folds with chip_i (scaled down by 1000) for
# the pairs at positions 50000..99999 of the parallel lists.
fig, ax = matplotlib.pyplot.subplots()
ax.plot(interactions_folds_distance[50000:100000], label="interactions_folds_distance")
ax.plot([value / 1000 for value in chip_i[50000:100000]], label="chip_i / 1000")
ax.set_xlabel("pair position (offset by 50000)")
ax.legend()
fig.show()