Skip to content

Commit

Permalink
Merge pull request #41 from riscalab/master
Browse files Browse the repository at this point in the history
adding docs / cleaning up code
  • Loading branch information
qmacAstanford committed Jul 30, 2020
2 parents af8fc8f + ee47d42 commit 70221fc
Show file tree
Hide file tree
Showing 24 changed files with 1,527 additions and 885 deletions.
493 changes: 441 additions & 52 deletions analysis/MCsim.py

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions analysis/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
this folder contains parser/helper files for wlcsim analysis

it was written for the use of the risca lab, but anyone else is free to use/edit it!

main code is MCsim which is a wlcsim output parser with some analysis methods, such as:
* visualize trajectory in pymol
* end-to-end distance
* pairwise nucleosome distances
* interpolate basepair resolution DNA structure between coarse-graied wlcsim beads
* determine ricc-seq break patterns from interpolated fiber structure

some minimal helper functions also exist in utility, such as:
* apply rotation/translation of linker DNA into and out of nucleosome
* parametrize DNA as two offset helices to build a phosphate-base-phosphate representation of B-DNA
119 changes: 119 additions & 0 deletions analysis/movieCoarse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from pymol import cmd
from sys import argv
import numpy as np
import colorsys
import sys
from pymol.cgo import *
sys.path.append('../analysis/')
from analysis.utility import *

# this script is run automatically by MCsim and will need to be changed for someone else's use

numFrames = int(argv[1])
channel = argv[2]
temperature = int(argv[3])
pathPDB = argv[4]
pathData = argv[5]
hull = argv[6]
rad = float(argv[7])
if (channel == 'PT'):
nodes = np.loadtxt(pathData+'nodeNumber')
nodes = np.vstack((np.linspace(0, np.shape(nodes)[1]-1, np.shape(nodes)[1]), nodes))
channel = np.asarray(nodes[:,temperature], 'int')
else:
channel = [channel]*numFrames
input_folder = pathData
side = 16
incr = 2*np.pi/(side/2.0)

for idx in range(0,numFrames): cmd.load(pathPDB+"coarse%03d.pdb"%idx,"snap")
cmd.mset("1 -%d" % numFrames)
cmd.color('gray80', 'snap')

file_inds = range(0,numFrames)
for ind in file_inds:
#load file r and u data
r = np.loadtxt('/%sr%sv%s' %(input_folder,ind,channel[ind]))
u = np.loadtxt('/%su%sv%s' %(input_folder,ind,channel[ind]))
# load discretization data
disc = np.loadtxt('/%sd%sv%s' %(input_folder,ind,channel[ind]))
wrap = disc[0]; bps = disc[1]
if (hull == 'True'):
for i in range(len(r)):
if bps[i] != 0:
poly = np.zeros(side*3).reshape([side,3])
uin = np.asarray(u[i,0:3]); vin = np.asarray(u[i,3:6]); cross = np.cross(uin, vin)
mat = np.matrix([vin, cross, uin]).reshape([3,3]).T
if (wrap[i]>1):
space = 0
height = 5.5
radius = 5.2
center = np.asarray([4.8455, -2.4445, 0.6694])
for j in range(int(side/2.0)):
# rotate into material frame
vec = np.asarray([radius*np.cos(space), -height/2.0, radius*np.sin(space)])
poly[j,:] = r[i,:] + np.matmul(mat, center+vec)
space = space + incr
space = 0
for j in range(int(side/2.0),side):
# rotate into material frame
vec = np.asarray([radius*np.cos(space), height/2.0, radius*np.sin(space)])
poly[j,:] = r[i,:] + np.matmul(mat, center+vec)
space = space + incr
# make cylinders
x1,y1,z1 = np.mean(poly[:int(side/2),0]), np.mean(poly[:int(side/2),1]), np.mean(poly[:int(side/2),2])
(re, g, b) = colorsys.hsv_to_rgb(float(i)/(len(r)-1), 1.0, 1.0)
x2,y2,z2 = np.mean(poly[int(side/2):,0]), np.mean(poly[int(side/2):,1]), np.mean(poly[int(side/2):,2])
cmd.load_cgo( [ 25.0, 0.25, 9.0, x1, y1, z1, x2, y2, z2, radius, re, g, b, re, g, b ], "seg"+str(i+1)+'nuc')
# add extruding linker
space = 2*np.pi/side
tempU, tempV, tempR = rotate_bead(uin,vin,r[i,:],int(bps[i]),int(wrap[i]))
height = np.sqrt(np.dot(r[i+1,:]-tempR, r[i+1,:]-tempR))
radius = 1.0
center = np.zeros(3)
tempMat = np.matrix([tempV, np.cross(tempU, tempV), tempU]).reshape([3,3]).T
for j in range(int(side/2.0)):
# rotate into material frame
vec = np.asarray([radius*np.sin(space), radius*np.cos(space), -height/2.0])
poly[j,:] = (tempR+r[i+1,:])/2.0 + np.matmul(tempMat, center+vec)
space = space + incr
space = 2*np.pi/side
for j in range(int(side/2.0),side):
# rotate into material frame
vec = np.asarray([radius*np.sin(space), radius*np.cos(space), height/2.0])
poly[j,:] = (tempR+r[i+1,:])/2.0 + np.matmul(tempMat, center+vec)
space = space + incr
else:
space = 2*np.pi/side
height = np.sqrt(np.dot(r[i+1,:]-r[i,:], r[i+1,:]-r[i,:]))
radius = 1.0
center = np.zeros(3)
for j in range(int(side/2.0)):
# rotate into material frame
vec = np.asarray([radius*np.sin(space), radius*np.cos(space), -height/2.0])
poly[j,:] = (r[i,:]+r[i+1,:])/2.0 + np.matmul(mat, center+vec)
space = space + incr
space = 2*np.pi/side
for j in range(int(side/2.0),side):
# rotate into material frame
vec = np.asarray([radius*np.sin(space), radius*np.cos(space), height/2.0])
poly[j,:] = (r[i,:]+r[i+1,:])/2.0 + np.matmul(mat, center+vec)
space = space + incr
# make cylinders
x1,y1,z1 = np.mean(poly[:int(side/2),0]), np.mean(poly[:int(side/2),1]), np.mean(poly[:int(side/2),2])
(re, g, b) = colorsys.hsv_to_rgb(float(i)/(len(r)-1), 1.0, 1.0)
x2,y2,z2 = np.mean(poly[int(side/2):,0]), np.mean(poly[int(side/2):,1]), np.mean(poly[int(side/2):,2])
cmd.load_cgo( [ 25.0, 0.25, 9.0, x1, y1, z1, x2, y2, z2, radius, re, g, b, re, g, b ], "seg"+str(i+1))
#cmd.mplay()
cmd.orient('snap')

# add sphere volume if set
if (rad>0):
spherelist = [
COLOR, 1, 0, 0,
SPHERE, rad, rad, rad, rad,
]
cmd.load_cgo(spherelist, 'sphere', 1)
cmd.set('cgo_transparency', 0.6)
cmd.set('cgo_sphere_quality', 100)

2 changes: 1 addition & 1 deletion vizualization/pymol/movieFine.py → analysis/movieFine.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pymol import cmd
from sys import argv

# this script is run automatically by NPrun.py and will need to be changed for someone else's use
# this script is run automatically by MCsim and will need to be changed for someone else's use

numFrames = int(argv[1])
pathPDB = argv[2]
Expand Down
File renamed without changes.
137 changes: 98 additions & 39 deletions analysis/utility.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,112 @@
#/usr/bin/python
# npagane | python utilities file

import pathlib
from pathlib import Path
import numpy as np

# set parameters
lengthPerBP = 0.332
defaultOmega = 34.3*np.pi/180
defaultDirectory = str(pathlib.Path(__file__).parent.absolute()).split(__file__)[0]+'/../'
length_per_bp = 0.332 # length in nm of 1 bp
default_omega = 2*np.pi/10.5 # default twist of DNA
max_bp_wrapped = 147
default_dir = str(Path(__file__).parent / Path('..'))

# read in translation and rotation files
nucleosomeTran = np.loadtxt(defaultDirectory+'input/nucleosomeT')
nucleosomeRot = np.loadtxt(defaultDirectory+'input/nucleosomeR')
nucleosome_tran = np.loadtxt('%s/input/nucleosomeT' %(default_dir))
nucleosome_rot = np.loadtxt('%s/input/nucleosomeR' %(default_dir))

def rotate_bead(Uin, Vin, Rin, link_bp, wrap_bp):
"""
Rotate and translate computational bead to account for DNA wrapping nucleosome
and expected entry-exit DNA angle
If the bead is a DNA bead, this function does NOT do any rotations/translations.
This is essentially just translated from Fortran (src/mc/nucleosome.f90) to Python.
Parameters
----------
Uin : numpy.array
orientation vector U of bead, tangent to polymer contour
Vin : numpy.array
orientation vector V of bead, constructed to define twist and perpenducalr to U
Rin : numpy.array
euclidian position of bead
link_bp : float or int
discretization of computational bead in bp
wrap_bp : int
number of basepairs wrapping bead, i.e. 147 for nucleosomes and 1 for DNA beads
# define bead translation + rotation
def rotateBead(Uin,Vin,Rin,linkBP,wrapBP):
angle = 2*np.pi/10.5
Returns
----------
Uout, Vout, Rout : list
list of the resultant vectors from rotating and translating Uin, Vin, and Rin
"""
mat = np.matrix([Vin, np.cross(Uin, Vin), Uin]).T
Rout = Rin + np.matmul(mat, nucleosomeTran[147-wrapBP])
linkRot = np.matrix([[np.cos(angle*linkBP), -np.sin(angle*linkBP), 0.],
[np.sin(angle*linkBP), np.cos(angle*linkBP), 0.],
Rout = Rin + np.matmul(mat, nucleosome_tran[max_bp_wrapped - wrap_bp])
linkRot = np.matrix([[np.cos(default_omega*link_bp), -np.sin(default_omega*link_bp), 0.],
[np.sin(default_omega*link_bp), np.cos(default_omega*link_bp), 0.],
[0., 0., 1.]])
mat = np.matmul(np.matmul(mat, nucleosomeRot[(3*(147-wrapBP)):(3*(147-wrapBP)+3),:]), linkRot)
Uout = mat[:,2]/np.linalg.norm(mat[:,2])
Vout = mat[:,0]/np.linalg.norm(mat[:,0])
mat = np.matmul(np.matmul(mat, nucleosome_rot[(3*(max_bp_wrapped - wrap_bp)):(3*(max_bp_wrapped - wrap_bp) + 3),:]), linkRot)
Uout = mat[: ,2]/np.linalg.norm(mat[:, 2])
Vout = mat[:, 0]/np.linalg.norm(mat[:, 0])
return np.squeeze(np.asarray(Uout)), np.squeeze(np.asarray(Vout)), np.squeeze(np.asarray(Rout))

# parametrize the double helix and return the backbone, base, backbone positions
# inspired from https://www.slideshare.net/dvidby0/the-dna-double-helix
def DNAhelix(bp,omega=defaultOmega, v=lengthPerBP): # distances=nm, angles=radians
r = 1.0
alpha = 0
beta=2.4
if (type(bp)==int or type(bp)==float):
bp = np.asarray([bp])
phos1 = np.zeros(len(bp)*3).reshape([len(bp),3])
phos2 = np.zeros(len(bp)*3).reshape([len(bp),3])
base = np.zeros(len(bp)*3).reshape([len(bp),3])
phos1[:,0] = r*np.cos(omega*bp+alpha); phos2[:,0] = r*np.cos(omega*bp+beta); base[:,0] = 0
phos1[:,1] = r*np.sin(omega*bp+alpha); phos2[:,1] = r*np.sin(omega*bp+beta); base[:,1] = 0
z = v*bp; phos1[:,2] = z; phos2[:,2] = z; base[:,2] = z

def DNAhelix(bp, omega=default_omega, v=length_per_bp):
"""
Parametrize the double helix and return the phosphate (strand 1), base, and
phosphate (strand 2) positions. This function helps to interpolate the "actual"
positions of DNA basepairs between coarse-grained computational bead locations.
Inspired from https://www.slideshare.net/dvidby0/the-dna-double-helix
Parameters
----------
bp : int or float
number of basepairs to construct a double helix for
omega : float, optional
default : 2 :math:`\pi` / 10.5 bp
twist of DNA in radians per bp
v : float, optional
default : 0.332 nm
length of DNA per bp in nm
Returns
----------
phos1, base, phos2 : list
list of positions of the two phosphate backbone strands and the
nitrogenous base positions for the number of basepairs requested
"""
r = 1.0 # radius of DNA helix
alpha = 0 # angular offset for DNA strand 1
beta = 2.4 # angular offset for DNA strand 2
z = v*bp
phos1 = np.array([r*np.cos(omega*bp + alpha), r*np.sin(omega*bp + alpha), z])
phos2 = np.array([r*np.cos(omega*bp + beta), r*np.sin(omega*bp + beta), z])
base = np.array([0, 0, z])
return phos1, base, phos2

# get the change in the UV angle
def getUVangle(r1,r2,u1,u2):
t = np.linspace(0,1,2)
pt1 = r1+u1*t[0]
pt2 = r1+u1*t[1]
pt3 = r2+u2*t[0]
pt4 = r2+u2*t[1]
b1 = pt2 - pt1
b2 = pt4 - pt3
return np.arccos(np.round(np.dot(b1,b2)/(np.linalg.norm(b1)*np.linalg.norm(b2)),6))
def get_uv_angle(u1, u2):
"""
Get the UV angle, i.e. the "bond" angle between two consecutive beads.
For two DNA beads, the UV angle should fluctuate around 0 or multiples of :math:`\pi`
since DNA should be relatively straight. The UV bond angle for a nucleosome bead should
fluctuate around the entry/exit angle of a crystallized nucleosome.
Parameters
----------
u1 : numpy.array
orientation vector U of bead 1
u2 : numpy.array
orientation vector U of bead 2
Returns
----------
bond_angle : float
bond angle between beads 1 and 2 in radians
"""
bond_angle = np.dot(u1,u2)
bond_angle = bond_angle/(np.linalg.norm(u1)*np.linalg.norm(u2))
# round avoids numerical errors that will result in bond_angle > 1
bond_angle = np.round(bond_angle, 6)
# since bond_angle <=1, we can now take arccos
return np.arccos(bond_angle)
15 changes: 15 additions & 0 deletions input/example_defines/LAM_attraction_base.inc
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@
! DEFAULTS COMMENT: ! assuming same number of beads on each polymer
! TYPE: INTEGER
#define WLC_P__NBPL 0
! VARIABLE COMMENT: ! Number of beads per linker segement, this determines discretization (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Not in use.
! TYPE: INTEGER
#define WLC_P__LINKING_NUMBER 0
! VARIABLE COMMENT: ! Linking number of ring for global twist energy calculation.
! DEFAULTS COMMENT: ! Not a ring so doesn't matter.
Expand Down Expand Up @@ -358,6 +363,16 @@
! DEFAULTS COMMENT: ! Not in use.
! TYPE: REAL(DP)

#define WLC_P__LINKER_DISCRETIZATION 0
! VARIABLE COMMENT: ! Discretization of computational beads (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Not in use.
! TYPE: REAL(DP)

#define WLC_P__NUM_NUCLEOSOMES 0
! VARIABLE COMMENT: ! how many nucleosomes per chain (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Note in use.
! TYPE: INTEGER

#define WLC_P__LINKER_TYPE 'none'
! VARIABLE COMMENT: ! linker length geometry type for initialization (i.e. phased array or sampled from a file)
! DEFAULTS COMMENT: ! Not in use
Expand Down
15 changes: 15 additions & 0 deletions input/example_defines/MBS_PNAS_2018_baseCase.inc
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@
! DEFAULTS COMMENT: ! assuming same number of beads on each polymer
! TYPE: INTEGER

#define WLC_P__NBPL 0
! VARIABLE COMMENT: ! Number of beads per linker segement, this determines discretization (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Not in use.
! TYPE: INTEGER

#define WLC_P__LINKING_NUMBER 0
! VARIABLE COMMENT: ! Linking number of ring for global twist energy calculation.
! DEFAULTS COMMENT: ! Not a ring so doesn't matter.
Expand Down Expand Up @@ -350,6 +355,16 @@
! DEFAULTS COMMENT: ! Not in use.
! TYPE: REAL(DP)

#define WLC_P__LINKER_DISCRETIZATION 0
! VARIABLE COMMENT: ! Discretization of computational beads (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Not in use.
! TYPE: REAL(DP)

#define WLC_P__NUM_NUCLEOSOMES 0
! VARIABLE COMMENT: ! how many nucleosomes per chain (only relevant for risca chromatin sims)
! DEFAULTS COMMENT: ! Note in use.
! TYPE: INTEGER

#define WLC_P__LINKER_TYPE 'phased'
! VARIABLE COMMENT: ! linker length geometry type for initialization (i.e. phased array or sampled from a file)
! DEFAULTS COMMENT: ! default to phased, i.e. set up a chain of constant linker length
Expand Down

0 comments on commit 70221fc

Please sign in to comment.