### 1D Simulation + Save Trajectory

In [3]:
import os
import numpy as np
if not os.path.exists("trajectory"):
    os.mkdir("trajectory")

In [7]:
########## 
# Define functions needed for calculating trajectories
##########

import numpy as np
import pandas as pd
import h5py 
import matplotlib.pyplot as plt

class leg(object):
    def __init__(self, pos, attrs={"stalled":False, "CTCF":False}):
        """
        A leg has two important attribues: pos (positions) and attrs (a custom list of attributes)
        Represents one side of the cohesin
        """
        self.pos = pos
        self.attrs = dict(attrs)

class cohesin(object):
    """
    A cohesin class provides fast access to attributes and positions 
    
    
    cohesin.left is a left leg of cohesin, cohesin.right is a right leg
    cohesin[-1] is also a left leg and cohesin[1] is a right leg         
    
    Also, cohesin.any("myattr") is True if myattr==True in at least one leg
    cohesin.all("myattr") is if myattr=True in both legs
    """
    def __init__(self, leg1, leg2):
        self.left = leg1 # Left position on the polymer that the cohesin is contacting
        self.right = leg2 # Right position on the polymer that the cohesin is contacting
   
    def any(self, attr): # Is attr true for either leg?
        return self.left.attrs[attr] or self.right.attrs[attr]
    
    def all(self, attr): # Is attr true for both legs?
        return self.left.attrs[attr] and self.right.attrs[attr]    
    
    def __getitem__(self, item): # Method so that we can index the object via []
        if item == -1:
            return self.left
        elif item == 1:
            return self.right 
        else:
            raise ValueError()
        

def unloadProb(cohesin, args):
    """
    Defines unload probability based on a state of cohesin 
    """
    if cohesin.any("stalled"):
        # if one side is stalled, we have different unloading probability 
        # Note that here we define stalled cohesins as those stalled not at CTCFs 
        return 1 / args["LIFETIME_STALLED"]
    # otherwise we are just simply unloading 
    return 1 / args["LIFETIME"]    
    


def loadOne(cohesins, occupied, args): 
    """
    A function to load one cohesin 
    """
    while True:
        a = np.random.randint(args["N"]) # Candidate spot for cohesin loading
        if (occupied[a] == 0) and (occupied[a+1] == 0): # Both the target spot and the next spot must be unoccupied 
            occupied[a] = 1 # Spot occupied by cohesin
            occupied[a+1] = 1 # Next spot also occupied by cohesin
            print('Loading cohesin at left leg = {}, right leg = {}'.format(a, a+1))
            cohesins.append(cohesin(leg(a), leg(a+1))) # Append cohesin OBJECT to list
            break


def capture(cohesin, occupied, args):
    """
    We are describing CTCF capture here. 
    This function is specific to this particular project, and 
    users are encouraged to write functions like this 
    
    Note the for-loop over left/right sites below, and using cohesin[side] 
    to get left/right leg. 
    
    Also note how I made ctcfCapture a dict with -1 coding for left side, and 1 for right side 
    and ctcfCapture are dicts as well: keys are locations, and values are probabilities of capture
    """    
    for side in [1, -1]:
        # get probability of capture or otherwise it is 0 
        if np.random.random() < args["ctcfCapture"][side].get(cohesin[side].pos, 0):  
            if side==-1:
                print('Cohesin left leg at pos {} captured by CTCF'.format(cohesin.left.pos))
            if side==1:
                print('Cohesin right leg at pos {} captured by CTCF'.format(cohesin.right.pos))
            cohesin[side].attrs["CTCF"] = True  # captured a cohesin at CTCF     
    return cohesin 


def release(cohesin, occupied, args):
    
    """
    AN opposite to capture - releasing cohesins from CTCF 
    """
    
    if not cohesin.any("CTCF"):
        return cohesin  # no CTCF: no release necessary 
        
    # attempting to release either side 
    for side in [-1, 1]: 
        # Check if probability (rand) is less than the CTCF release prob. for the current side (-1 or 1)
        if (np.random.random() < args["ctcfRelease"][side].get(cohesin[side].pos, 0)) and (cohesin[side].attrs["CTCF"]):
            if side==-1:
                print('Cohesin left leg at pos {} released by CTCF'.format(cohesin.left.pos))
            if side==1:
                print('Cohesin right leg at pos {} released by CTCF'.format(cohesin.left.pos))
            cohesin[side].attrs["CTCF"] = False # CTCF is released, thus set the current cohesin leg's CTCF attribute to False
    return cohesin 


def translocate(cohesins, occupied, args):
    """
    This function describes everything that happens with cohesins - 
    loading/unloading them and stalling against each other 
    
    It relies on the functions defined above: unload probability, capture/release. 
    """
    # first we try to unload cohesins and free the matching occupied sites 
    for i in range(len(cohesins)):
        prob = unloadProb(cohesins[i], args) # Get the unloading probability for this cohesin
        if np.random.random() < prob: # Does cohesin get unloaded?
            print('Cohesin unloaded -- left leg: {}, right leg: {}'.format(cohesins[i].left.pos, cohesins[i].right.pos))
            occupied[cohesins[i].left.pos] = 0 # Unload left leg of cohesin
            occupied[cohesins[i].right.pos] = 0 # Unload right leg of cohesin
            del cohesins[i] # Remove the old cohesin object
            loadOne(cohesins, occupied, args) # load new cohesin at new spot
    
    # then we try to capture and release them by CTCF sites 
    for i in range(len(cohesins)): # For all cohesins, try to capture / release from CTCF
        cohesins[i] = capture(cohesins[i], occupied, args) # Capture
        cohesins[i] = release(cohesins[i], occupied, args) # Release
    
    # finally we translocate, and mark stalled cohesins because 
    # the unloadProb needs this 
    for i in range(len(cohesins)):
        cohesin = cohesins[i] 
        for leg in [-1,1]: 
            if not cohesin[leg].attrs["CTCF"]: # If this leg of this cohesin is not CTCF-bound
                # cohesins that are not at CTCFs and cannot move are labeled as stalled 
                if occupied[cohesin[leg].pos  + leg] != 0: # If the cohesin cannot move 
                    print('Cohesin leg at pos {} stalled'.format(cohesin[leg].pos))
                    cohesin[leg].attrs["stalled"] = True
                else:
                    cohesin[leg].attrs["stalled"] = False 
                    print('Cohesin leg at pos {} translocating'.format(cohesin[leg].pos))
                    occupied[cohesin[leg].pos] = 0 # Mark this leg as unoccupied
                    occupied[cohesin[leg].pos + leg] = 1 # Mark other leg as occupied - 'moves' the cohesin 
                    cohesin[leg].pos += leg # Update the position of the cohesin leg        
        cohesins[i] = cohesin # Reassign cohesin object to cohesin list
        
def color(cohesins, args):
    "A helper function that converts a list of cohesins to an array colored by cohesin state"    
    def state(attrs):
        if attrs["stalled"]:
            return 2
        if attrs["CTCF"]:
            return 3
        return 1
    ar = np.zeros(args["N"])
    for i in cohesins:
        ar[i.left.pos] = state(i.left.attrs)
        ar[i.right.pos] = state(i.right.attrs)  
    return ar 

In [41]:
##### Set up parameters ('args')
# Size of polymer and number of polymers
N1 = 1000 # Size of 1 polymer
M = 10 # number of polymers
N = N1 * M # Total system size (in monomers)

LIFETIME = 200 # 1/(unload prob.)
SEPARATION = 200 # Space between cohesins, just used in calculating number of cohesins to load
LEFNum = N // SEPARATION # Calculate number of cohesins to load for avg. separation and total system size
print('{} cohesins will be loaded initially'.format(LEFNum))
trajectoryLength = 1000 # 'Length' of the trajectory to be recorded 

CTCFsites = [250,500,750] # List of ints that denote CTCF binding sites. Indices are relative to 1 polymer length. CTCF sites are constant over all polymers
# these dicts will holdinfo for pairs of CTCFs (left/right). {position:probability}
ctcfLeftCapture = {}
ctcfRightCapture = {} 
ctcfLeftRelease = {}     
ctcfRightRelease = {}  

# At each site, 2 CTCFs will be loaded, a left-facing and a right-facing one
for i in range(M):
    for site in CTCFsites:
        site_pos = i * N1 + site
        ctcfLeftCapture[site_pos] = 0.9
        ctcfLeftRelease[site_pos] = 0.003
        ctcfRightCapture[site_pos] = 0.9
        ctcfRightRelease[site_pos] = 0.003

# Now populate the dict with all the params that our functions expect
args = {}
args["ctcfRelease"] = {-1:ctcfLeftRelease, 1:ctcfRightRelease}  # this is how we feed the dictionaries of capture/release probabilities to our code 
args["ctcfCapture"] = {-1:ctcfLeftCapture, 1:ctcfRightCapture}        
args["N"] = N 
args["LIFETIME"] = LIFETIME # 1/(unload_prob) when not stalled
args["LIFETIME_STALLED"] = LIFETIME/10 # 1/(unload_prob) when stalled

50 cohesins will be loaded initially


In [42]:
##### Doing simulation and writing positions to disk simultaneously

occupied = np.zeros(N) # Array telling us 'is position n occupied?' (1=yes, 0=no)
occupied[0] = 1
occupied[-1] = 1 # Make the ends of the polymer occupied so we dont get index OOB error
cohesins = [] # List to store our cohesin objects

for i in range(LEFNum): # Load cohesins
    loadOne(cohesins,occupied, args)

with h5py.File("trajectory/LEFPositions.h5", mode='w') as myfile: # Write output hdf
    dset = myfile.create_dataset("positions", # Create dataset in HDF
                                 shape=(trajectoryLength, LEFNum, 2), 
                                 dtype=np.int32, 
                                 compression="gzip")
    steps = 50  # No. of chunks 
    bins = np.linspace(0, trajectoryLength, steps, dtype=int) # chunks boundaries 
    for st,end in zip(bins[:-1], bins[1:]): # pair successive bin indices - out polymers are technically just sections of one really long polymer
        cur = []
        for i in range(st, end):
            translocate(cohesins, occupied, args)  # actual step of LEF dynamics 
            positions = [(cohesin.left.pos, cohesin.right.pos) for cohesin in cohesins] # Get indices of all legs from all cohesins
            cur.append(positions)  # appending current positions to an array 
        cur = np.array(cur)  # when we finished a block of positions, save it to HDF5 
        dset[st:end] = cur # Write positions to HDF dataset
    myfile.attrs["N"] = N
    myfile.attrs["LEFNum"] = LEFNum

Loading cohesin at left leg = 8070, right leg = 8071
Loading cohesin at left leg = 3249, right leg = 3250
Loading cohesin at left leg = 4707, right leg = 4708
Loading cohesin at left leg = 3035, right leg = 3036
Loading cohesin at left leg = 7765, right leg = 7766
Loading cohesin at left leg = 9410, right leg = 9411
Loading cohesin at left leg = 6571, right leg = 6572
Loading cohesin at left leg = 4813, right leg = 4814
Loading cohesin at left leg = 9266, right leg = 9267
Loading cohesin at left leg = 3557, right leg = 3558
Loading cohesin at left leg = 2057, right leg = 2058
Loading cohesin at left leg = 6711, right leg = 6712
Loading cohesin at left leg = 3749, right leg = 3750
Loading cohesin at left leg = 7003, right leg = 7004
Loading cohesin at left leg = 8889, right leg = 8890
Loading cohesin at left leg = 7245, right leg = 7246
Loading cohesin at left leg = 6923, right leg = 6924
Loading cohesin at left leg = 3306, right leg = 3307
Loading cohesin at left leg = 5055, right leg 

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Cohesin leg at pos 3165 translocating
Cohesin leg at pos 5608 translocating
Cohesin leg at pos 5655 translocating
Cohesin leg at pos 7127 translocating
Cohesin leg at pos 7168 translocating
Cohesin leg at pos 9403 translocating
Cohesin leg at pos 9442 translocating
Cohesin leg at pos 7652 translocating
Cohesin leg at pos 7689 translocating
Cohesin leg at pos 421 stalled
Cohesin leg at pos 445 translocating
Cohesin leg at pos 1416 translocating
Cohesin leg at pos 1447 translocating
Cohesin leg at pos 570 stalled
Cohesin leg at pos 599 translocating
Cohesin leg at pos 7778 translocating
Cohesin leg at pos 7801 translocating
Cohesin leg at pos 2523 translocating
Cohesin leg at pos 2542 translocating
Cohesin leg at pos 4649 translocating
Cohesin leg at pos 4662 translocating
Cohesin leg at pos 1069 translocating
Cohesin leg at pos 1080 translocating
Cohesin leg at pos 562 translocating
Cohesin leg at pos 569 stalled
Cohesin leg at pos 8922 translocating
Cohesin leg at pos 8927 translocatin

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Cohesin right leg at pos 5750 captured by CTCF
Cohesin right leg at pos 4500 captured by CTCF
Cohesin right leg at pos 7750 captured by CTCF
Cohesin left leg at pos 3250 captured by CTCF
Cohesin leg at pos 6493 translocating
Cohesin leg at pos 7123 translocating
Cohesin leg at pos 7804 translocating
Cohesin leg at pos 1942 translocating
Cohesin leg at pos 803 stalled
Cohesin leg at pos 3527 translocating
Cohesin leg at pos 3750 translocating
Cohesin leg at pos 7499 stalled
Cohesin leg at pos 4340 stalled
Cohesin leg at pos 4609 translocating
Cohesin leg at pos 3047 stalled
Cohesin leg at pos 4974 translocating
Cohesin leg at pos 8326 translocating
Cohesin leg at pos 9564 stalled
Cohesin leg at pos 96 translocating
Cohesin leg at pos 5830 translocating
Cohesin leg at pos 6009 translocating
Cohesin leg at pos 1533 translocating
Cohesin leg at pos 1744 translocating
Cohesin leg at pos 9563 stalled
Cohesin leg at pos 5646 translocating
Cohesin leg at pos 4391 translocating
Cohesin leg at p

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [73]:
## Look at the trajectory HDF to see what the structure is like
with h5py.File("trajectory/LEFPositions.h5", mode='r') as f:
    pos = f['positions']
    print(pos.shape)

(10000, 50, 2)
