In [None]:
# Author: Zane Thornburg

### User Inputs ###

# Total simulated time; copied into `totalTime` and used as the RDME simulation time in initSim().
simulation_time = 10.0 # seconds
# Name of the output sub-folder that initSim() creates under '../simulations/'.
simulation_folder_name = 'new_folder'
# Inserted into the DNA coordinate file names ('..._rep000' + number + '_CG_coords.dat') in initSim().
DNA_configuration_number = '01' # 01 - 70, must be two digits in a string

## WARNING: The metabolic reaction parameters are read from SBTab tables, and the SBTab loader may conflict with itself.
## If an SBTab error reports that a table is already in the notebook, simply Restart and Run All again.

In [None]:
# Import Dependencies
from jLM.RegionBuilder import RegionBuilder
from jLM.RDME import Sim as RDMESim
from jLM.RDME import File as RDMEFile
import jLM

from jLM.Solvers import makeSolver

from pyLM import CME

from pyLM.units import *

import lm

from lm import MpdRdmeSolver
from lm import IntMpdRdmeSolver

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy  as np

import os

import scipy.ndimage as spnd
import ipywidgets as ipw
import h5py
import itertools
import random
import copy

# import ipyvolume
# from sidecar import Sidecar
# import numpy as np
# from ipywebrtc import WidgetStream, VideoRecorder

import math
import scipy as sp
import scipy.spatial

# import seaborn as sns

import csv
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
import importlib
from collections import defaultdict, OrderedDict

import time

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(x, *args, **kwargs):
        """Fallback used when tqdm is not installed: return the iterable unchanged.

        Any extra positional or keyword arguments (``ascii``, ``desc``,
        ``total``, ...) are accepted and ignored, so call sites written for the
        real ``tqdm`` keep working without a progress bar.
        """
        return x


In [None]:
%matplotlib notebook

In [None]:
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by dependencies — consider a narrower filter.
warnings.simplefilter("ignore")

In [None]:
# Forwarded to the solver constructor (Solver(sim, simFolder, delt, odestep, ...)).
# NOTE(review): presumably the interval between hook calls, in seconds — confirm against hook.MyOwnSolver.
delt = 1.0 #s
# Forwarded to the solver constructor. NOTE(review): presumably the ODE step, in seconds — confirm.
odestep = 0.1 # s
# Forwarded to the solver constructor. NOTE(review): presumably selects a Cython code path — confirm.
cythonBool = False
# Total simulated time, taken from the user input `simulation_time`.
totalTime = simulation_time #s

In [None]:
def initSim():
    """Create the output folder and the RDME simulation object with its run settings.

    Reads the notebook-level names ``simulation_folder_name``,
    ``DNA_configuration_number`` and ``totalTime``.

    Returns:
        A 14-tuple, in this order:
        ``sim`` (the ``RDMESim`` instance), ``N_edges`` (lattice sites per edge),
        ``N_2`` (``N_edges/2``, a float), ``sim_center`` (``[N_2, N_2, N_2]``),
        ``ptn_ratio``, ``dna_monomers``, ``cyto_radius`` (in lattice sites),
        ``riboFile``, ``dnaFile``, ``dnaPartFile`` (input file paths),
        ``filename`` (output ``.lm`` path), ``simFolder``,
        ``PartIdxMap`` and ``pmap`` (both returned as empty dicts).

    Raises:
        OSError: if the simulation folder cannot be created for any reason other
            than it already existing (e.g. missing permissions).
    """

    simFolder = '../simulations/' + simulation_folder_name + '/'

    # Only "already exists" is tolerated; any other failure (permissions, bad
    # path, ...) must surface here instead of being reported as "already exists".
    try:
        os.makedirs(simFolder)
        print('Created sim directory')
    except FileExistsError:
        print('sim directory already exists')

    filename = simFolder + 'MinCell_jLM_RDME_CME_ODE_polysomes.lm'

    N_edges = 64 # Number of subvolumes making up an edge of the simulation space N x N x N

    # True division on purpose: N_2 is a float, exactly as downstream code receives it today.
    N_2 = N_edges/2

    sim = RDMESim("JCVI-syn3A",
                  filename,
                  [N_edges,N_edges,N_edges],
                  8e-9,
                  "extracellular")

    cyto_radius = 2.00e-7/sim.latticeSpacing #m converted to lattice sites (8 nm lattice spacing)
    dna_monomers = 46188

    # Ratio of the cell volume to the volume of a cell with radius 0.2;
    # with the radius used here both volumes are identical, so the ratio is 1.0.
    cyto_vol = (4/3)*np.pi*0.200**3

    cyto_200 = (4/3)*np.pi*0.2**3

    ptn_ratio = (2.3e6*cyto_vol)/(2.3e6*cyto_200)

    riboFile = '../model_data/s1c15/s1c15_coords_nm_adaptive_fitting_s1c15_trans_id_8nm.txt'

    dnaFile = '../model_data/s1c15/s1c15_base_CG_reps00001_00090/s1c15_base/CG/s1c15_base_rep000' + DNA_configuration_number + '_CG_coords.dat'
    dnaPartFile = '../model_data/s1c15/s1c15_base_CG_reps00001_00090/s1c15_base/CG/s1c15_base_rep000' + DNA_configuration_number + '_FG_nodes.dat'

    sim_center = [N_2,N_2,N_2]

    sim.timestep = 30e-6
    sim.simulationTime=totalTime
    sim.latticeWriteInterval=1.0
    sim.speciesWriteInterval=1.0

    # Filled in later by the region/RDME construction helpers.
    pmap = {}

    PartIdxMap = {}

    print('Simulation Initialized')

    return sim, N_edges, N_2, sim_center, ptn_ratio, dna_monomers, cyto_radius, riboFile, dnaFile, dnaPartFile, filename, simFolder, PartIdxMap, pmap

In [None]:
### Load all necessary files
# The reconstruction matches reactions with gene-protein-reactions (GPR) that use MMSYN1* IDs.
reconstPD = pd.read_excel("../model_data/reconstruction.xlsx", sheet_name='Reactions')

# The annotation matches MMSYN1* IDs with JCVISYN3* IDs (or "locus tags").
annotatPD = pd.read_excel("../model_data/FBA/Syn3A_annotation_compilation.xlsx",
                         sheet_name="Syn3A_annotation_compilation_condensed")

# The genome data matches "locus tags" with AOE* protein IDs.
# It provides both the gene sequence, needed for transcription reactions in the ODE model,
# and the protein sequence, needed for translation reactions in the model.
# This is the NCBI GenBank-formatted file (https://www.ncbi.nlm.nih.gov/nuccore/CP014992.1).

genomeFile2 = '../model_data/syn2.gb'
# Only the first record is needed; opening the file ourselves guarantees it is
# closed again even though the parser generator is not exhausted.
with open(genomeFile2) as genome2_handle:
    genome2 = next(SeqIO.parse(genome2_handle, "gb"))

# This is the NCBI GenBank-formatted file (https://www.ncbi.nlm.nih.gov/nuccore/CP016816.2).
genomeFile3A = '../model_data/syn3A.gb'
# Parse the Syn3A file once ("gb" and "genbank" name the same format) and
# expose its first record under both names used by the rest of the notebook.
genome_syn3A = list(SeqIO.parse(genomeFile3A, "genbank"))
genome3A = genome_syn3A[0]
dna3A = genome_syn3A[0]

# The proteomics matches AOE IDs with quantitative proteomics data.
proteomPD = pd.read_excel("../model_data/proteomics.xlsx", sheet_name="Proteomics", skiprows=[0] )

In [None]:
def getSequences(jcvi3AID):
    """Return the RNA and amino-acid sequences for a Syn3A locus tag.

    The location stored in ``genomeLocDict`` for ``jcvi3AID`` is extracted from
    ``genome3A.seq`` and transcribed; the transcript is then translated.

    Args:
        jcvi3AID: locus tag used as key into ``genomeLocDict``.

    Returns:
        ``(rnasequence, aasequence)``. If the lookup, extraction or translation
        fails, both values are ``0``.
    """
    try:
        rnasequence = genomeLocDict[jcvi3AID].extract(genome3A.seq).transcribe()

        # Using translation table 4 from NCBI: "Mycoplasma Code"
        # https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG4
        # Translate the transcript computed above instead of extracting it a second time.
        aasequence = rnasequence.translate(table=4)

    except Exception:
        # Unknown locus tag or untranslatable sequence: report "no sequence".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        aasequence  = 0
        rnasequence = 0

    return rnasequence, aasequence

In [None]:
def getRNAsequences(jcvi3AID):
    """Return the RNA sequence for a Syn3A locus tag.

    The location stored in ``genomeLocDict`` for ``jcvi3AID`` is extracted from
    ``genome3A.seq`` and transcribed.

    Args:
        jcvi3AID: locus tag used as key into ``genomeLocDict``.

    Returns:
        The transcribed sequence, or ``0`` if the lookup or extraction fails.
    """
    try:
        rnasequence = genomeLocDict[jcvi3AID].extract(genome3A.seq).transcribe()

    except Exception:
        # Unknown locus tag or failed extraction: report "no sequence".
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        rnasequence = 0

    return rnasequence

In [None]:
# Lookup tables filled from the two GenBank records.
AOEtoJ2 = {}            # protein_id -> locus tag (from genome2)
J2toAOE = {}            # locus tag -> protein_id (from genome2)
genomeLocDict = {}      # locus tag -> location, CDS with protein_id plus rRNA/tRNA (from genome3A)
genomePtnLocDict = {}   # locus tag -> location, CDS with protein_id only (from genome3A)
genomeRnaLocDict = {}   # locus tag -> location, rRNA/tRNA only (from genome3A)
Locus3A = []            # every CDS/rRNA/tRNA locus tag of genome3A, in feature order

# genome2: map protein IDs to locus tags and back.
for feature in genome2.features:
    if feature.type == "CDS":
        JCVSYN2_tag = feature.qualifiers['locus_tag'][0]
        # Not all entries have an AOE protein_id
        if 'protein_id' not in feature.qualifiers:
            print("Locus ", JCVSYN2_tag, " has no AOE id!")
            continue
        AOE_locus = feature.qualifiers['protein_id'][0]
        AOEtoJ2[AOE_locus] = JCVSYN2_tag
        J2toAOE[JCVSYN2_tag] = AOE_locus
    elif feature.type in ("rRNA", "tRNA"):
        # The tag is read but no genome2 location is stored for RNA features.
        JCVSYN2_tag = feature.qualifiers['locus_tag'][0]

# genome3A: record the location of every protein-coding and RNA gene.
for feature in genome3A.features:
    feature_kind = feature.type
    if feature_kind not in ("CDS", "rRNA", "tRNA"):
        continue

    JCVSYN3A_tag = feature.qualifiers['locus_tag'][0]
    Locus3A.append(JCVSYN3A_tag)

    if feature_kind == "CDS":
        # CDS entries without a protein_id are reported and not stored.
        if 'protein_id' in feature.qualifiers:
            genomePtnLocDict[JCVSYN3A_tag] = feature.location
            genomeLocDict[JCVSYN3A_tag] = feature.location
        else:
            print("Locus ", JCVSYN3A_tag, " is pseudo.")
    else:
        genomeRnaLocDict[JCVSYN3A_tag] = feature.location
        genomeLocDict[JCVSYN3A_tag] = feature.location

In [None]:
# Indices (into dna3A.features) of every feature carrying a non-empty 'product' qualifier.
# TODO (from the original author): figure out how to sort out ribosomal operons.
gene_list = [
    feature_index
    for feature_index, feature in enumerate(dna3A.features)
    if 'product' in feature.qualifiers and feature.qualifiers['product'][0]
]

# One [locus_tag, start, strand] entry per selected feature, in feature order.
gene_starts = []

for gene in gene_list:
    feature = dna3A.features[gene]

    locusTag = feature.qualifiers['locus_tag'][0]
    gene_start = int(feature.location.start)

    # Read the strand from the location: SeqFeature.strand is only a deprecated
    # alias for it and is not available in every Biopython release.
    direction = feature.location.strand

    gene_starts.append([locusTag, gene_start, direction])
    
# gene_starts

In [None]:
PtnMetDF = pd.read_csv("../model_data/protein_metabolites_frac.csv")
# PtnMetDF

In [None]:
riboPtnMetDF = pd.read_csv("../model_data/ribo_protein_metabolites.csv")
# riboPtnMetDF

In [None]:
memPtnMetDF = pd.read_csv("../model_data/membrane_protein_metabolites.csv")
# memPtnMetDF

In [None]:
rrnaMetDF_1 = pd.read_csv("../model_data/rrna_metabolites_1.csv")
# rrnaMetDF_1

In [None]:
rrnaMetDF_2 = pd.read_csv("../model_data/rrna_metabolites_2.csv")
# rrnaMetDF_2

In [None]:
trnaMetDF = pd.read_csv("../model_data/trna_metabolites_synthase.csv")
# trnaMetDF

In [None]:
# "gene" column of every row: entries of riboPtnMetDF first, then those of PtnMetDF.
named_PTN_list = [row["gene"] for _, row in riboPtnMetDF.iterrows()]
named_PTN_list.extend(row["gene"] for _, row in PtnMetDF.iterrows())

In [None]:
trnaCmeMetDF = pd.read_csv("../model_data/trna_metabolites_synthase.csv")

In [None]:
from diffusion import *
# from MC_CME import *
from MC_RDME import * 
from regions_and_complexes import *
from GIP_rates import *


In [None]:
rep = 1
partIdx = 1

# Create the simulation object and the run settings.
(sim, N_edges, N_2, sim_center, ptn_ratio, dna_monomers, cyto_radius,
 riboFile, dnaFile, dnaPartFile, filename, simFolder, PartIdxMap, pmap) = initSim()

# Build the spatial regions; partIdx is passed in and returned updated.
(sim, genePoints, ribo_points, ribo_center_points,
 ext, mem, cyt, ribo, dna, she, cyto_shell, partIdx) = buildRegions(
    sim, N_edges, N_2, sim_center, ptn_ratio, dna_monomers, cyto_radius,
    riboFile, dnaFile, filename, pmap, PartIdxMap, partIdx,
)

# sim.finalize()

In [None]:
# sim.displayGeometry()

In [None]:
# break

In [None]:
# Define the RDME species/reactions; partIdx is passed in and returned updated.
(sim, geneEnds, geneStarts, singleStatePtnDict, multiStatePtnDict, degDict,
 tRNAstateDict, RDME_species_list, partIdx, rtRNA_ID_dict, ordered_poly_ribo) = constructRDME(
    sim, pmap, genePoints, ribo_points, ribo_center_points,
    ext, mem, cyt, ribo, dna, she, cyto_shell,
    N_edges, N_2, sim_center, ptn_ratio, dna_monomers, cyto_radius,
    dnaPartFile, gene_starts,
    PtnMetDF, riboPtnMetDF, memPtnMetDF, trnaMetDF,
    genomePtnLocDict, PartIdxMap, partIdx,
)

In [None]:
# print(len(pmap))
print(pmap)

In [None]:
sim.showSpecies('R_1')


In [None]:
# sim.sp

In [None]:
print(partIdx)
print(PartIdxMap)

In [None]:
# Values of PartIdxMap, in the dictionary's insertion order.
Idx_list = list(PartIdxMap.values())

# Every key of PartIdxMap prefixed by a comma, concatenated in the same order
# (so a non-empty result starts with ',').
specList = ''.join(',' + metID for metID in PartIdxMap)

In [None]:
# Sanity check: consecutive entries of Idx_list are expected to increase by exactly 1.
# Each violation is printed as (next value, current value, position of the current value).
mistakes = 0

# Walk over adjacent pairs so the last element is never indexed past the end;
# no exception handler is needed and genuine errors are no longer hidden.
for i, (current_idx, next_idx) in enumerate(zip(Idx_list, Idx_list[1:])):
    if next_idx != current_idx + 1:
        print(next_idx, current_idx, i)
        mistakes = mistakes + 1

print(mistakes)
# specList is a single comma-joined string, so this is its character count,
# not the number of species.
print(len(specList))

In [None]:
sim

In [None]:
# break

In [None]:
import setICs
setICs.__main__(pmap)

In [None]:
import hook
rdmeCmeOdeHookSolver = hook.MyOwnSolver

In [None]:
# Combine the IntMpdRdmeSolver with the hook class into one solver type,
# then instantiate it with the simulation state built in the cells above.
Solver = makeSolver(IntMpdRdmeSolver, rdmeCmeOdeHookSolver)
solver = Solver(
    sim, simFolder, delt, odestep, cythonBool, pmap, totalTime,
    geneEnds, geneStarts,
    singleStatePtnDict, multiStatePtnDict, degDict, tRNAstateDict,
    RDME_species_list, PartIdxMap, rtRNA_ID_dict, ordered_poly_ribo,
)

In [None]:
sim.finalize()

In [None]:
# break

In [None]:
# Run the simulation with the `solver` instance on CUDA device 0.
# NOTE(review): the GPU index is hard-coded; edit cudaDevices to target a different device.
sim.run(solver=solver, cudaDevices=[0])