## Building linear network 
Code to build a linear network, visualise it, write down the reactions occurring in it, and set up the initial conditions.
Adapted directly from code written by Nico Bodkin. The model and reactions are based on Schuh et al. (2020).

### Importing the required packages for this step

In [8]:
import pandas as pd
import numpy as np
import networkx as nx
import os
import matplotlib.pyplot as plt
import sympy

In [6]:
%pip install sympy

Collecting sympy
  Downloading sympy-1.13.2-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading sympy-1.13.2-py3-none-any.whl (6.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.2/6.2 MB 21.5 MB/s eta 0:00:00
Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy
Successfully installed mpmath-1.3.0 sympy-1.13.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Creating a linear graph 
Each node is a gene/chemical species and each edge is a reaction; the edge weight is the strength of the reaction and its sign indicates activation or repression.

In [17]:
def createLinearGraph(outputPath, numGenes):
    """Build a linear (chain) gene network and save it as ``graph.gml``.

    Genes are named relative to the middle of the chain: the first
    ``numGenes // 2`` genes are ``g-<k>`` (ending at ``g-1``) and the
    remaining genes are ``g0``, ``g1``, ...  Each gene gets one directed
    edge of weight 1 to the next gene in the list.

    Args:
        outputPath: Directory in which ``graph.gml`` is written. It is
            created, together with any missing parent directories, if it
            does not exist yet.
        numGenes: Number of genes (nodes) in the chain.

    Returns:
        A tuple ``(DG, geneList)`` with the ``networkx.DiGraph`` and the
        list of gene names in chain order.
    """
    middleGene = numGenes // 2
    geneList = [
        f"g{i - middleGene}" if i >= middleGene else f"g-{middleGene - i}"
        for i in range(numGenes)
    ]

    DG = nx.DiGraph()
    # Nodes are added first so that they appear in chain order in the graph.
    DG.add_nodes_from(geneList)

    # Each gene points to its successor in the list; the weight is always 1.
    edgeList = [
        (source, target, 1) for source, target in zip(geneList, geneList[1:])
    ]
    DG.add_weighted_edges_from(edgeList)

    # makedirs (unlike mkdir) also creates missing parents, and exist_ok
    # avoids failing when the directory is already there.
    os.makedirs(outputPath, exist_ok=True)
    nx.write_gml(DG, os.path.join(outputPath, "graph.gml"))
    return DG, geneList

### Visualising the gene network created

In [2]:
def drawGraph(DG, path=None):
    """Draw the gene network with a shell layout.

    Args:
        DG: The graph to draw.
        path: Optional file path. When given, the graph is drawn on a new
            10x10 figure, saved there as a transparent PNG, and the figure
            is closed. When omitted, the graph is drawn on the current
            figure and nothing is saved.
    """
    saveToFile = bool(path)

    if saveToFile:
        plt.figure(figsize=(10, 10))

    layout = nx.shell_layout(DG, rotate=270)
    nx.draw(
        DG,
        with_labels=True,
        font_weight='medium',
        node_color='lightblue',
        pos=layout,
        node_size=500,
        font_size=12,
        width=1.5,
        alpha=0.8,
    )

    if saveToFile:
        plt.savefig(path, format='png', transparent=True)
        plt.close()

### Writing the reactions
The reactions are based on the propensity templates defined at the top of the following cell.

In [9]:
# Propensity templates. "gene" and "activator" are placeholders that
# prepareReactions replaces with concrete gene names; the {braced} names are
# rate parameters that are left in the text as placeholders.

# Hill-type activation of an inactive gene by the mRNA of an activator gene.
activationGeneralPropensity = "({rAdd}*(activator_mRNA**{n})/({k}**{n} + activator_mRNA**{n}))*gene_I"
# Activation of an inactive gene that does not depend on any activator.
activationOnlyPropensity = "{rOn}*gene_I"
# Inactivation of an active gene.
inactivationGeneralPropensity = "{rOff}*gene_A"
# mRNA production while the gene is active (production rate scaled by d).
mrnaProductionActivated = "{d}*{rProd}*gene_A"
# mRNA production while the gene is inactive.
mrnaProductionInactivated = "{rProd}*gene_I"
# Degradation of an mRNA molecule.
degradationGeneralPropensity = "{rDeg}*gene_mRNA"


def prepareReactions(geneList, DG):
    """Build the table of reactions for every gene in the network.

    For each gene the following reactions are emitted, in this order:
    activator-independent activation, one activation reaction per incoming
    edge of ``DG``, inactivation, mRNA production from the active state,
    mRNA production from the inactive state, and mRNA degradation.

    Args:
        geneList: Gene names, in the order the reactions should be listed.
        DG: Directed graph whose incoming edges (``DG.in_edges``) give the
            activators of each gene.

    Returns:
        A ``pandas.DataFrame`` with columns ``species1``, ``change1``,
        ``propensity``, ``species2``, ``change2`` and ``time``. ``species1``
        changes by ``change1`` and ``species2`` by ``change2``; reactions
        that involve a single species use ``"-"`` for ``species2`` and
        ``change2``. ``time`` is ``"-"`` for every row. The columns are
        present even when ``geneList`` is empty.
    """
    reactionColumns = ["species1", "change1", "propensity", "species2", "change2"]
    reactionList = []

    for gene in geneList:
        active = gene + "_A"
        inactive = gene + "_I"
        mrna = gene + "_mRNA"

        # Activation without any activator: gene_I -> gene_A.
        reactionList.append({
            "species1": active, "change1": 1,
            "propensity": activationOnlyPropensity.replace("gene", gene),
            "species2": inactive, "change2": -1,
        })

        # One activation reaction per activating gene (incoming edge).
        for edge in DG.in_edges(gene, data=True):
            # The activator placeholder is filled in before the gene
            # placeholder, matching the order the templates expect.
            activationPropensity = activationGeneralPropensity.replace('activator', edge[0])
            activationPropensity = activationPropensity.replace('gene', gene)
            reactionList.append({
                "species1": active, "change1": 1,
                "propensity": activationPropensity,
                "species2": inactive, "change2": -1,
            })

        # Inactivation: gene_A -> gene_I.
        reactionList.append({
            "species1": inactive, "change1": 1,
            "propensity": inactivationGeneralPropensity.replace("gene", gene),
            "species2": active, "change2": -1,
        })

        # mRNA production from the active and from the inactive state.
        for template in (mrnaProductionActivated, mrnaProductionInactivated):
            reactionList.append({
                "species1": mrna, "change1": 1,
                "propensity": template.replace("gene", gene),
                "species2": "-", "change2": "-",
            })

        # mRNA degradation.
        reactionList.append({
            "species1": mrna, "change1": -1,
            "propensity": degradationGeneralPropensity.replace("gene", gene),
            "species2": "-", "change2": "-",
        })

    # Naming the columns explicitly keeps the schema stable for an empty
    # gene list, so callers that group by these columns do not fail.
    reactionDF = pd.DataFrame(reactionList, columns=reactionColumns)
    reactionDF['time'] = "-"
    return reactionDF

### Setting up the initial system
For all genes, I = 1, A = 0 and mRNA = 0 (every gene starts inactive with no mRNA).

In [4]:
def prepareInitialState(geneList):
    """Build the table of initial species counts.

    Every gene contributes three rows, in this order: ``<gene>_A`` with
    count 0, ``<gene>_I`` with count 1 and ``<gene>_mRNA`` with count 0.

    Args:
        geneList: Gene names, in the order their species should be listed.

    Returns:
        A ``pandas.DataFrame`` with one row per species and the columns
        ``species`` and ``count``.
    """
    # (species suffix, starting count) for each species of a gene.
    startingCounts = (("_A", 0), ("_I", 1), ("_mRNA", 0))
    rows = [
        {"species": gene + suffix, "count": count}
        for gene in geneList
        for suffix, count in startingCounts
    ]
    return pd.DataFrame(rows)

In [10]:
from sympy import symbols, simplify, parse_expr
from sympy.core.sympify import SympifyError  # Correctly import SympifyError
import re

# Define sympy symbols for some of the rate parameters.
# NOTE(review): the helper functions in this cell operate on plain strings and
# do not reference these names; they look unused here - confirm before removing.
rOn, rOff, d, rProd = symbols('rOn rOff d rProd')

def regexReplace(expression, old, new):
    """Replace every whole-word occurrence of ``old`` in ``expression``.

    ``old`` is matched literally and only where it is delimited by word
    boundaries, so e.g. ``d`` is not replaced inside ``rProd``.

    Args:
        expression: Text to search.
        old: Literal symbol name to look for.
        new: Replacement text (used as an ``re`` replacement template).

    Returns:
        The text with all whole-word matches replaced.
    """
    wholeWord = re.compile(r'\b' + re.escape(old) + r'\b')
    return wholeWord.sub(new, expression)

def preciseRestore(expression):
    """Put the ``{parameter}`` placeholders and ``g-`` gene names back.

    Each known parameter name that appears as a whole word is wrapped in
    braces again, and every ``g_`` is turned back into ``g-``.

    Args:
        expression: Expression text with bare parameter names and ``g_``
            in place of ``g-``.

    Returns:
        The expression text in the original placeholder format.
    """
    parameterNames = ('rOn', 'rOff', 'd', 'rProd', 'rDeg', 'k', 'n', 'rAdd')

    restored = expression
    for name in parameterNames:
        # Whole-word matching keeps short names such as 'd' or 'n' from
        # being replaced inside longer identifiers.
        restored = regexReplace(restored, name, '{' + name + '}')

    # NOTE(review): this rewrites every 'g_' in the text, not only gene
    # identifiers - fine as long as no other name contains 'g_'.
    return restored.replace('g_', 'g-')

def simplifyAndRestore(expression):
    """Simplify a propensity expression with sympy, keeping its format.

    The ``{}`` placeholder braces are stripped and ``g-`` is rewritten to
    ``g_`` so that sympy can parse the text; after simplification the
    placeholders and gene names are restored with ``preciseRestore``.

    Args:
        expression: Propensity text using ``{parameter}`` placeholders.

    Returns:
        The simplified expression in the original placeholder format. If
        parsing or simplification fails, an error is printed and the
        original ``expression`` is returned unchanged.
    """
    try:
        # Keep the sympy-friendly text in its own variable so the caller's
        # original expression is still available for the fallback below.
        prepared = expression.replace('{', '').replace('}', '').replace('g-', 'g_')
        parsed = parse_expr(prepared, evaluate=False)
        simplified = simplify(parsed)

        # Precisely restore the original formatting
        return preciseRestore(str(simplified))
    except SympifyError as e:
        print(f"Error sympifying expression: {expression}. Error: {e}")
    except Exception as e:
        # Deliberately broad: simplification is best-effort and a failure
        # must not abort the whole export.
        print(f"General error with expression: {expression}. Error: {e}")
    return expression

### The main function 
Takes the number of genes as input and writes graph.gml (the graph object), reactions.csv (the propensity equations of all reactions), reactions_other.csv (the same reactions with the propensities of identical state changes summed and simplified) and initialState.csv (the initial counts of all species).

In [11]:
def main(outputPath, nGene, plotPath):
    """Generate all files describing a linear network of ``nGene`` genes.

    Writes ``graph.gml``, ``reactions.csv``, ``reactions_other.csv`` and
    ``initialState.csv`` into ``outputPath``, draws the network to
    ``plotPath`` and prints the list of gene names.

    Args:
        outputPath: Directory that receives the generated files.
        nGene: Number of genes in the network.
        plotPath: Path handed to ``drawGraph`` for the network image.
    """
    graph, genes = createLinearGraph(outputPath, nGene)
    print(genes)
    drawGraph(graph, plotPath)

    # Full list of individual reactions.
    reactionTable = prepareReactions(genes, graph)
    reactionTable.to_csv(os.path.join(outputPath, "reactions.csv"), index=False)

    # Reactions with the same state change are merged by summing their
    # propensities, which are then simplified and put back in template form.
    groupKeys = ['species1', 'change1', 'species2', 'change2', 'time']
    summed = (
        reactionTable.groupby(groupKeys)['propensity']
        .apply(lambda terms: ' + '.join(terms))
        .reset_index()
    )
    summed['propensity'] = summed['propensity'].apply(simplifyAndRestore)
    summed.to_csv(os.path.join(outputPath, "reactions_other.csv"), index=False)

    # Initial counts of every species.
    startState = prepareInitialState(genes)
    startState.to_csv(os.path.join(outputPath, "initialState.csv"), index=False)
    



In [19]:
# Generate the graph, reaction tables, initial state and network image for
# linear networks of 5 up to and including 14 genes.
for nGene in range(5, 15):
    plotPath = f"/home/mzo5929/Keerthana/GRNsimulation/highthroughputData/linearNetworks/graphImages_{nGene}.png"
    outputPath = f"/home/mzo5929/Keerthana/GRNsimulation/highthroughputData/linearNetworks/reactions_{nGene}/"
    main(outputPath, nGene, plotPath)

['g-2', 'g-1', 'g0', 'g1', 'g2']
['g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2']
['g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3']
['g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3']
['g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4']
['g-5', 'g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4']
['g-5', 'g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4', 'g5']
['g-6', 'g-5', 'g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4', 'g5']
['g-6', 'g-5', 'g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4', 'g5', 'g6']
['g-7', 'g-6', 'g-5', 'g-4', 'g-3', 'g-2', 'g-1', 'g0', 'g1', 'g2', 'g3', 'g4', 'g5', 'g6']
