In [43]:
#Code to generate parameters for Gillespie simulations - positive regulation

In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Parameter generation


In [2]:
from scipy.stats import qmc
import pandas as pd
import numpy as np

In [None]:
# --- Define parameters ---
# Per-gene kinetic parameters; each one is sampled independently for gene 1
# and for gene 2.
gene_params = [
    "p_on",
    "p_off",
    "mrna_half_life",
    "protein_half_life",
    "p_prod_protein",
    "p_prod_mRNA",
]

# Parameters describing the two directed interactions between the genes.
interaction_params = [
    "n_gene_1_to_gene_2",
    "p_add_gene_1_to_gene_2",
    "n_gene_2_to_gene_1",
    "p_add_gene_2_to_gene_1",
]

# Column order of the sampled table: all gene-1 parameters, then all gene-2
# parameters, then the interaction parameters.
param_names = [
    f"{p}_gene_{gene}" for gene in (1, 2) for p in gene_params
] + interaction_params

# (lower, upper) sampling range for every base parameter name.
param_bounds = dict(
    p_on=(0.01, 3),
    p_off=(0.05, 60),
    mrna_half_life=(1, 20),
    protein_half_life=(5, 200),
    p_prod_mRNA=(0.2, 60),
    p_prod_protein=(15, 2700),
    n_gene_1_to_gene_2=(0.1, 5),
    n_gene_2_to_gene_1=(0.1, 5),
    p_add_gene_1_to_gene_2=(2, 10),
    p_add_gene_2_to_gene_1=(2, 10),
)

# Bounds listed in exactly the same order as `param_names`: the per-gene
# ranges are repeated once per gene, followed by the interaction ranges.
bounds = [
    param_bounds[base_name]
    for base_name in gene_params + gene_params + interaction_params
]

# --- Constraint function ---
def hl_to_deg(hl):
    """Return ``ln(2) / hl``, the first-order decay rate for half-life ``hl``.

    ``hl`` may be a scalar or anything that supports division by it
    (e.g. a NumPy array or pandas Series); the result has the matching shape.
    """
    ln_two = np.log(2)
    return ln_two / hl


# --- Sampling ---
n_valid_required = 25000
oversample_factor = 1
n_attempts = int(n_valid_required * oversample_factor)
seed = 42

# Work in log10 space so that every parameter is sampled log-uniformly
# between its lower and upper bound.
log_bounds_lower = [np.log10(lo) for lo, _ in bounds]
log_bounds_upper = [np.log10(hi) for _, hi in bounds]

# Draw a seeded Latin Hypercube sample in the unit cube, one dimension per
# entry in `bounds`.
sampler = qmc.LatinHypercube(d=len(bounds), seed=seed)
sample = sampler.random(n=n_attempts)

# Stretch the unit-cube sample onto the log10 bounds, then undo the log.
scaled_log_sample = qmc.scale(sample, log_bounds_lower, log_bounds_upper)
scaled_samples = np.power(10, scaled_log_sample)

# One row per sampled parameter set, one column per name in `param_names`.
df = pd.DataFrame(data=scaled_samples, columns=param_names)

# No validity constraint is applied at the moment, so every sample is kept.
df_valid = df

# --- Print acceptance stats ---
print(f"Generated {n_attempts} samples")

# Guard against ending up with fewer rows than requested.
if not len(df_valid) >= n_valid_required:
    raise ValueError(f"Only {len(df_valid)} valid samples found. Increase oversample_factor or relax constraint.")

# Truncate to the first `n_valid_required` rows and renumber them from 0.
df_valid = df_valid.head(n_valid_required).reset_index(drop=True)

import matplotlib.pyplot as plt

# One log10 histogram per sampled parameter, laid out on a fixed 5 x 4 grid.
fig, axes = plt.subplots(nrows=5, ncols=4, figsize=(20, 15))
axes = axes.flatten()

for i, col in enumerate(df_valid.columns):
    panel = axes[i]
    log_values = np.log10(df_valid[col])
    panel.hist(log_values, bins=100, color='gray', edgecolor='black')
    panel.set_title(f"log10({col})")
    panel.set_xlabel("log10(Value)")
    panel.set_ylabel("Frequency")

# Blank out the grid cells that did not receive a histogram.
for j in range(i + 1, len(axes)):
    axes[j].axis("off")

plt.tight_layout()
plt.show()


# --- Expand to long format ---
# Each sampled row (one gene pair) becomes two output rows, one per gene.
# Gene-specific columns lose their "_gene_N" suffix; the interaction columns
# are copied unchanged onto both rows.
#
# The column classification depends only on the column names, so it is done
# once here instead of being repeated for every sampled row.
gene_1_cols = {
    c: c.replace("_gene_1", "")
    for c in df_valid.columns
    if "_gene_1" in c and "_gene_2" not in c
}
gene_2_cols = {
    c: c.replace("_gene_2", "")
    for c in df_valid.columns
    if "_gene_2" in c and "_gene_1" not in c
}
interaction_cols = (
    [c for c in df_valid.columns if "gene_1_to_gene_2" in c]
    + [c for c in df_valid.columns if "gene_2_to_gene_1" in c]
)

rows = []
# Iterate over plain dict records rather than `iterrows()`, which builds a
# full Series object for every row.
for idx, record in zip(df_valid.index, df_valid.to_dict("records")):
    shared = {c: record[c] for c in interaction_cols}
    for gene_id, col_map in ((1, gene_1_cols), (2, gene_2_cols)):
        own = {short: record[full] for full, short in col_map.items()}
        # `pair_id` ties the two rows of a pair together; `gene_id` says
        # which member of the pair this row describes.
        rows.append({**own, **shared, "pair_id": idx, "gene_id": gene_id})

final_df = pd.DataFrame(rows).reset_index(drop=True)

# --- Save ---
# The row index is written as the first CSV column (index=True).
output_path = "/home/mzo5929/Keerthana/grnInference/simulation_data/gillespie_simulation/sim_details/lhc_sampled_parameters_bidirectional_positive_reg_2.csv"
final_df.to_csv(output_path, index=True)
print(f"\n✅ Saved to {output_path}")


In [1]:
# param_df = pd.read_csv("/home/mzo5929/Keerthana/grnInference/simulation_data/gillespie_simulation/sim_details/lhc_sampled_parameters_negative_reg.csv", index_col = 0)

import numpy as np
import pandas as pd

def hl_to_deg(hl):
    """Return the degradation rate ``ln(2) / hl`` for a half-life ``hl``.

    Works element-wise when ``hl`` is an array or pandas Series.
    """
    ln_two = np.log(2)
    return ln_two / hl

def compute_steady_state_levels(param_df, gene_id):
    """Add steady-state mean mRNA and protein columns to ``param_df``.

    The Hill response of the incoming regulation is fixed at 0.5, so the
    effective on-rate is ``p_on + 0.5 * p_add``. The same ``gene_id`` choice
    is applied to every row of ``param_df``.

    Args:
        param_df: Table indexed by column name that provides ``p_on``,
            ``p_off``, ``p_prod_mRNA``, ``p_prod_protein``,
            ``mrna_half_life`` and ``protein_half_life``. The ``p_add_*``
            interaction columns are optional and default to 0.0 when absent.
        gene_id: 1 or 2. For 2 the incoming term is
            ``p_add_gene_1_to_gene_2``; for 1 it is ``p_add_gene_2_to_gene_1``.

    Returns:
        The same ``param_df`` object. It is modified in place: the columns
        ``mean_mRNA_level`` and ``mean_protein_level`` are added/overwritten.

    Raises:
        AssertionError: if ``gene_id`` is neither 1 nor 2.
    """
    assert gene_id in [1, 2], "gene_id must be 1 or 2"

    # Kinetic inputs
    on_rate = param_df["p_on"]
    off_rate = param_df["p_off"]
    mrna_prod = param_df["p_prod_mRNA"]
    protein_prod = param_df["p_prod_protein"]
    mrna_deg = hl_to_deg(param_df["mrna_half_life"])
    protein_deg = hl_to_deg(param_df["protein_half_life"])

    # Pick the interaction term that points *into* the requested gene.
    incoming_col = (
        "p_add_gene_1_to_gene_2" if gene_id == 2 else "p_add_gene_2_to_gene_1"
    )
    p_add = param_df.get(incoming_col, 0.0)

    # Fraction of time the promoter is on, with the Hill response held at 0.5.
    effective_on = on_rate + 0.5 * p_add
    on_fraction = effective_on / (effective_on + off_rate)

    # Mean mRNA, then mean protein derived from it.
    mrna_level = on_fraction * mrna_prod / mrna_deg
    protein_level = mrna_level * protein_prod / protein_deg

    param_df["mean_mRNA_level"] = mrna_level
    param_df["mean_protein_level"] = protein_level
    return param_df

# Usage: load the sampled parameter table (first CSV column is the index) and
# attach the steady-state levels computed with gene 2's incoming interaction.
param_df = compute_steady_state_levels(
    pd.read_csv(
        "/home/mzo5929/Keerthana/grnInference/simulation_data/gillespie_simulation_run_2/sim_details/lhc_sampled_parameters_positive_reg_2.csv",
        index_col=0,
    ),
    gene_id=2,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Keep only strictly positive values: log-spaced bins are undefined for
# values <= 0 (this comparison also drops NaNs).
values = param_df['mean_protein_level']
values = values[values > 0]
if values.empty:
    raise ValueError("No positive 'mean_protein_level' values to plot on a log axis.")

# Define log-spaced bins. `n_bins` bins require `n_bins + 1` edges; passing
# only `n_bins` edges would silently produce one bin fewer than requested.
n_bins = 100
min_val = values.min()
max_val = values.max()
log_bins = np.logspace(np.log10(min_val), np.log10(max_val), n_bins + 1)

# Plot
plt.figure(figsize=(6, 4))
plt.hist(values, bins=log_bins)
plt.xscale('log')
# The y axis is linear. If plt.yscale('log') is enabled here, update the
# y label below to say so.
plt.xlabel("Mean protein level (log scale)")
plt.ylabel("Frequency")
plt.title("Log-Binned Histogram of Mean Protein Levels")
plt.tight_layout()
plt.show()


In [None]:
# (rows, columns) of the subset whose mean mRNA level is below 100.
low_mrna_mask = param_df['mean_mRNA_level'] < 100
param_df.loc[low_mrna_mask].shape

In [None]:
# Histogram of the mean mRNA levels that fall below 1.
sub_unit_mrna = param_df.loc[param_df['mean_mRNA_level'] < 1, 'mean_mRNA_level']
plt.hist(sub_unit_mrna)

## Generate test parameters


In [32]:
import pandas as pd
import numpy as np

# Template rows for the two genes of a pair. Positions 7 and 8 are overwritten
# for every sweep step below; the two templates differ only in the last entry.
# NOTE(review): the meaning of each position is not stated in this notebook —
# confirm the column layout against whatever reads this CSV.
gene_1 = [0.55, 8.08, 5, 45, 2, 500, 2, 6, 7, 1]
gene_2 = [0.55, 8.08, 5, 45, 2, 500, 2, 6, 7, 2]

# Sweep values: nominally 0.10, 0.15, ..., 0.95 (float step, so individual
# values carry the usual floating-point rounding).
r_add = np.arange(0.1, 1, 0.05)
rows = []

for i, r_add_curr in enumerate(r_add):
    for template in (gene_1, gene_2):
        # Position 7 <- current sweep value, position 8 <- sweep step index.
        template[7:9] = [r_add_curr, i]
        # Store a snapshot, because the template is mutated again next step.
        rows.append(list(template))

# Two rows per sweep step (gene 1 then gene 2); columns are left unnamed, so
# the CSV header is the default integer labels.
df = pd.DataFrame(rows)
df.to_csv("/home/mzo5929/Keerthana/grnInference/simulation_data/gillespie_simulation_test/sim_details/effect_of_radd_negative.csv")

