## This is computing the Hessian for various TIME OF DIVERGENCE values
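The Hessian is computed with respect to the ancestral population size (the `start_size` of the first epoch of deme 0). All population sizes and the migration rate are held fixed while the time of divergence varies.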

In [None]:
import msprime as msp
import demes
import demesdraw
import numpy as np

# Sweep over divergence times: for each tau, simulate a two-population
# split-with-migration model, approximate the Hessian with respect to the
# ancestral population size, and pickle the result to disk.
import pickle

from hessian_approximation import hessian_approximation

time_of_divergence = [100., 300., 500., 750., 1000., 2000., 3000., 5000., 7500., 10000., 12500., 15000., 17500., 20000., 22500., 25000., 27500., 30000.]
# Path of the parameter inside the demes graph: start_size of epoch 0 of
# deme 0 (presumably "anc", the first population added below -- confirm
# against the ordering produced by `to_demes()`).
params = [("demes", 0, "epochs", 0, "start_size")]

# Loop-invariant settings, defined once instead of on every iteration.
Ne = 1e4        # common size shared by all three populations
q_anc = Ne
q0 = Ne
q1 = Ne
m = 0.0001      # symmetric migration rate between P0 and P1
sample_size = 10
samples = {f"P{i}": sample_size for i in range(2)}

for tau in time_of_divergence:
    # The demography has to be rebuilt for each tau because the split time
    # is part of the model.
    demo = msp.Demography()
    demo.add_population(initial_size=q_anc, name="anc")
    demo.add_population(initial_size=q0, name="P0")
    demo.add_population(initial_size=q1, name="P1")
    demo.set_symmetric_migration_rate(populations=("P0", "P1"), rate=m)
    demo.add_population_split(time=tau, derived=["P0", "P1"], ancestral="anc")
    g = demo.to_demes()

    # Fixed seeds make the sweep reproducible, matching the seeding used by
    # the ancestral-population-size sweep elsewhere in this notebook.
    anc = msp.sim_ancestry(
        samples=samples,
        demography=demo,
        recombination_rate=1e-8,
        sequence_length=1e8,
        random_seed=42,
    )
    ts = msp.sim_mutations(anc, rate=1e-8, random_seed=42)

    # The Hessian is evaluated at the true ancestral size used to simulate.
    results = hessian_approximation(g, ts, params, "BOTH", np.array([q_anc]))
    print(results)

    # One pickle per tau; the plotting cell must read back this exact name.
    filepath = f"/Users/jkliang/Desktop/fit_refactor_VMAP/anc_1e4Ne_{tau}t.pkl"
    with open(filepath, 'wb') as file:
        pickle.dump(results, file)

In [None]:
import matplotlib.pyplot as plt
import pickle

# Load the per-tau Hessian results written by the divergence-time sweep and
# plot two of their entries against the time of divergence.
time_of_divergence = [100., 300., 500., 750., 1000., 2000., 3000., 5000., 7500., 10000., 12500., 15000., 17500., 20000., 22500., 25000., 27500., 30000.]

results = []
for tau in time_of_divergence:
    # Must match the name used when the sweep pickled its output
    # ("anc_1e4Ne_{tau}t.pkl"); the previous "anc_{tau}t.pkl" pointed at
    # files that the sweep never writes.
    filepath = f"/Users/jkliang/Desktop/fit_refactor_VMAP/anc_1e4Ne_{tau}t.pkl"
    # NOTE: pickle.load executes arbitrary code; only load files produced
    # locally by this notebook.
    with open(filepath, 'rb') as file:
        results.append(pickle.load(file))

# Each result is indexed as item[k][0][0]; presumably a sequence of 1x1
# Hessian arrays, one per method -- confirm against hessian_approximation.
# Entries 0 and 2 are the ones plotted (see the legend labels below).
first_values = [item[0][0][0] for item in results]
third_values = [item[2][0][0] for item in results]

plt.figure(figsize=(12, 6))
plt.plot(time_of_divergence, first_values, 'b-', label='Momi IICR + SMC Prime', marker='o')
plt.plot(time_of_divergence, third_values, 'r-', label='Momi SFS', marker='s')

# plt.xscale('log') can be enabled here if the early divergence times are
# too compressed on a linear axis.
plt.xlabel('Time of Divergence')
plt.ylabel('Negative Hessian Value')
plt.title('Hessian Values vs Time of Divergence')
plt.legend()
plt.grid(True, which="both", ls="--")

plt.show()

## This is computing the Hessian for various ANCESTRAL POPULATION SIZE values
The Hessian is computed with respect to the ancestral population size, which is also the quantity being varied; the time of divergence is kept fixed (tau = 1000).

In [None]:
import msprime as msp
import demes
import demesdraw
import numpy as np

# Sweep over ancestral population sizes: for each size, simulate a
# two-population split-with-migration model at a fixed divergence time,
# approximate the Hessian with respect to the ancestral population size, and
# pickle the result to disk.
import pickle

from hessian_approximation import hessian_approximation

# 30 geometrically spaced ancestral sizes between 1e3 and 1e5.
true_anc_pop = np.geomspace(1e3, 1e5, 30)
# Path of the parameter inside the demes graph: start_size of epoch 0 of
# deme 0 (presumably "anc", the first population added below -- confirm
# against the ordering produced by `to_demes()`).
params = [("demes", 0, "epochs", 0, "start_size")]

# Loop-invariant settings, defined once instead of on every iteration.
q0 = 1e4
q1 = 1e4
tau = 1000      # fixed divergence time for this sweep
m = 0.0001      # symmetric migration rate between P0 and P1
sample_size = 10
samples = {f"P{i}": sample_size for i in range(2)}

for Ne_anc in true_anc_pop:
    # The demography has to be rebuilt for each ancestral size.
    demo = msp.Demography()
    demo.add_population(initial_size=Ne_anc, name="anc")
    demo.add_population(initial_size=q0, name="P0")
    demo.add_population(initial_size=q1, name="P1")
    demo.set_symmetric_migration_rate(populations=("P0", "P1"), rate=m)
    demo.add_population_split(time=tau, derived=["P0", "P1"], ancestral="anc")
    g = demo.to_demes()

    # The same seed is reused for every ancestral size, so runs are
    # reproducible.
    anc = msp.sim_ancestry(
        samples=samples,
        demography=demo,
        recombination_rate=1e-8,
        sequence_length=1e8,
        random_seed=42,
    )
    ts = msp.sim_mutations(anc, rate=1e-8, random_seed=42)

    # The Hessian is evaluated at the true ancestral size used to simulate.
    results = hessian_approximation(g, ts, params, "BOTH", np.array([Ne_anc]))
    print(results)

    # The file name embeds the formatted float, so any reader has to
    # regenerate the identical geomspace grid to find these files.
    filepath = f"/Users/jkliang/Desktop/fit_refactor_VMAP/anc_{Ne_anc}Ne_1000tau.pkl"
    with open(filepath, 'wb') as file:
        pickle.dump(results, file)

In [None]:
import matplotlib.pyplot as plt
import pickle

# Load the per-size Hessian results written by the ancestral-population-size
# sweep and plot two of their entries against the ancestral population size.
# (This cell previously re-plotted the time-of-divergence files.)
import numpy as np  # local import so the cell does not rely on an earlier one

# Must be the identical grid used by the sweep that wrote the pickles: the
# file names embed the formatted float, so the values are regenerated here
# rather than retyped.
true_anc_pop = np.geomspace(1e3, 1e5, 30)

results = []
for Ne_anc in true_anc_pop:
    filepath = f"/Users/jkliang/Desktop/fit_refactor_VMAP/anc_{Ne_anc}Ne_1000tau.pkl"
    # NOTE: pickle.load executes arbitrary code; only load files produced
    # locally by this notebook.
    with open(filepath, 'rb') as file:
        results.append(pickle.load(file))

# Each result is indexed as item[k][0][0]; presumably a sequence of 1x1
# Hessian arrays, one per method -- confirm against hessian_approximation.
# Entries 0 and 2 are the ones plotted (see the legend labels below).
first_values = [item[0][0][0] for item in results]
third_values = [item[2][0][0] for item in results]

plt.figure(figsize=(12, 6))
plt.plot(true_anc_pop, first_values, 'b-', label='Momi IICR + SMC Prime', marker='o')
plt.plot(true_anc_pop, third_values, 'r-', label='Momi SFS', marker='s')

# The sizes are geometrically spaced over two orders of magnitude, so a log
# x-axis spreads the points evenly.
plt.xscale('log')
plt.xlabel('Ancestral Population Size')
plt.ylabel('Negative Hessian Value')
plt.title('Hessian Values vs Ancestral Population Size')
plt.legend()
plt.grid(True, which="both", ls="--")

plt.show()