In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch.distributions import Dirichlet, Bernoulli, Uniform
import pandas as pd
from tqdm import tqdm as tm

from src import Simulation as sim
from src import Dir_Reg
from src import Align
from src import visualize_latent_space as vls

# Select the compute device. Precedence: Apple MPS if it is available,
# otherwise CUDA if it is available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


Generate data sets that illustrate how the parameters of the model influence the behavior of the model. 
<br>
Settings:
<br>
Length of Time: 20 or 200
<br>
Embedding Dimension: 2
<br>
Number of Nodes: 1200
<br>
Parameters:  (1, 1, 5, 5), (1, 1, 2, 5), (1, 1, -2, 5), (1, 1, -5, 5)
<br>
Initial Distribution: Dir(1, 1, 1)


In [None]:
# Fix the torch RNG so the simulated trajectories are reproducible.
# The four models below are built in a fixed order because each construction
# presumably draws from this seeded stream — TODO confirm inside sim.ABC.
torch.manual_seed(4)

T = 20                                     # base number of time steps
n = 30                                     # number of nodes
alpha_0 = [[1, 1, 1] for _ in range(3)]    # three identical rows of ones

# Arguments shared by every model in this cell.
_abc_shared = dict(nodes=n, alpha_0=alpha_0)

# Only `time` and the third entry of `beta` vary between the models:
#   model_pos_2: beta[2] =  2, T time steps
#   model_pos_1: beta[2] =  1, 10*T time steps
#   model_neg_2: beta[2] = -2, 10*T time steps
#   model_neg_5: beta[2] = -5, T time steps
# NOTE(review): the markdown above lists 1200 nodes and parameter sets
# (1, 1, 5, 5) / (1, 1, 2, 5) / ...; the values here differ — confirm which
# description is current.
model_pos_2 = sim.ABC(time=T, beta=[1, 1, 2, 5], **_abc_shared)
model_pos_1 = sim.ABC(time=T * 10, beta=[1, 1, 1, 5], **_abc_shared)
model_neg_2 = sim.ABC(time=T * 10, beta=[1, 1, -2, 5], **_abc_shared)
model_neg_5 = sim.ABC(time=T, beta=[1, 1, -5, 5], **_abc_shared)

In [None]:
# Write the latent positions of each simulated model to its own CSV file.
# The second argument to `lat_pos` (3) is passed through unchanged; its
# meaning is not visible here — presumably the latent-space size, TODO confirm.
for _model, _csv_path in (
    (model_pos_2, r"simulated_data/time_vs_lat_pos/pos_2_sample.csv"),
    (model_pos_1, r"simulated_data/time_vs_lat_pos/pos_1_sample.csv"),
    (model_neg_2, r"simulated_data/time_vs_lat_pos/neg_2_sample.csv"),
    (model_neg_5, r"simulated_data/time_vs_lat_pos/neg_5_sample.csv"),
):
    sim.ABC_Monte_Carlo.lat_pos(_model.synth_data["lat_pos"], 3).to_csv(_csv_path)

Below we generate the synthetic data set that shows how the latent position in ABCDPRGM evolves through time under different settings.

Let $\widehat{B} \in \mathbb{R}^{q \times p}$ be the MLE that corresponds to the design matrix $X \otimes I_p$, and $\tilde{\beta} = (C^T C)^{-1} C^T \widehat{B}$. Let $\widehat{\beta}$ be the MLE that corresponds to the design matrix $(X \otimes I_p)C$.

We first do Monte Carlo simulations to verify the asymptotic behavior of $\widehat{B}$ and $\tilde{\beta}$. 

In [5]:
""" no_oracle option is for the RGD stuff, which is still somewhat problematic. It shouldn't be turned on. """
# Monte Carlo configuration.
N = 50                                            # iterations per node count
n_set = torch.arange(1500, 6001, 750)             # 1500, 2250, ..., 6000 nodes
beta = [1, 1, -4, 5]
alpha_0 = [[10, 1, 1], [1, 10, 1], [1, 1, 10]]

# Flags choosing which estimator variants are run.
# NOTE(review): the remark at the top of this cell says `no_oracle` should not
# be turned on, yet NO is True here — confirm which is intended.
OL = OA = NO = True
# Number of enabled variants; a later cell uses it to reshape the estimates.
ntypes = sum((OL, OA, NO))

_mc_kwargs = dict(
    number_of_iterations=N,
    nodes_set=n_set,
    beta=beta,
    alpha_0=alpha_0,
    seeded=True,
    constrained=False,
    oracle_lat_pos=OL,
    oracle_align=OA,
    no_oracle=NO,
)
temp = sim.ABC_Monte_Carlo.consistency_T2(**_mc_kwargs)

tensor(1500): 100%|██████████| 50/50 [00:16<00:00,  3.02it/s]
tensor(2250): 100%|██████████| 50/50 [00:22<00:00,  2.23it/s]
tensor(3000): 100%|██████████| 50/50 [00:32<00:00,  1.55it/s]
tensor(3750): 100%|██████████| 50/50 [00:45<00:00,  1.09it/s]
tensor(4500): 100%|██████████| 50/50 [01:00<00:00,  1.21s/it]
tensor(5250): 100%|██████████| 50/50 [01:16<00:00,  1.53s/it]
tensor(6000): 100%|██████████| 50/50 [01:34<00:00,  1.89s/it]


In [None]:
# Save the two Monte Carlo result tables (`est` and `fish`) as CSV files.
_mc_result = temp.MC_result
_mc_result.est.to_csv(r"simulated_data/emp_var_vs_obs_var/B_oracle.csv")
_mc_result.fish.to_csv(r"simulated_data/emp_var_vs_obs_var/B_fish.csv")

In [None]:
# A tiny model (2 time steps, 3 nodes) is built only to read the matrix C from
# its settings; its simulated data is not used in this cell.
model = sim.ABC(time = 2,
            nodes = 3,
            beta = [1,1,-4, 5],
            alpha_0 = [[10, 1, 1], [1, 10, 1], [1, 1, 10]])
C = model.settings.C

# Arrange the "B_est" values as one column per Monte Carlo replicate.
# The row length is taken from C (previously hard-coded as 21) so that the
# product C.T @ res below is always conformable.
res = torch.tensor(temp.MC_result.est["B_est"]).reshape(N * ntypes * len(n_set), C.shape[0]).T

# beta_tilde = (C^T C)^{-1} C^T B_hat, computed by solving the normal
# equations; after the transpose each row of `df` is one replicate.
df = pd.DataFrame(torch.linalg.solve(C.T @ C, C.T @ res).T)

# One column per column of C (previously hard-coded as 4).
df.columns = ["dim_" + str(i) for i in range(C.shape[1])]
sns.histplot(df["dim_3"])


In [None]:
# Plot the `info_lost` column against `nodes` from the Monte Carlo estimate table.
sns.scatterplot(x="nodes", y="info_lost", data=temp.MC_result.est)