In [3]:
import sys
sys.path.append('../')
import torch
import matplotlib.pyplot as plt
from notebook_setup import device, smooth_graph, create_new_set_of_models, train_models_and_get_histories, update_dict
from oslow.models.oslow import OSlow
from oslow.data.synthetic.graph_generator import GraphGenerator
from oslow.data.synthetic.utils import RandomGenerator
from oslow.data.synthetic.parametric import AffineParametericDataset
from oslow.models.normalization import ActNorm
from tqdm import tqdm
import numpy as np

%load_ext autoreload
%autoreload 2
print(device)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/amli/miniconda3/envs/oslow/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/amli/miniconda3/envs/oslow/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/amli/miniconda3/envs/oslow/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/amli/miniconda3/envs/oslow/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in l

cuda


Generate a causal graph using the GraphGenerator class. Here, we specify the number of nodes (3) and enforce a specific ordering [1, 0, 2]. This graph will be used as the ground truth for our causal discovery experiment.

In [4]:
graph_generator = GraphGenerator(
    num_nodes=3,
    seed=0,
    graph_type="full",
    enforce_ordering=[1, 0, 2],
)
graph = graph_generator.generate_dag()

Here, we generate synthetic data based on the causal graph. We create an AffineParametericDataset 
with sinusoidal links between variables. This dataset will be used to train our OSlow models and 
test our causal discovery strategy.

In [5]:
num_samples = 10000
gaussian_noise_generator = RandomGenerator('normal', seed=10, loc=0, scale=1)
link_generator = RandomGenerator('uniform', seed=110, low=1, high=1)

dset_sinusoidal = AffineParametericDataset(
    num_samples=num_samples,
    graph_generator=graph_generator,  # Not graph, requires a GraphGenerator object that generates the DAG
    noise_generator=gaussian_noise_generator,
    link_generator=link_generator,
    link="sinusoid",
    perform_normalization=False,
)

This cell defines the settings for our OSlow models and their training process. We specify the model 
architecture (additive or not, number of transforms, normalization method) and training parameters 
(batch size, learning rate, number of epochs). These settings will be used for all OSlow models we create.

In [17]:
base_model_instantiation_setting = dict(
    additive = False,
    num_transforms = 1,
    normalization = ActNorm,
    base_distribution = torch.distributions.Normal(loc=0, scale=1),
    use_standard_ordering=False,
)

base_training_setting = dict(
    batch_size=512,
    lr=0.005,
    epoch_count=33,
    use_standard_ordering=False,
)

Create a warm-up model that can be used for other initializations.

In [22]:
def generate_random_permutation_matrix(size):
    perm = torch.randperm(size)
    return torch.eye(size)[perm]

def warm_up_oslow(data, base_model_instantiation_setting, base_training_setting, num_warmup_epochs=50):
    # Create a single OSlow model
    warmup_model = create_new_set_of_models(
        single_ordering='012',  # This ordering doesn't matter as we'll use random permutations
        **base_model_instantiation_setting
    )

    # Modify training settings for warm-up
    warmup_training_setting = update_dict(
        base_training_setting,
        epoch_count=num_warmup_epochs,
        use_standard_ordering=False  # We'll provide random permutations
    )

    # Custom training loop for warm-up
    optimizer = torch.optim.Adam(warmup_model.parameters(), lr=warmup_training_setting['lr'])
    for epoch in range(num_warmup_epochs):
        for batch in torch.utils.data.DataLoader(data, batch_size=warmup_training_setting['batch_size'], shuffle=True):
            optimizer.zero_grad()
            perm_mat = generate_random_permutation_matrix(3).to(batch.device)
            loss = -warmup_model.log_prob(batch, perm_mat=perm_mat).mean()
            loss.backward()
            optimizer.step()

    return warmup_model

def transfer_weights(source_model, target_model):
    target_dict = target_model.state_dict()
    source_dict = source_model.state_dict()
    for name in target_dict:
        if name in source_dict:
            target_dict[name].data.copy_(source_dict[name].data)
    target_model.load_state_dict(target_dict)


def create_and_train_oslow_model_with_warmup(ordering, data, warmup_model, base_model_instantiation_setting, base_training_setting):
    # Create a new model with the specified ordering
    model = create_new_set_of_models(
        single_ordering=ordering,
        **base_model_instantiation_setting
    )

    # Transfer weights from the warm-up model
    transfer_weights(warmup_model, model)

    # Train the model
    history = train_models_and_get_histories(
        {ordering: model},
        data,
        **base_training_setting
    )

    return model, history[ordering]

Need to modify this function.

In [23]:
def create_and_train_oslow_model_with_start_covariate(start_covariate, dset):
    ordering = ''.join(str(i) for i in ([start_covariate] + [j for j in range(3) if j != start_covariate]))
    # by specifying single_ordering, we only make one model
    print(f"Creating model with ordering: {ordering}")
    model = create_new_set_of_models(
        single_ordering=ordering,
        **base_model_instantiation_setting
    )
    
    history = train_models_and_get_histories(
        {ordering: model},
        dset,
        **base_training_setting
    )
    
    return model, history[ordering]


def determine_causal_ordering(data, base_model_instantiation_setting, base_training_setting):
    all_models = {}
    all_histories = {}

    # Perform warm-up training
    warmup_model = warm_up_oslow(data, base_model_instantiation_setting, base_training_setting)

    def recursive_ordering(fixed_order):
        if len(fixed_order) == 3:
            return fixed_order

        available_covariates = [i for i in range(3) if i not in fixed_order]
        stage_key = '-'.join(map(str, fixed_order))

        for start_covariate in available_covariates:
            ordering = ''.join(map(str, fixed_order + [start_covariate] + [i for i in available_covariates if i != start_covariate]))
            model_key = f"{stage_key}-{start_covariate}" if stage_key else f"start_{start_covariate}"

            model, history = create_and_train_oslow_model_with_warmup(
                ordering, data, warmup_model, base_model_instantiation_setting, base_training_setting
            )
            all_models[model_key] = model
            all_histories[model_key] = history

        stage_log_probs = {name: np.mean(history) for name, history in all_histories.items() if name.startswith(stage_key) or (not stage_key and name.startswith("start_"))}
        best_model = min(stage_log_probs, key=stage_log_probs.get)
        best_covariate = int(best_model.split('_')[-1] if '_' in best_model else best_model.split('-')[-1])

        new_fixed_order = fixed_order + [best_covariate]
        return recursive_ordering(new_fixed_order)

    causal_order = recursive_ordering([])
    return causal_order, all_models, all_histories


In [18]:
causal_order, trained_models, training_histories = determine_causal_ordering(dset_sinusoidal)

print(f"Inferred causal order: {causal_order}")
print(f"True causal order: [1, 0, 2]")

Creating model with ordering: 012


training model 012: 100%|██████████| 33/33 [00:07<00:00,  4.54it/s]


Creating model with ordering: 102


training model 102: 100%|██████████| 33/33 [00:07<00:00,  4.38it/s]


Creating model with ordering: 201


training model 201: 100%|██████████| 33/33 [00:06<00:00,  5.20it/s]


Creating model with ordering: 012


training model 012: 100%|██████████| 33/33 [00:06<00:00,  5.16it/s]


Creating model with ordering: 201


training model 201: 100%|██████████| 33/33 [00:06<00:00,  5.17it/s]


Creating model with ordering: 201


training model 201: 100%|██████████| 33/33 [00:06<00:00,  5.16it/s]

Inferred causal order: [1, 0, 0]
True causal order: [1, 0, 2]





In [None]:
# Visualize negative log probabilities for each model
plt.figure(figsize=(12, 6))
for model_name, history in training_histories.items():
    plt.plot(smooth_graph({model_name: history})[model_name], label=model_name)
plt.xlabel('Epoch')
plt.ylabel('Negative Log Probability')
plt.title('Smoothed Negative Log Probabilities During Training')
plt.legend()
plt.show()