In [4]:
import os
import sys


def setup_project_root(start_path='.'):
    """Find the project root, make it the working directory, and add it to sys.path.

    The project root is the nearest directory, starting at ``start_path`` and
    walking up through its parents, that contains an entry named ``.git``.

    Args:
        start_path: Path to start the upward search from. Defaults to the
            current working directory.

    Raises:
        FileNotFoundError: If the filesystem root is reached without finding a
            ``.git`` entry. This is a subclass of ``Exception``, so callers
            using ``except Exception`` keep working.

    Side effects:
        Changes the process working directory, may prepend the project root
        to ``sys.path``, and prints what it did.
    """
    current_path = os.path.abspath(start_path)

    # Probe for the single '.git' entry instead of listing every ancestor
    # directory: cheaper on large directories, and it does not fail on an
    # ancestor that cannot be listed. lexists() also accepts a '.git' file or
    # dangling symlink, matching what a directory listing would have reported.
    while not os.path.lexists(os.path.join(current_path, '.git')):
        parent_path = os.path.dirname(current_path)
        if parent_path == current_path:  # dirname() is a fixed point at the filesystem root
            raise FileNotFoundError("Could not find project root (.git directory not found)")
        current_path = parent_path
    project_root = current_path

    # Change the current working directory to the project root
    os.chdir(project_root)
    print(f"Current working directory set to: {os.getcwd()}")

    # Add project root to sys.path if it's not already there
    if project_root not in sys.path:
        sys.path.insert(0, project_root)
        print(f"Added {project_root} to sys.path")

# sets the current working directory to the project root
setup_project_root()

# Don't cache imports
%load_ext autoreload
%autoreload 2


Current working directory set to: /vol/bitbucket/dm2223/info-theory-experiments
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from custom_datasets import BitStringDataset
from models import SupervenientFeatureNetwork
import torch
from models import SkipConnectionSupervenientFeatureNetwork
from trainers import train_feature_network



/vol/bitbucket/dm2223/info-theory-experiments/info_theory_experiments/bits_experiments


ModuleNotFoundError: No module named 'custom_datasets'

# Experiment to show that the diversity loss term is needed
This works by training 10 different feature networks, freezing them, running them in eval mode, and showing that they all learn the same bit.

For verification we use weaker critics than the ones used during training.

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Project names handed to train_feature_network; they do not depend on the seed.
project_name_train = "NEURIPS-diversity-ablation-training"
project_name_test = "NEURIPS-diversity-ablation-test"


def _build_bits_config(seed, train_mode, epochs, downward_hidden_sizes, decoupled_hidden_sizes):
    """Build the config dict for one call to train_feature_network.

    The training and verification phases share every setting except the ones
    exposed as parameters here, so both configs come from this single template.

    Args:
        seed: Value stored under "torch_seed".
        train_mode: Value stored under "train_mode" (True for the training
            phase, False for the verification phase).
        epochs: Value stored under "epochs".
        downward_hidden_sizes: Hidden sizes used for both the v critic and the
            xi critic of "downward_critics_config".
        decoupled_hidden_sizes: Hidden sizes used for both encoders of
            "decoupled_critic_config".

    Returns:
        A new dict; the hidden-size lists are copied so no list object is
        shared between entries or between calls.
    """
    return {
        "gamma_parity": 0.99,
        "gamma_extra": 0.99,
        "dataset_length": 1000000,
        "torch_seed": seed,
        "dataset_type": "bits",
        "num_atoms": 6,
        "batch_size": 1000,
        "train_mode": train_mode,
        "train_model_B": False,
        "adjust_Psi": False,
        "clip": 5,
        "feature_size": 1,
        "epochs": epochs,
        "start_updating_f_after": 1000,
        "update_f_every_N_steps": 5,
        "minimize_neg_terms_until": 0,
        "downward_critics_config": {
            "hidden_sizes_v_critic": list(downward_hidden_sizes),
            "hidden_sizes_xi_critic": list(downward_hidden_sizes),
            "critic_output_size": 32,
            "lr": 1e-3,
            "bias": True,
            "weight_decay": 0,
        },
        "decoupled_critic_config": {
            "hidden_sizes_encoder_1": list(decoupled_hidden_sizes),
            "hidden_sizes_encoder_2": list(decoupled_hidden_sizes),
            "critic_output_size": 32,
            "lr": 1e-3,
            "bias": True,
            "weight_decay": 0,
        },
        "feature_network_config": {
            "hidden_sizes": [256, 256],
            "lr": 1e-4,
            "bias": True,
            "weight_decay": 0.00001,
        },
    }


for seed in range(6, 10):
    # ---- Training phase: large critics ----
    bits_config_train = _build_bits_config(
        seed,
        train_mode=True,
        epochs=5,
        downward_hidden_sizes=[512, 512, 512, 256],
        decoupled_hidden_sizes=[512, 512, 512],
    )

    # NOTE(review): the dataset and the model are both constructed before
    # `seed` reaches any call visible here (it is only passed inside the
    # config). Confirm that their randomness is seeded as intended.
    dataset = BitStringDataset(
        gamma_extra=bits_config_train["gamma_extra"],
        gamma_parity=bits_config_train["gamma_parity"],
        length=bits_config_train["dataset_length"],
    )

    trainloader = torch.utils.data.DataLoader(
        dataset, batch_size=bits_config_train["batch_size"], shuffle=False
    )

    skip_model = SkipConnectionSupervenientFeatureNetwork(
        num_atoms=bits_config_train['num_atoms'],
        feature_size=bits_config_train['feature_size'],
        hidden_sizes=bits_config_train['feature_network_config']['hidden_sizes'],
        include_bias=bits_config_train['feature_network_config']['bias'],
    ).to(device)

    skip_model = train_feature_network(
        config=bits_config_train,
        trainloader=trainloader,
        feature_network_training=skip_model,
        project_name=project_name_train,
        model_dir_prefix=None
    )

    # ---- Verification phase: same settings, but train_mode off, fewer
    # epochs, and smaller (weaker) critics ----
    bits_config_test = _build_bits_config(
        seed,
        train_mode=False,
        epochs=2,
        downward_hidden_sizes=[256, 256, 256],
        decoupled_hidden_sizes=[256, 256],
    )

    # Previously bound to the misspelled name `skil_model`. A distinct,
    # descriptive name keeps `skip_model` pointing at the training result.
    skip_model_verified = train_feature_network(
        config=bits_config_test,
        trainloader=trainloader,
        feature_network_training=skip_model,
        project_name=project_name_test,
        model_dir_prefix=None
    )





0,1
Psi,▂▁▅▇▇▇█▇▆█▆▇▆▇▆▄▅▆▅▅▅▄▄▅▂▅▅▅▅▅▄▂▅▃▄▅▃▄▃▅
bonus_bit_MI,▁▁▁▂▄▇▇▂▇▆▇▄▃▇█▇▇▇▇▇▆▆▁█▇█▇▇█▇▇▄▅▇█▆▅█▇▇
decoupled_MI,▂▁▅▇▆▇█▇▆█▆████▇█▆▇▇█▇▇█▅█████▇▅▇▇██▇█▆█
downward_MI_0,▅▁█▆▇▇▇▇██▇▇▇▇▇█▇▇▇█▇▇▇▇█▇▇▆▇███▇▇▇▇█▇▇█
downward_MI_1,▁▄▇▄▃▅▅▅▄▅▃▅▅▅▅▅▅▅▄▅▅▅█▅▇▄▅█▅▄▅▅▅▅▅▄▆▆▄▅
downward_MI_2,█▄▂▁▃▂▂▁▂▃▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂
downward_MI_3,▇▅▂▄█▅▅▅▅▅▅▅▅▅▅▅▅▅▁▄▅▅▅▅▅▅▅▅▅▄▅▅▅▅▅▅▅▅▅▅
downward_MI_4,█▂▂▅▂▁▂▂▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂
downward_MI_5,▁▁▁▁▁▁▁▁▂▂▂▂▃▃▄▆▄▅▄▂▅▆▇▄▄▇▅▆▆▆▇▇▅▆▇▆██▇▆
extra_bit_MI,▁▁▁▁▁▂▃▄▅▄▆▃▅▆▅▇▆▅▅▂▇█▇▃▅▇▇▇▇▆█▇▇▇█▇████

0,1
Psi,0.48571
bonus_bit_MI,0.48987
decoupled_MI,0.89267
downward_MI_0,-1e-05
downward_MI_1,0.0
downward_MI_2,-0.0
downward_MI_3,-0.0
downward_MI_4,-1e-05
downward_MI_5,0.48272
extra_bit_MI,0.54451


Training:  40%|████      | 2/5 [01:36<02:24, 48.25s/it]


KeyboardInterrupt: 