In [1]:
from custom_datasets import FMRIDatasetConcat
import torch
from torch import nn, optim
from torch.utils.data import DataLoader

class Autoencoder(nn.Module):
    def __init__(self, input_size, feature_size, hidden_sizes):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_sizes[0]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[0], hidden_sizes[1]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[1], hidden_sizes[2]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[2], hidden_sizes[3]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[3], feature_size)
        )
        self.decoder = nn.Sequential(
            nn.Linear(feature_size, hidden_sizes[3]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[3], hidden_sizes[2]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[2], hidden_sizes[1]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[1], hidden_sizes[0]),
            nn.ReLU(True),
            nn.Linear(hidden_sizes[0], input_size)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

seed = 42 
torch.manual_seed(seed)


<torch._C.Generator at 0x7fcb62bc3ef0>

In [2]:

dataset = FMRIDatasetConcat()
device = 'cuda'

for i in range(1):
    config = {
        "torch_seed": seed,
        "dataset_type": "FMRI",
        "nuem_atoms": 100,
        "batch_size": 1000,
        "train_mode": True,
        "train_model_B": False,
        "adjust_Psi": False,
        "clip": 5,
        "feature_size": 3,
        "epochs": 75,
        "hidden_sizes": [256, 256, 256, 256, 256],
        "lr": 1e-4,
        "weight_decay": 1e-6,
    }

    torch.manual_seed(config['torch_seed'])
    trainloader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True)

    input_size = 100
    model = Autoencoder(input_size, config['feature_size'], config['hidden_sizes']).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])

    for epoch in range(config['epochs']):
        for data in trainloader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{config["epochs"]}], Loss: {loss.item():.4f}')

    torch.save(model.state_dict(), 'models/FMRI-data-autoencoder_model.pth')



Epoch [1/75], Loss: 0.0935
Epoch [2/75], Loss: 0.0896
Epoch [3/75], Loss: 0.0763
Epoch [4/75], Loss: 0.0730
Epoch [5/75], Loss: 0.0684
Epoch [6/75], Loss: 0.0656
Epoch [7/75], Loss: 0.0670
Epoch [8/75], Loss: 0.0651
Epoch [9/75], Loss: 0.0634
Epoch [10/75], Loss: 0.0581
Epoch [11/75], Loss: 0.0557
Epoch [12/75], Loss: 0.0562
Epoch [13/75], Loss: 0.0563
Epoch [14/75], Loss: 0.0523
Epoch [15/75], Loss: 0.0537
Epoch [16/75], Loss: 0.0537
Epoch [17/75], Loss: 0.0533
Epoch [18/75], Loss: 0.0536
Epoch [19/75], Loss: 0.0524
Epoch [20/75], Loss: 0.0540
Epoch [21/75], Loss: 0.0552
Epoch [22/75], Loss: 0.0584
Epoch [23/75], Loss: 0.0553
Epoch [24/75], Loss: 0.0564
Epoch [25/75], Loss: 0.0539
Epoch [26/75], Loss: 0.0538
Epoch [27/75], Loss: 0.0520
Epoch [28/75], Loss: 0.0539
Epoch [29/75], Loss: 0.0523
Epoch [30/75], Loss: 0.0534
Epoch [31/75], Loss: 0.0541
Epoch [32/75], Loss: 0.0527
Epoch [33/75], Loss: 0.0528
Epoch [34/75], Loss: 0.0517
Epoch [35/75], Loss: 0.0515
Epoch [36/75], Loss: 0.0532
E

# see if autoencoder rep is emergent

In [5]:

def get_autoencoder_representation(batch_data):
    """
    Function to get the autoencoder representation of a batch of data.

    Parameters:
    - batch_data: Tensor containing the batch of data.

    Returns:
    - encoded_data: Tensor containing the autoencoder representations.
    """
    # Load the model
    model = Autoencoder(input_size, config['feature_size'], config['hidden_sizes']).to(device)
    model.load_state_dict(torch.load('models/FMRI-data-autoencoder_model.pth'))
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():  # Disable gradient calculation
        batch_data = batch_data.to(device)
        encoded_data = model.encoder(batch_data)

    return encoded_data



project_name = "NEURIPS-testing-if-encoder-rep-is-emergent"

config_test = {    
    "torch_seed": seed,
    "dataset_type": "FMRI",
    "num_atoms": 100,
    "batch_size": 1000,
    "train_mode": False,
    "train_model_B": False,
    "adjust_Psi": True,
    "clip": 5,
    "feature_size": 3,
    "epochs": 10,
    "start_updating_f_after": 100,
    "update_f_every_N_steps": 5,
    "minimize_neg_terms_until": 0,
    "downward_critics_config": {
        "hidden_sizes_v_critic": [512, 1024, 1024, 512],
        "hidden_sizes_xi_critic": [512, 512, 512],
        "critic_output_size": 32,
        "lr": 1e-3,
        "bias": True,
        "weight_decay": 0,
    },
    
    "decoupled_critic_config": {
        "hidden_sizes_encoder_1": [512, 512, 512],
        "hidden_sizes_encoder_2": [512, 512, 512],
        "critic_output_size": 32,
        "lr": 1e-3,
        "bias": True,
        "weight_decay": 0,
    },
    "feature_network_config": {
        "hidden_sizes": [256, 256, 256, 256, 256],
        "lr": 1e-4,
        "bias": True,
        "weight_decay": 1e-6,
    }
}

from trainer_for_RNN_rep_network import train_feature_network

out = train_feature_network(
    config=config_test,
    trainloader=trainloader,
    feature_network_training=get_autoencoder_representation,
    project_name=project_name,
)



0,1
Psi,▆▅█▅▅▅▄▄▃▃▃▂▃▂▃▃▂▃▂▃▃▂▃▂▂▂▂▂▁▂▂▂▂▂▂▂▂▁▂▂
decoupled_MI,▁▁▂▃▄▅▅▅▆▄▅▆▆▆▆▆▇▇▇█▇█▇▇█▇█▆█▇▇▇▇▇█▇▇▇██
downward_MI_0,▆▆▇▁▆▆▆▆▆▆▇▇▇█▇▇▇▇█▇█████▇██████████▇███
downward_MI_1,▅▅▁▆▆▆▆▆▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█████
downward_MI_10,▁▁▃▂▄▃▄▃▁▅▆▆▅▆▅▆▆▆▇▆▆▇▇▇▇▇▆▆▇▇▇▇█▇█▇▇▇██
downward_MI_11,▃▃▂▁▄▄▅▆▅▄▅▆▆▆▆▆▇▇▇▆▆▇▇▆▇▇▇▇▇█▇▇▇▇█▇▇██▇
downward_MI_12,▆▆▁▆▆▆▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇█▇▇▇▇▇███▇██████
downward_MI_13,▃▃▁▃▄▄▅▅▄▆▆▆▆▆▆▆▆▆▅▆▇▆▇▇▇▆▇▇▇▆█▇█████▇█▇
downward_MI_14,▅▅▁▆▆▆▆▆▆▇▇▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█████
downward_MI_15,▅▆▁▆▆▆▆▆▆▇▇▆▇▇▇▇▇█▇▇▇█▇▇▇████▇█████████▇

0,1
Psi,-22.31507
decoupled_MI,3.57286
downward_MI_0,0.22857
downward_MI_1,0.60365
downward_MI_10,0.35539
downward_MI_11,0.65505
downward_MI_12,0.3758
downward_MI_13,0.43863
downward_MI_14,0.37779
downward_MI_15,0.40048


  model.load_state_dict(torch.load('models/FMRI-data-autoencoder_model.pth'))
Training: 100%|██████████| 10/10 [02:20<00:00, 14.00s/it]


0,1
Psi,█▅▃▂▃▂▂▂▂▂▂▂▂▂▁▂▁▂▂▂▂▁▂▂▂▁▂▁▂▁▁▁▂▂▂▁▂▂▁▂
decoupled_MI,▁▃▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇███▇██▇█
downward_MI_0,▁▄▅▆▆▆▆▆▇▆▆▆▇▆▇▆▆▇▆▆▇▆▆▆▇▇▇▇█▆▇▆▇▇▆▇▆▇█▆
downward_MI_1,▁▅▅▅▆▇▇▇█▇▇█▇▇▇█▇▇▇██▇█▇▇▇███▇▇█▇█▇██▇▇▇
downward_MI_10,▂▁▃▅▄▆▆▆▇▆▇▇▇▇█▇▇█▇▇█▇▇█▇▇████▇█▇▇█▇▇▇▇█
downward_MI_11,▁▅▆▆▆▇▆▇▇▇▇█▇██▇▇█▇█▇███▇██▇██████████▇█
downward_MI_12,▁▅▅▅▆▆▇▇▇▇▇▇▇▇██▇▇██▇███▇▇█████████████▇
downward_MI_13,▁▄▅▅▅▇▇▆▆▇▇▇▇▇█▆▇▇▇▇█▇█▇▇▇█▇▇█▇█▆█▇█▇▇██
downward_MI_14,▁▃▃▄▅▆▅▅▇▇▇▇▇▇▇▇▇▇▆▆▇▇▇▇▇▇█▇▇█▇█▆██▇▆▇▇▇
downward_MI_15,▁▃▃▄▅▆▆▅▆▆▇▇▆▇▆▆▇▆▇▇▇▇▇▇▇▆▇▇▇▇█▇▆▆▇█▇▇█▇

0,1
Psi,-25.74913
decoupled_MI,5.85125
downward_MI_0,0.28967
downward_MI_1,0.76854
downward_MI_10,0.44843
downward_MI_11,0.60899
downward_MI_12,0.50459
downward_MI_13,0.48552
downward_MI_14,0.48614
downward_MI_15,0.474


: 

In [5]:
import torch
from models import SkipConnectionSupervenientFeatureNetwork

# Load the pre-trained model
model_path = 'models/NEURIPS-FMRI-model-A-jumping-serenity-12.pth'
encoder = SkipConnectionSupervenientFeatureNetwork(
    num_atoms=config_test['num_atoms'],
    feature_size=config_test['feature_size'],
    hidden_sizes=config_test['feature_network_config']['hidden_sizes'],
    include_bias=config_test['feature_network_config']['bias']
).to(device)
encoder.load_state_dict(torch.load(model_path))
encoder.eval()  # Set the encoder to evaluation mode

# Define the MLP decoder
class MLPDecoder(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLPDecoder, self).__init__()
        layers = []
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(input_size, hidden_size))
            layers.append(nn.ReLU())
            input_size = hidden_size
        layers.append(nn.Linear(input_size, output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

# Initialize the decoder
decoder = MLPDecoder(
    input_size=3,
    hidden_sizes=[256, 256, 256, 256, 256],
    output_size=100
).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(decoder.parameters(), lr=1e-4, weight_decay=1e-6)

# Training loop
epochs = 75
for epoch in range(epochs):
    for i, batch in enumerate(trainloader):
        x0 = batch[:, 0].to(device).float()
        x1 = batch[:, 1].to(device).float()

        # Get the representation from the encoder
        with torch.no_grad():
            representation = encoder(x0)

        # Predict the next time step
        prediction = decoder(representation)

        # Compute the loss
        loss = criterion(prediction, x1)
        if i == 0:
            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



  encoder.load_state_dict(torch.load(model_path))


Epoch 1/75, Loss: 0.09397035092115402
Epoch 2/75, Loss: 0.09205859899520874
Epoch 3/75, Loss: 0.09147310256958008
Epoch 4/75, Loss: 0.09102212637662888
Epoch 5/75, Loss: 0.09073235094547272
Epoch 6/75, Loss: 0.09073550254106522
Epoch 7/75, Loss: 0.09081390500068665
Epoch 8/75, Loss: 0.09087085723876953
Epoch 9/75, Loss: 0.09087539464235306
Epoch 10/75, Loss: 0.0908716544508934
Epoch 11/75, Loss: 0.09086067229509354
Epoch 12/75, Loss: 0.09085692465305328
Epoch 13/75, Loss: 0.09085702896118164
Epoch 14/75, Loss: 0.09086278825998306
Epoch 15/75, Loss: 0.09086089581251144
Epoch 16/75, Loss: 0.0908709317445755
Epoch 17/75, Loss: 0.09086737781763077
Epoch 18/75, Loss: 0.0908680260181427
Epoch 19/75, Loss: 0.09086751937866211
Epoch 20/75, Loss: 0.09086717665195465
Epoch 21/75, Loss: 0.09086473286151886
Epoch 22/75, Loss: 0.09086532890796661
Epoch 23/75, Loss: 0.09086686372756958
Epoch 24/75, Loss: 0.09086661040782928
Epoch 25/75, Loss: 0.09086403250694275
Epoch 26/75, Loss: 0.0908640697598457

# now lets predict longer term dependencies

In [12]:
from custom_datasets import FMRIDatasetConcatNoPrepareBatch
import torch
from torch.utils.data import DataLoader

device = 'cuda'

dataset = FMRIDatasetConcatNoPrepareBatch()
trainloader = DataLoader(dataset, batch_size=1000, shuffle=False)

# Load the pre-trained autoencoder
autoencoder_path = 'models/FMRI-data-autoencoder_model.pth'
autoencoder = Autoencoder(input_size=100, feature_size=3, hidden_sizes=[256, 256, 256, 256, 256]).to(device)
autoencoder.load_state_dict(torch.load(autoencoder_path))
autoencoder.eval()


# Extract the encoder from the autoencoder
encoder = autoencoder.encoder

# Freeze the encoder parameters
for param in encoder.parameters():
    param.requires_grad = False

# Define a new decoder for predicting n time steps ahead
n = 20
future_decoder = MLPDecoder(
    input_size=3,
    hidden_sizes=[256, 256, 256, 256, 256],
    output_size=100
).to(device)

# Define the loss function and optimizer for the future decoder
future_criterion = nn.MSELoss()
future_optimizer = torch.optim.Adam(future_decoder.parameters(), lr=1e-4, weight_decay=1e-6)

# Training loop for the future decoder
future_epochs = 30
for epoch in range(future_epochs):
    for i, batch in enumerate(trainloader):
        start = batch[:-n]
        future = batch[n:]

        pairs = torch.stack([start, future], dim=1).float().to(device)

        x0 = pairs[:, 0]
        xN = pairs[:, 1]
        
        # Predict xN from x0 using the future decoder
        encoded_x0 = encoder(x0)
        predicted_xN = future_decoder(encoded_x0)

        # Compute the loss
        loss = future_criterion(predicted_xN, xN)

        # Backpropagation and optimization
        future_optimizer.zero_grad()
        loss.backward()
        future_optimizer.step()

        # Print the loss for every 10th batch
        if i % 10 == 0:
            print(f'Epoch [{epoch+1}/{future_epochs}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}')
        

  autoencoder.load_state_dict(torch.load(autoencoder_path))


Epoch [1/30], Batch [1/29], Loss: 0.0948
Epoch [1/30], Batch [11/29], Loss: 0.0858
Epoch [1/30], Batch [21/29], Loss: 0.0925
Epoch [2/30], Batch [1/29], Loss: 0.0932
Epoch [2/30], Batch [11/29], Loss: 0.0851
Epoch [2/30], Batch [21/29], Loss: 0.0920
Epoch [3/30], Batch [1/29], Loss: 0.0929
Epoch [3/30], Batch [11/29], Loss: 0.0850
Epoch [3/30], Batch [21/29], Loss: 0.0918
Epoch [4/30], Batch [1/29], Loss: 0.0928
Epoch [4/30], Batch [11/29], Loss: 0.0850
Epoch [4/30], Batch [21/29], Loss: 0.0917
Epoch [5/30], Batch [1/29], Loss: 0.0927
Epoch [5/30], Batch [11/29], Loss: 0.0850
Epoch [5/30], Batch [21/29], Loss: 0.0917
Epoch [6/30], Batch [1/29], Loss: 0.0926
Epoch [6/30], Batch [11/29], Loss: 0.0851
Epoch [6/30], Batch [21/29], Loss: 0.0917
Epoch [7/30], Batch [1/29], Loss: 0.0925
Epoch [7/30], Batch [11/29], Loss: 0.0851
Epoch [7/30], Batch [21/29], Loss: 0.0916
Epoch [8/30], Batch [1/29], Loss: 0.0925
Epoch [8/30], Batch [11/29], Loss: 0.0851
Epoch [8/30], Batch [21/29], Loss: 0.0916


In [14]:

# Load the new model
feature_network_path = 'models/NEURIPS-FMRI-model-A-jumping-serenity-12.pth'
feature_network = SkipConnectionSupervenientFeatureNetwork(
    num_atoms=100,
    feature_size=3,
    hidden_sizes=[256, 256, 256, 256, 256],
    include_bias=True
).to(device)
feature_network.load_state_dict(torch.load(feature_network_path))

encoder = feature_network

# Freeze the encoder parameters
for param in encoder.parameters():
    param.requires_grad = False

# Define a new decoder for predicting n time steps ahead
n = 20
future_decoder = MLPDecoder(
    input_size=3,
    hidden_sizes=[256, 256, 256, 256, 256],
    output_size=100
).to(device)

# Define the loss function and optimizer for the future decoder
future_criterion = nn.MSELoss()
future_optimizer = torch.optim.Adam(future_decoder.parameters(), lr=1e-4, weight_decay=1e-6)

# Training loop for the future decoder
future_epochs = 30
for epoch in range(future_epochs):
    for i, batch in enumerate(trainloader):
        start = batch[:-n]
        future = batch[n:]

        pairs = torch.stack([start, future], dim=1).float().to(device)

        x0 = pairs[:, 0]
        xN = pairs[:, 1]
        
        # Predict xN from x0 using the future decoder
        encoded_x0 = encoder(x0)
        predicted_xN = future_decoder(encoded_x0)

        # Compute the loss
        loss = future_criterion(predicted_xN, xN)

        # Backpropagation and optimization
        future_optimizer.zero_grad()
        loss.backward()
        future_optimizer.step()

        # Print the loss for every 10th batch
        if i % 10 == 0:
            print(f'Epoch [{epoch+1}/{future_epochs}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}')
        

  feature_network.load_state_dict(torch.load(feature_network_path))


Epoch [1/30], Batch [1/29], Loss: 0.0948
Epoch [1/30], Batch [11/29], Loss: 0.0859
Epoch [1/30], Batch [21/29], Loss: 0.0926
Epoch [2/30], Batch [1/29], Loss: 0.0933
Epoch [2/30], Batch [11/29], Loss: 0.0850
Epoch [2/30], Batch [21/29], Loss: 0.0922
Epoch [3/30], Batch [1/29], Loss: 0.0930
Epoch [3/30], Batch [11/29], Loss: 0.0850
Epoch [3/30], Batch [21/29], Loss: 0.0922
Epoch [4/30], Batch [1/29], Loss: 0.0930
Epoch [4/30], Batch [11/29], Loss: 0.0850
Epoch [4/30], Batch [21/29], Loss: 0.0922
Epoch [5/30], Batch [1/29], Loss: 0.0930
Epoch [5/30], Batch [11/29], Loss: 0.0850
Epoch [5/30], Batch [21/29], Loss: 0.0922
Epoch [6/30], Batch [1/29], Loss: 0.0930
Epoch [6/30], Batch [11/29], Loss: 0.0850
Epoch [6/30], Batch [21/29], Loss: 0.0922
Epoch [7/30], Batch [1/29], Loss: 0.0930
Epoch [7/30], Batch [11/29], Loss: 0.0850
Epoch [7/30], Batch [21/29], Loss: 0.0923
Epoch [8/30], Batch [1/29], Loss: 0.0930
Epoch [8/30], Batch [11/29], Loss: 0.0850
Epoch [8/30], Batch [21/29], Loss: 0.0923
