In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import numpy as np

from tqdm import tqdm
from torchvision.utils import save_image, make_grid

In [None]:
# Mount Google Drive (Colab only) so that the files referenced below under
# '/content/drive/My Drive' (dataset pickle, label CSVs, checkpoints) are reachable.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:

# Model hyperparameters and run configuration.
# Only select the GPU when one is really present: torch.device("cuda") can be
# constructed on a CPU-only machine, but the first `.to(DEVICE)` would then fail.
cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda" if cuda else "cpu")
print(DEVICE)
batch_size = 256
img_size = (45, 45) # (width, height)

lr = 2e-4

epochs = 6000

# A training/validation batch is logged when batch_idx % print_step == 0.
print_step = 800

# Set random seeds for every RNG the notebook imports (Python, NumPy, PyTorch CPU/GPU).
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)


cuda


In [None]:
import pickle
# Load the pickled (images, labels) pair from Drive.
# NOTE(review): pickle.load can execute arbitrary code; only use it on files you trust.
with open('/content/drive/My Drive/rgb_loader_20_40_60_80_100', 'rb') as handle:
    loaded_pair = pickle.load(handle)
mlpimages, mlplabels = loaded_pair

# Reinterpret the images as (N, channels=3, 45, 45) for the convolutional encoder.
mlpimages = mlpimages.view(-1,3,45,45)
for tensor_shape in (mlpimages.shape, mlplabels.shape):
    print(tensor_shape)

torch.Size([52800, 3, 45, 45])
torch.Size([52800, 1])


In [None]:
import torch
import torch.nn as nn

from torchvision import datasets, transforms
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import matplotlib.pyplot as plt
import random
import numpy as np

from pylab import rcParams

In [None]:
from torch.utils.data import DataLoader, random_split, TensorDataset

# Pair every image with its label so both are split and batched together.
# (mlpimages / mlplabels are the tensors loaded from the pickle above.)
dataset = TensorDataset(mlpimages, mlplabels)

# 90 % / 10 % train / validation split. The training share is truncated to an
# integer and the validation set takes whatever remains.
train_ratio = 0.9
val_ratio = 0.1
total_samples = len(dataset)
train_length = int(total_samples * train_ratio)
val_length = total_samples - train_length
train_dataset, val_dataset = random_split(dataset, [train_length, val_length])

# Sample counts and their share of the full dataset.
train_samples = len(train_dataset)
val_samples = len(val_dataset)
train_percentage = (train_samples / total_samples) * 100
val_percentage = (val_samples / total_samples) * 100

# Only the training loader shuffles; `loader` walks the full dataset in order
# and is what the inference cell iterates over.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

print(f"Train dataset length: {train_samples}")
print(f"Validation dataset length: {val_samples}")
print(f"Percentage of samples in train_loader: {train_percentage:.2f}%")
print(f"Percentage of samples in val_loader: {val_percentage:.2f}%")


Train dataset length: 47520
Validation dataset length: 5280
Percentage of samples in train_loader: 90.00%
Percentage of samples in val_loader: 10.00%


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    """Convolutional encoder producing a (batch, hidden_dim, 12) code.

    Two stride-`stride` convolutions downsample the image, two residual
    convolutions refine the feature map, and two linear layers compress the
    flattened map. The linear layer sizes hard-code a 12x12 feature map, and
    the second residual addition requires `output_dim == hidden_dim`.

    Args:
        input_dim: number of input image channels.
        hidden_dim: channel width of the convolutional stack.
        output_dim: output channels of the last residual convolution.
        kernel_size: kernel sizes for the four convolutions, in order.
        stride: stride of the two downsampling convolutions.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, kernel_size=(3, 3, 3, 1), stride=2):
        super().__init__()

        k_down_1, k_down_2, k_res_1, k_res_2 = kernel_size

        # Downsampling stage.
        self.strided_conv_1 = nn.Conv2d(input_dim, hidden_dim, k_down_1, stride, padding=1)
        self.strided_conv_2 = nn.Conv2d(hidden_dim, hidden_dim, k_down_2, stride, padding=1)

        # Residual refinement stage (spatial size preserved).
        self.residual_conv_1 = nn.Conv2d(hidden_dim, hidden_dim, k_res_1, padding=1)
        self.residual_conv_2 = nn.Conv2d(hidden_dim, output_dim, k_res_2, padding=0)

        # Bottleneck: hidden_dim*144 -> hidden_dim*72 -> hidden_dim*12.
        self.linear_1 = nn.Linear(hidden_dim * 12 * 12, hidden_dim * 12 * 6)
        self.linear_2 = nn.Linear(hidden_dim * 12 * 6, hidden_dim * 12)

    def forward(self, x):
        """Encode a batch of images `x` into a (batch, channels, -1) tensor."""
        # No non-linearity between the two strided convolutions.
        activated = F.relu(self.strided_conv_2(self.strided_conv_1(x)))

        # First residual block.
        activated = F.relu(self.residual_conv_1(activated) + activated)

        # Second residual block; its sum is not passed through a ReLU.
        feature_map = self.residual_conv_2(activated) + activated

        # Flatten, compress, then split the code back into one row per channel.
        flat = feature_map.view(feature_map.size(0), -1)
        code = self.linear_2(F.relu(self.linear_1(flat)))
        return code.view(feature_map.size(0), feature_map.size(1), -1)


In [None]:
class Receiver_nn(nn.Module):
    """Single fully connected layer followed by ReLU, keeping the feature width.

    Args:
        hidden_size: size of both the input and the output features.
    """

    def __init__(self,hidden_size):
        super().__init__()

        stages = [
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.ReLU(inplace=True),
        ]
        self.fc_layers = nn.Sequential(*stages)

    def forward(self, signal):
        """Apply the linear layer and ReLU to `signal`."""
        return self.fc_layers(signal)
import torch.nn as nn
from torch.nn.functional import gumbel_softmax
from torch.distributions import RelaxedOneHotCategorical
class Sender(nn.Module):
    """LSTM message generator using a Gumbel-Softmax relaxation.

    The (flattened) input is used as the initial LSTM hidden state. At each of
    the `max_len` steps the cell output is projected to vocabulary logits and
    turned into a symbol vector:

    * `self.training` false: exact one-hot of the argmax (no sampling);
    * `self.training` true: a relaxed one-hot sample from
      `RelaxedOneHotCategorical`; with `straight_through=True` the forward
      value is the hard one-hot of that sample while gradients flow through
      the relaxed sample.

    A fixed end-of-sequence one-hot (symbol 0) is appended to every message.

    Args:
        vocab_size: number of symbols.
        embed_dim: size of the symbol embedding fed back into the LSTM.
        hidden_size: LSTM hidden size; must equal the flattened input width.
        max_len: number of generated symbols (>= 1), excluding the EOS step.
        temperature: temperature of the relaxed categorical distribution.
        training: initial value of the module's `training` flag. Note that
            `nn.Module.train()` / `eval()` on this module or any parent
            overwrites it.
        straight_through: if true, discretise training samples with a
            straight-through estimator. Defaults to False (relaxed samples).
    """

    def __init__(
        self,
        vocab_size,
        embed_dim,
        hidden_size,
        max_len,
        temperature,
        training,
        straight_through=False,
    ):
        super(Sender, self).__init__()
        self.hidden_size = hidden_size
        assert max_len >= 1, "Cannot have a max_len below 1"
        self.max_len = max_len

        self.hidden_to_output = nn.Linear(hidden_size, vocab_size)
        self.embedding = nn.Linear(vocab_size, embed_dim)
        self.sos_embedding = nn.Parameter(torch.zeros(embed_dim))
        self.embed_dim = embed_dim
        self.vocab_size = vocab_size
        self.training = training

        self.temperature = temperature
        # Previously accepted but ignored; now honoured in forward().
        self.straight_through = straight_through

        self.cell = nn.LSTMCell(input_size=embed_dim, hidden_size=self.hidden_size)

        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the start-of-sequence embedding with small Gaussian noise."""
        nn.init.normal_(self.sos_embedding, 0.0, 0.01)

    @staticmethod
    def _one_hot_argmax(values):
        """Return a one-hot tensor (same shape as `values`) marking the argmax of the last dim."""
        size = values.size()
        indexes = values.argmax(dim=-1)
        one_hot = torch.zeros_like(values).view(-1, size[-1])
        one_hot.scatter_(1, indexes.view(-1, 1), 1)
        return one_hot.view(*size)

    def forward(self, x):
        """Generate a message of shape (batch, max_len + 1, vocab_size) from `x`."""
        # The flattened input is the initial hidden state; the cell state starts at zero.
        x = x.view(x.shape[0], -1)
        prev_hidden = x[:]
        prev_c = torch.zeros_like(prev_hidden)  # only for LSTM

        # Every sequence starts from the learned start-of-sequence embedding.
        e_t = torch.stack([self.sos_embedding] * prev_hidden.size(0))

        sequence = []

        for step in range(self.max_len):
            h_t, prev_c = self.cell(e_t, (prev_hidden, prev_c))
            logits = self.hidden_to_output(h_t)

            if not self.training:
                # Deterministic, non-differentiable symbols for evaluation.
                x = self._one_hot_argmax(logits)
            else:
                x = RelaxedOneHotCategorical(logits=logits, temperature=self.temperature).rsample()
                if self.straight_through:
                    # Forward pass sees the hard one-hot; the detached difference
                    # leaves the gradient of the relaxed sample untouched.
                    hard_sample = self._one_hot_argmax(x)
                    x = x + (hard_sample - x).detach()

            prev_hidden = h_t
            e_t = self.embedding(x)
            sequence.append(x)

        # (max_len, batch, vocab) -> (batch, max_len, vocab)
        sequence = torch.stack(sequence).permute(1, 0, 2)

        # Append an end-of-sequence step: one-hot on symbol 0.
        eos = torch.zeros_like(sequence[:, 0, :]).unsqueeze(1)
        eos[:, 0, 0] = 1
        sequence = torch.cat([sequence, eos], dim=1)

        return sequence
class Receiver(nn.Module):
    """LSTM message reader that applies `agent` to the hidden state of every step.

    Args:
        vocab_size: width of each (one-hot or relaxed) symbol vector.
        embed_dim: size of the symbol embedding fed to the LSTM cell.
        hidden_size: LSTM hidden size.
        agent: module applied to each step's hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_size,agent):
        super().__init__()

        self.cell = nn.LSTMCell(input_size=embed_dim, hidden_size=hidden_size)

        self.embedding = nn.Linear(vocab_size, embed_dim)
        self.agent = agent

    def forward(self, message=None,input=None, aux_input=None):
        """Return the agent outputs for every message position.

        `message` is indexed as (batch, steps, vocab_size); `input` and
        `aux_input` are accepted but not used. The result has one entry per
        step along dim 1, i.e. all positions are returned, not only the last.
        """
        embedded = self.embedding(message)

        # The cell is unrolled by hand so every intermediate hidden state is
        # available; the first step lets the cell create its own zero state.
        state = None
        per_step = []
        for step in range(message.size(1)):
            token = embedded[:, step, ...]
            if state is None:
                h_t, c_t = self.cell(token)
            else:
                h_t, c_t = self.cell(token, state)
            per_step.append(self.agent(h_t))
            state = (h_t, c_t)

        # (steps, batch, features) -> (batch, steps, features)
        return torch.stack(per_step).permute(1, 0, 2)


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Decoder(nn.Module):
    """Decode each step of a (batch, steps, features) tensor into its own image.

    Per step: two linear layers expand the features to a 12x12 feature map,
    two residual convolutions refine it, and two transposed convolutions
    upsample it. The residual additions require `input_dim == hidden_dim`.

    Args:
        input_dim: channels entering the first residual convolution.
        hidden_dim: channel width of the stack (also sizes the linear layers).
        output_dim: channels of the reconstructed image.
        kernel_sizes: kernel sizes of the four (transposed) convolutions, in order.
        stride: stride of the two transposed convolutions.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, kernel_sizes=(1, 3, 3, 3), stride=2, ):
        super().__init__()

        k_res_1, k_res_2, k_up_1, k_up_2 = kernel_sizes

        # Expansion: hidden_dim*12 -> hidden_dim*72 -> hidden_dim*144.
        self.linear_1 = nn.Linear(hidden_dim * 12, hidden_dim * 12 * 6)
        self.linear_2 = nn.Linear(hidden_dim * 12 * 6, hidden_dim * 12 * 12)

        self.residual_conv_1 = nn.Conv2d(input_dim, hidden_dim, k_res_1, padding=0)
        self.residual_conv_2 = nn.Conv2d(hidden_dim, hidden_dim, k_res_2, padding=1)

        self.strided_t_conv_1 = nn.ConvTranspose2d(hidden_dim, hidden_dim, k_up_1, stride, padding=1, output_padding=0)
        self.strided_t_conv_2 = nn.ConvTranspose2d(hidden_dim, output_dim, k_up_2, stride, padding=1)

    def forward(self, x):
        """Return one reconstruction per step, stacked along dim 1."""
        batch_size, seq_len = x.shape[0], x.shape[1]
        frames = []
        # Each step is decoded independently with the same weights.
        for step in range(seq_len):
            features = x[:, step, :].view(batch_size, -1)
            features = self.linear_2(F.relu(self.linear_1(features)))
            # 144 = 12 * 12 spatial positions per channel.
            fmap = features.view(features.size(0), features.size(1) // 144, 12, 12)

            fmap = F.relu(self.residual_conv_1(fmap) + fmap)
            fmap = F.relu(self.residual_conv_2(fmap) + fmap)

            # No non-linearity between or after the two upsampling layers.
            frames.append(self.strided_t_conv_2(self.strided_t_conv_1(fmap)))

        return torch.stack(frames, dim=1)


In [None]:
class Model(nn.Module):
    """Chains encoder -> Sender -> Receiver -> decoder.

    The constructor arguments are module instances (their names shadow the
    classes of the same name inside `__init__` only).
    """

    def __init__(self, Encoder, Decoder, Sender, Receiver):
        super().__init__()
        self.encoder, self.decoder = Encoder, Decoder
        self.Sender, self.Receiver = Sender, Receiver

    def forward(self, x):
        """Return `(x_hat, message, z, receiveroutput)` for the input batch `x`."""
        z = self.encoder(x)
        # The encoding z seeds the sender LSTM's hidden state.
        message = self.Sender(z)
        receiveroutput = self.Receiver(message=message)
        x_hat = self.decoder(receiveroutput)
        return x_hat, message, z, receiveroutput



In [None]:
#Define Hyperparameters
input_dim = 3 #Encoder input channels
hidden_dim = 50  # channel width shared by Encoder and Decoder
output_dim = 3  # channels of the reconstructed image
sentence_length=10  # symbols generated by the Sender (it appends one extra EOS step)
vocab_size = 100
embed_dim =50


# The encoder's last residual conv must output hidden_dim channels because its
# result is added to a hidden_dim-channel tensor.
encoder = Encoder(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=hidden_dim)
decoder = Decoder(input_dim=hidden_dim, hidden_dim=hidden_dim, output_dim=output_dim)
# hidden_dim * 12 matches the flattened encoder output that the Sender uses as its
# initial LSTM hidden state. Positional args after it: max_len, temperature=1.0, training=True.
sender = Sender(vocab_size, embed_dim, hidden_dim *12 , sentence_length, 1.0, True)
receiver_nn = Receiver_nn(hidden_dim*12)
receiver = Receiver(vocab_size,embed_dim,hidden_dim*12,receiver_nn)


# Construction order above determines the order in which parameters draw their
# random initial values.
model = Model(Encoder=encoder, Decoder=decoder, Sender = sender, Receiver = receiver).to(DEVICE)

In [None]:
def asymmetric_mse_loss(output, target, smaller_weight=1.5, larger_weight=0.5):
    """Mean squared error with different weights for under- and over-estimation.

    Args:
        output: predicted tensor.
        target: reference tensor, broadcastable with `output`.
        smaller_weight: weight of squared errors where `output < target`.
        larger_weight: weight of squared errors where `output >= target`.

    Returns:
        Scalar tensor: the mean of the weighted squared errors.
    """
    # Float masks selecting the two regimes element-wise.
    undershoot = torch.lt(output, target).float()
    overshoot_or_equal = torch.ge(output, target).float()

    under_term = smaller_weight * undershoot * (target - output) ** 2
    over_term = larger_weight * overshoot_or_equal * (output - target) ** 2
    return torch.mean(under_term + over_term)


In [None]:
from torch.optim import Adam

# Reconstruction criterion (element-wise mean squared error) used by both training cells.
mse_loss = nn.MSELoss()
# One Adam optimizer over every parameter of the full model, with learning rate `lr`.
optimizer = Adam(model.parameters(), lr=lr)

In [None]:
# The model was already moved to DEVICE when it was constructed; this repeats the
# move so the cell can be re-run on its own.
model = model.to(DEVICE)

In [None]:
#Run this for progressive
# Training loop in which each sample is compared against exactly ONE of the decoder's
# per-step reconstructions; which step is chosen is derived from the sample's label.
# NOTE(review): `Image` is imported but not used in this cell.
from PIL import Image
print("Start training VQ-VAE...")
model.train()

# Lists to keep track of validation and training losses
# (only batches with batch_idx % print_step == 0 are recorded, not epoch averages)
losses = []
val_losses = []

# weights = [0.0001, 0.00002, 0.000004, 0.0000008, 0.00000016]

# Your main training loop:
for epoch in range(epochs):
    overall_loss = 0  # NOTE(review): never updated or read below
    # `_` holds the batch labels here (it is not a throw-away value).
    for batch_idx, (x, _) in enumerate(train_loader):
        # label // 100 - 1 is used as the index of the reconstruction step to train on.
        ica_orders = torch.div(_, 100, rounding_mode='trunc')
        ica_orders-=1

        x = x.to(DEVICE)
        optimizer.zero_grad()
        # x_hat1 carries one reconstruction per message step along dim 1.
        x_hat1, message, z, receiver_op = model(x)
        # Expand dimensions for advanced indexing
        ica_orders_expanded = ica_orders.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
        # Gather along the second dimension using ica_orders
        # (the index is broadcast over a 3x45x45 image, then the singleton step dim is dropped)
        x_hat = torch.gather(x_hat1, 1, ica_orders_expanded.expand(-1, -1, 3, 45, 45).to(DEVICE)).squeeze(1)

        recon_loss = mse_loss(x_hat, x)

        loss =  recon_loss

        loss.backward()

        optimizer.step()

        # NOTE(review): with the dataset sizes printed earlier (47520 training samples,
        # batch_size 256) an epoch has fewer than print_step batches, so only batch 0
        # of each epoch is logged - confirm this is intended.
        if batch_idx % print_step == 0:
            losses.append(recon_loss.item())
            print("Training epoch:", epoch + 1)
            print("epoch:", epoch + 1, "  step:", batch_idx + 1, " recon_loss:", recon_loss.item())
            # print("step: {}  recon_loss: {:.8f}  diversity penalty: {}  total diversity: {:.8f}  total loss: {:.8f}".format(
            #     batch_idx + 1,
            #     recon_loss.item(),
            #     [round(val, 8) for val in diversity_penalty],  # This will round each value in the list to 8 decimal places
            #     sum(diversity_penalty),
            #     loss.item()
            # ))

    # Validation part
    # eval() clears the `training` flag on all submodules, so the Sender takes its
    # argmax (one-hot) branch instead of sampling.
    model.eval()  # switch model to the evaluation mode
    with torch.no_grad():  # deactivate autograd engine to reduce memory usage and speed up computations
        for batch_idx, (x, _) in enumerate(val_loader):
            # Same label -> step-index mapping as in training.
            ica_orders = torch.div(_, 100, rounding_mode='trunc')
            ica_orders-=1
            x = x.to(DEVICE)
            x_hat1, message, z, receiver_op = model(x)
            # Expand dimensions for advanced indexing
            ica_orders_expanded = ica_orders.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            # Gather along the second dimension using ica_orders
            x_hat = torch.gather(x_hat1, 1, ica_orders_expanded.expand(-1, -1, 3, 45, 45).to(DEVICE)).squeeze(1)

            recon_loss = mse_loss(x_hat, x)



            if batch_idx % print_step == 0:
                val_losses.append(recon_loss.item())
                print("epoch:", epoch + 1, "  step:", batch_idx + 1, " recon_loss:", recon_loss.item())
                # print("step: {}  recon_loss: {:.8f}  diversity penalty: {}  total diversity: {:.8f}  total loss: {:.8f}".format(
                #     batch_idx + 1,
                #     recon_loss.item(),
                #     [round(val, 8) for val in diversity_penalty],  # This will round each value in the list to 8 decimal places
                #     sum(diversity_penalty),
                #     loss.item()
                # ))



    model.train()  # switch model back to the train mode

print("Finish!!")


Start training VQ-VAE...
Training epoch: 1
epoch: 1   step: 1  recon_loss: 0.2070959508419037
epoch: 1   step: 1  recon_loss: 0.023292308673262596
Training epoch: 2
epoch: 2   step: 1  recon_loss: 0.00762425409629941


KeyboardInterrupt: 

In [None]:
#Run this for strict progressive (not vectorized) with diversity penalty
# Training loop in which, for every sample, the FIRST k per-step reconstructions are
# all compared against the input image, with k = label // 200.
# NOTE(review): the diversity penalty mentioned above is commented out below; the
# loss is the reconstruction loss only.
# NOTE(review): `Image` is imported but not used in this cell.
from PIL import Image
print("Start training VQ-VAE...")
model.train()

# Lists to keep track of validation and training losses
# (only batches with batch_idx % print_step == 0 are recorded)
losses = []
val_losses = []

# Your main training loop:
for epoch in range(epochs):
    overall_loss = 0  # NOTE(review): never updated or read below
    # `_` holds the batch labels here (it is not a throw-away value).
    for batch_idx, (x, _) in enumerate(train_loader):
        # Number of leading reconstruction steps to supervise for each sample.
        ica_orders = torch.div(_, 200, rounding_mode='trunc')
        x = x.to(DEVICE)
        optimizer.zero_grad()
        x_hat, message, z, receiver_op = model(x)


        losses_batch = []
        # Per-sample Python loop: each sample may use a different number of steps.
        for i in range(x.size(0)):
            # Expand x[i] according to ica_orders[i]
            expanded_x = x[i].unsqueeze(0).expand(ica_orders[i].item(), -1, -1, -1)
            # Truncate x_hat[i] according to ica_orders[i]
            truncated_x_hat = x_hat[i, :ica_orders[i].item()]
            # Compute the loss and store
            # NOTE(review): a label below 200 would give k = 0 and an empty comparison - confirm labels are >= 200.
            individual_loss = mse_loss(truncated_x_hat, expanded_x)
            losses_batch.append(individual_loss)

        # Average out the losses
        # (every sample contributes equally, regardless of how many steps it used)
        recon_loss = torch.stack(losses_batch).mean()
        loss = recon_loss


        loss.backward()
        optimizer.step()

        if batch_idx % print_step == 0:
            losses.append(recon_loss.item())
            print("Training epoch:", epoch + 1)
            print("epoch:", epoch + 1, "  step:", batch_idx + 1, " recon_loss:", recon_loss.item())
            # print("step: {}  recon_loss: {:.8f}  diversity penalty: {}  total diversity: {:.8f}  total loss: {:.8f}".format(
            #     batch_idx + 1,
            #     recon_loss.item(),
            #     [round(val, 8) for val in diversity_penalty],  # This will round each value in the list to 8 decimal places
            #     sum(diversity_penalty),
            #     loss.item()
            # ))

    # Validation part
    # eval() clears the `training` flag on all submodules, so the Sender takes its
    # argmax (one-hot) branch instead of sampling.
    model.eval()  # switch model to the evaluation mode
    with torch.no_grad():  # deactivate autograd engine to reduce memory usage and speed up computations
        val_loss = 0  # NOTE(review): never updated or read below
        for batch_idx, (x, _) in enumerate(val_loader):
            ica_orders = torch.div(_, 200, rounding_mode='trunc')
            x = x.to(DEVICE)
            # NOTE(review): no backward pass happens during validation, so this call looks unnecessary.
            optimizer.zero_grad()
            x_hat, message, z, receiver_op = model(x)

            # diversity_penalty = compute_diversity_penalty( torch.argmax(message, dim=2), weights)

            losses_batch = []

            for i in range(x.size(0)):
                # Expand x[i] according to ica_orders[i]
                expanded_x = x[i].unsqueeze(0).expand(ica_orders[i].item(), -1, -1, -1)

                # Truncate x_hat[i] according to ica_orders[i]
                truncated_x_hat = x_hat[i, :ica_orders[i].item()]

                # Compute the loss and store
                individual_loss = mse_loss(truncated_x_hat, expanded_x)
                losses_batch.append(individual_loss)

            # Average out the losses
            recon_loss = torch.stack(losses_batch).mean()

            # Add diversity penalty to the loss
            # loss = recon_loss + sum(diversity_penalty)
            loss = recon_loss



            if batch_idx % print_step == 0:
                val_losses.append(recon_loss.item())
                print("epoch:", epoch + 1, "  step:", batch_idx + 1, " recon_loss:", recon_loss.item())
                # print("step: {}  recon_loss: {:.8f}  diversity penalty: {}  total diversity: {:.8f}  total loss: {:.8f}".format(
                #     batch_idx + 1,
                #     recon_loss.item(),
                #     [round(val, 8) for val in diversity_penalty],  # This will round each value in the list to 8 decimal places
                #     sum(diversity_penalty),
                #     loss.item()
                # ))



    model.train()  # switch model back to the train mode

print("Finish!!")


Start training VQ-VAE...
z shape torch.Size([256, 50, 12])


RuntimeError: ignored

In [None]:
# Restore previously trained weights. map_location remaps the stored tensors onto
# the active device, so a checkpoint written on a GPU also loads on a CPU-only runtime.
model.load_state_dict(torch.load('/content/drive/My Drive/emergentallVAE_epochs_728_all_ICA_sent5_progressive_strict.pth', map_location=DEVICE))


<All keys matched successfully>

In [None]:
# Full training checkpoint: besides the weights it stores the optimizer state, the
# last epoch index and both loss histories, so training can be resumed or inspected.
checkpoint = dict(
    model_state_dict=model.state_dict(),
    optimizer_state_dict=optimizer.state_dict(),
    epoch=epoch,
    losses=losses,
    validation_losses=val_losses,
)
torch.save(checkpoint, '/content/drive/My Drive/checkpoints_epochs_353_ICA_all_sent5_progressive.pth')

In [None]:
print("Running inference on entire dataset...")
# Collect, in dataset order, every sample's label and the message the Sender emits for it.
labels = []
messages = []
embeddings = []

# eval() clears the `training` flag on every submodule, so the Sender emits exact
# one-hot (argmax) symbols instead of relaxed samples.
model.eval()
try:
    # Without no_grad every stored message would keep its whole autograd graph
    # (and the activations behind it) alive for the full dataset.
    with torch.no_grad():
        for batch_idx, (x, batch_labels) in enumerate(loader):
            x = x.to(DEVICE)
            x_hat, message, z, embedding = model(x)
            # One entry per sample: a 1-element label tensor and a (steps, vocab) message.
            labels.extend(batch_labels)
            messages.extend(message)
            # embeddings.extend(embedding[:,-1,:])
finally:
    # Leave the model as the training cells expect it: train mode, Sender sampling enabled.
    model.train()

print("Finish!!")

Start training VQ-VAE...
Finish!!


In [None]:
import pandas as pd, numpy as np
def _first_occurrences_with_suffix(names, suffix, seen):
    """Return an array of `name + suffix` for the first occurrence of each name.

    `seen` is filled in place with every distinct name encountered.
    """
    kept = []
    for name in names:
        if name in seen:
            continue
        seen.add(name)
        kept.append(name + suffix)
    return np.array(kept)


# One "seen" set per ICA order (lookup_100 belongs to the disabled legacy ICA-100 table).
lookup_100 = set()
lookup_100_new = set()
lookup_80 = set()
lookup_60 = set()
lookup_40 = set()
lookup_20 = set()

# Cluster-name tables, one CSV per ICA order, read from Drive.
label_df_100_recovery_new = pd.read_csv("/content/drive/My Drive/ClusterNames_100.csv")
label_df_80_recovery = pd.read_csv("/content/drive/My Drive/ClusterNames_80.csv")
label_df_60_recovery = pd.read_csv("/content/drive/My Drive/ClusterNames_60.csv")
label_df_40_recovery = pd.read_csv("/content/drive/My Drive/ClusterNames_40.csv")
label_df_20_recovery = pd.read_csv("/content/drive/My Drive/ClusterNames_20.csv")

# Every table keeps its names in a column called 'Cluster Name 100'.
label_names_100_new = list(label_df_100_recovery_new['Cluster Name 100'])
label_names_80 = list(label_df_80_recovery['Cluster Name 100'])
label_names_60 = list(label_df_60_recovery['Cluster Name 100'])
label_names_40 = list(label_df_40_recovery['Cluster Name 100'])
label_names_20 = list(label_df_20_recovery['Cluster Name 100'])

# De-duplicate (keeping first-appearance order) and tag each name with its ICA order.
label_names_unique_100_new = _first_occurrences_with_suffix(label_names_100_new, "_ICA100", lookup_100_new)
label_names_unique_80 = _first_occurrences_with_suffix(label_names_80, "_ICA80", lookup_80)
label_names_unique_60 = _first_occurrences_with_suffix(label_names_60, "_ICA60", lookup_60)
label_names_unique_40 = _first_occurrences_with_suffix(label_names_40, "_ICA40", lookup_40)
label_names_unique_20 = _first_occurrences_with_suffix(label_names_20, "_ICA20", lookup_20)

# Put them all in one list, ordered ICA 20, 40, 60, 80, 100.
label_names_unique_all = [label_names_unique_20, label_names_unique_40, label_names_unique_60, label_names_unique_80, label_names_unique_100_new]

In [None]:
def generate_messages(messages,labels):
    """Write one CSV row per sample: its message symbols followed by its label name.

    For each sample the symbol at every message step is the argmax over the
    vocabulary; the final step is dropped before writing. The label name is
    looked up in `label_names_unique_all[(label // 200) - 1][label % 200]`.

    NOTE(review): a later cell defines `generate_messages` again; when both
    cells are run, that later definition replaces this one.

    Args:
        messages: sequence of per-sample message tensors.
        labels: sequence of per-sample integer labels.

    Returns:
        The list of rows that was written to the CSV file.
    """
    import csv

    rows = []
    model.Sender.training = False
    for idx in range(len(labels)):
        symbols = np.argmax(messages[idx].cpu().detach().numpy(),axis=1).astype(int)
        names_for_order = label_names_unique_all[(labels[idx]//200) - 1]
        test_label = names_for_order[labels[idx]%200]

        # Concatenating the integer symbols with the label yields one row of strings.
        symbol_part = symbols[:len(symbols)-1].reshape(1, -1)
        label_part = np.array([test_label]).reshape(1, -1)
        row = np.concatenate((symbol_part, label_part), axis=1)
        rows.append(row.tolist()[0])

    with open("corrected_vocab100_sent_5_epoch_1373_2040100_ICA_progressive_strict.csv", "w", newline="") as file:
        csv.writer(file).writerows(rows)

    model.Sender.training = True
    return rows


In [None]:
def generate_messages(messages, labels,
                      output_path="corrected_table_sent_5_epoch_197_261_all_ICA_progressive_strict_assymmetry.csv"):
    """Write one CSV row per sample: its message symbols followed by its label name.

    For each sample the symbol at every message step is the argmax over the
    vocabulary; the final step is dropped before writing. The label name is
    looked up in `label_names_unique_all[(label // 200) - 1][label % 200]`.

    Args:
        messages: sequence of per-sample message tensors.
        labels: sequence of per-sample integer labels.
        output_path: destination CSV file. Defaults to the file name this
            function has always written, so existing calls are unaffected.

    Returns:
        The list of rows that was written to the CSV file.
    """
    import csv

    output_csv = []
    model.Sender.training = False
    try:
        for z in range(len(labels)):
            message = np.argmax(messages[z].cpu().detach().numpy(),axis=1).astype(int)
            test_label = label_names_unique_all[(labels[z]//200) - 1][labels[z]%200]

            # Concatenating the integer symbols with the label yields one row of strings.
            result = np.concatenate((message[:len(message)-1].reshape(1, -1), np.array([test_label]).reshape(1, -1)), axis=1)
            output_csv.append(result.tolist()[0])

        # Write the rows to the CSV file
        with open(output_path, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerows(output_csv)
    finally:
        # Always re-enable sampling, even if a lookup or the write fails.
        model.Sender.training = True
    return output_csv


In [None]:
# Export the messages collected by the inference cell; `output` keeps the written rows.
output = generate_messages(messages,labels)

In [None]:
# Sanity check: number of distinct cluster names in the ICA-80 table.
print(len(label_names_unique_80))

51


In [None]:
# Network group names; a name's position in this tuple is its numeric group id.
_network_group_names = (
    "DMN", "ATTENTION", "MOTOR", "VISUAL", "EXECUTIVE",
    "SENSORY", "SALIENCE", "AUDITORY", "COGNITIVE", "BASALGANGLIA",
    "LANG", "CEREBELLAR", "HYPOTHALAMUS", "THALAMUS", "OTHERS",
)
# name -> id
newlabels = {group_name: group_id for group_id, group_name in enumerate(_network_group_names)}
# id -> name (inverse mapping)
newnumbers = dict(enumerate(_network_group_names))

In [None]:
def custom_distance_network(u, v):
    """Prefix-based distance between two symbol sequences.

    Returns `sentence_length - i`, where `i` is the first position at which
    `u` and `v` differ, so an earlier divergence gives a larger distance.
    Returns 0 when they agree on every position of `u`.
    """
    first_mismatch = next((pos for pos in range(len(u)) if u[pos] != v[pos]), None)
    if first_mismatch is None:
        return 0
    return sentence_length - first_mismatch
def generate_messages_network(messages,labels):
    """Export a message-similarity network over the first `n` samples as two CSV files.

    * The "groups" file has one row per sample: the sample's label name with its
      index appended, and a numeric group id. The id is taken from `newlabels`
      when the label name starts with one of its keys whose id is <= 7;
      otherwise it is 8.
    * The "distances" file has one row per ordered pair (i, j) of the first `n`
      samples (including i == j) whose `custom_distance_network` distance is at
      most `maxdist`: both node names and the distance.

    Also prints how many pairs were found at each distance 0..maxdist.
    `model.Sender.training` is set to False on entry and back to True on exit.

    Args:
        messages: per-sample message arrays; np.argmax is applied to them directly,
            so NumPy arrays are expected here (the calling cell converts first).
        labels: per-sample integer labels.

    NOTE(review): `n` is hard-coded to 2000; fewer samples than that would make the
    pairwise loop index past the end of `message` / `labels`.
    NOTE(review): the pairwise loop performs n * n distance evaluations in Python.
    """
    # Assuming you have defined messages, label_names_unique, and labels arrays
    output_csv = []  # edge rows: [name_i, name_j, dist]
    group_csv = []  # node rows: [name, group]

    model.Sender.training = False
    message = []
    n = 2000  # number of leading samples included in the network
    maxdist = 3  # only pairs with distance <= maxdist become edges
    import csv
    # labelsused = [0 for i in range(58)]
    for z in range(len(labels)):
        group = 8  # default group when no key with id <= 7 prefixes the label name
        for i in newlabels:
            # Prefix match of the label name against each group key; the last match wins.
            if label_names_unique_all[(labels[z]//200) - 1][labels[z]%200][:len(i)] == i and newlabels[i]<=7:
                group = newlabels[i]
        # Symbol index per message step for this sample.
        message.append(np.argmax(messages[z],axis=1).astype(int))
        # Node name = label name + sample index, which makes every node unique.
        result = np.concatenate((np.array(label_names_unique_all[(labels[z]//200) - 1][labels[z]%200]+str(z)).reshape(1,-1), np.array(group).reshape(1,-1)), axis=1)
        if z < n:
            group_csv.append(result.tolist()[0])

        # test_label = label_names_unique[labels[z]]

        # result = np.concatenate((message[:len(message)-1].reshape(1, -1), np.array([test_label]).reshape(1, -1)), axis=1)

        # # Convert the numpy array to a list of integers and append to output_csv
        # output_csv.append(result.tolist()[0])
    message= np.array(message)

    # counts[d] = number of ordered pairs found at distance d.
    counts = [0 for i in range(maxdist+1)]
    for i in range(n):
        for j in range(n):
            dist = custom_distance_network(message[i],message[j])
            if dist <=maxdist:
                # if dist == 0 or dist ==1 or dist ==2:
                #     print(dist)
                #     print(message[i])
                #     print(message[j])

                counts[dist] += 1
                test_labeli = label_names_unique_all[(labels[i]//200) - 1][labels[i]%200] + str(i)
                test_labelj = label_names_unique_all[(labels[j]//200) - 1][labels[j]%200] + str(j)
                # print("test_label",test_label.shape)
                # print("message",message[i][:len(message[i])-1].reshape(1, -1).shape)
                result = np.concatenate((np.array([test_labeli,test_labelj]).reshape(1,-1),np.array([dist]).reshape(1,-1)), axis=1)
                output_csv.append(result.tolist()[0])

    print(counts)
    # Write the mixed data types to the CSV file
    with open(str(n)+"maxdist"+ str(maxdist)+ "distances_sent_5_epoch_728_iterative_all.csv", "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(output_csv)
    with open(str(n)+"maxdist"+ str(maxdist)+"groups1_sent_5_epoch_728_iterative_all.csv", "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(group_csv)

    model.Sender.training = True
    # return output_csv


In [None]:
# Stack the per-sample messages into one tensor and hand them over as a NumPy array,
# because generate_messages_network applies np.argmax to them directly.
messages1 = torch.stack(messages)[:,:,:]
generate_messages_network(messages1.cpu().detach().numpy(),labels)

[2110, 280, 1030, 5604]


In [None]:
import pandas as pd, numpy as np
# Same label tables as the Drive-based cell above, but read from an Azure ML
# workspace path. NOTE(review): these absolute paths exist only on that machine;
# elsewhere this cell raises FileNotFoundError.
def _dedupe_with_suffix(names, suffix, seen):
    """Return an array of `name + suffix` for the first occurrence of each name.

    `seen` is filled in place with every distinct name encountered.
    """
    kept = []
    for name in names:
        if name in seen:
            continue
        seen.add(name)
        kept.append(name + suffix)
    return np.array(kept)


# One "seen" set per ICA order.
lookup_100 = set()
lookup_80 = set()
lookup_60 = set()
lookup_40 = set()
lookup_20 = set()

label_df_100_recovery = pd.read_csv("/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_100.csv")
label_df_80_recovery = pd.read_csv("/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_80.csv")
label_df_60_recovery = pd.read_csv("/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_60.csv")
label_df_40_recovery = pd.read_csv("/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_40.csv")
label_df_20_recovery = pd.read_csv("/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_20.csv")

# Every table keeps its names in a column called 'Cluster Name 100'.
label_names_100 = list(label_df_100_recovery['Cluster Name 100'])
label_names_80 = list(label_df_80_recovery['Cluster Name 100'])
label_names_60 = list(label_df_60_recovery['Cluster Name 100'])
label_names_40 = list(label_df_40_recovery['Cluster Name 100'])
label_names_20 = list(label_df_20_recovery['Cluster Name 100'])

# De-duplicate (keeping first-appearance order) and tag each name with its ICA order.
label_names_unique_100 = _dedupe_with_suffix(label_names_100, "_ICA100", lookup_100)
label_names_unique_80 = _dedupe_with_suffix(label_names_80, "_ICA80", lookup_80)
label_names_unique_60 = _dedupe_with_suffix(label_names_60, "_ICA60", lookup_60)
label_names_unique_40 = _dedupe_with_suffix(label_names_40, "_ICA40", lookup_40)
label_names_unique_20 = _dedupe_with_suffix(label_names_20, "_ICA20", lookup_20)

# Put them all in one list, ordered ICA 20, 40, 60, 80, 100.
label_names_unique_all = [label_names_unique_20, label_names_unique_40, label_names_unique_60, label_names_unique_80, label_names_unique_100]

FileNotFoundError: [Errno 2] No such file or directory: '/home/azureuser/cloudfiles/code/Users/apallik1/Inference Codes and Required Docs/ClusterNames/ClusterNames_100.csv'

In [None]:
# Re-export the message table; `messages` is passed as the per-sample list
# (the stacking step below is intentionally left disabled).
# messages = torch.stack(messages)[:,:,:]
output = generate_messages(messages,labels)

In [None]:
# Snapshot the package versions installed in this runtime into requirements.txt.
!pip freeze > requirements.txt