In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
'''
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''        
        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

"\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n"

In [7]:
############################## Using CLIP model to bring EEG embeddings and Text Embeddings Closer #####################################

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm  # For progress bars

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load and normalize embeddings
eeg_embeddings = np.load('/kaggle/input/embeddings/eeg_embeddings.npy')
text_embeddings = np.load('/kaggle/input/embeddings/generated_captions_embeddings.npy')

# Normalize embeddings
eeg_embeddings = torch.tensor(eeg_embeddings / np.linalg.norm(eeg_embeddings, axis=1, keepdims=True), dtype=torch.float32)
text_embeddings = torch.tensor(text_embeddings / np.linalg.norm(text_embeddings, axis=1, keepdims=True), dtype=torch.float32)

# Transfer embeddings to GPU
eeg_embeddings = eeg_embeddings.to(device)
text_embeddings = text_embeddings.to(device)

# Create dataset and dataloader
dataset = TensorDataset(eeg_embeddings, text_embeddings)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define CLIP-like model
class CLIPModel(nn.Module):
    def __init__(self, eeg_dim, text_dim, projection_dim):
        super(CLIPModel, self).__init__()
        self.eeg_encoder = nn.Linear(eeg_dim, projection_dim)
        self.text_encoder = nn.Linear(text_dim, projection_dim)

    def forward(self, eeg, text):
        eeg_proj = self.eeg_encoder(eeg)
        text_proj = self.text_encoder(text)
        return eeg_proj, text_proj

# Loss function: Contrastive loss
def contrastive_loss(eeg_proj, text_proj, temperature=0.07):
    logits = torch.matmul(eeg_proj, text_proj.T) / temperature
    labels = torch.arange(logits.size(0)).to(logits.device)
    loss_eeg = nn.CrossEntropyLoss()(logits, labels)
    loss_text = nn.CrossEntropyLoss()(logits.T, labels)
    return (loss_eeg + loss_text) / 2

# Model, optimizer, and training setup
eeg_dim = eeg_embeddings.size(1)
text_dim = text_embeddings.size(1)
projection_dim = 512
model = CLIPModel(eeg_dim, text_dim, projection_dim).to(device)  # Transfer model to GPU
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop with progress monitoring
epochs = 20
model.train()
for epoch in range(epochs):
    epoch_loss = 0
    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}") as progress_bar:
        for eeg_batch, text_batch in progress_bar:
            optimizer.zero_grad()
            eeg_proj, text_proj = model(eeg_batch, text_batch)
            loss = contrastive_loss(eeg_proj, text_proj)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            progress_bar.set_postfix({"Batch Loss": loss.item()})
    print(f"Epoch {epoch+1}/{epochs}, Average Loss: {epoch_loss / len(dataloader):.4f}")

# Save modified embeddings
model.eval()
modified_eeg_embeddings = []
modified_text_embeddings = []
with torch.no_grad():
    with tqdm(dataloader, desc="Generating Modified Embeddings") as progress_bar:
        for eeg_batch, text_batch in progress_bar:
            eeg_proj, text_proj = model(eeg_batch, text_batch)
            modified_eeg_embeddings.append(eeg_proj.cpu())
            modified_text_embeddings.append(text_proj.cpu())

# Combine and save embeddings
modified_eeg_embeddings = torch.cat(modified_eeg_embeddings, dim=0).numpy()
modified_text_embeddings = torch.cat(modified_text_embeddings, dim=0).numpy()

np.save('modified_eeg_embeddings.npy', modified_eeg_embeddings)
np.save('modified_text_embeddings.npy', modified_text_embeddings)

print("Modified embeddings saved!")

####################################################################################################################################

Using device: cuda


Epoch 1/20: 100%|██████████| 259/259 [00:00<00:00, 300.35it/s, Batch Loss=3.6] 


Epoch 1/20, Average Loss: 4.4892


Epoch 2/20: 100%|██████████| 259/259 [00:00<00:00, 312.80it/s, Batch Loss=3.66]


Epoch 2/20, Average Loss: 4.2536


Epoch 3/20: 100%|██████████| 259/259 [00:00<00:00, 315.09it/s, Batch Loss=3.62]


Epoch 3/20, Average Loss: 4.1603


Epoch 4/20: 100%|██████████| 259/259 [00:00<00:00, 307.51it/s, Batch Loss=3.35]


Epoch 4/20, Average Loss: 4.0978


Epoch 5/20: 100%|██████████| 259/259 [00:00<00:00, 305.01it/s, Batch Loss=3.36]


Epoch 5/20, Average Loss: 4.0522


Epoch 6/20: 100%|██████████| 259/259 [00:00<00:00, 305.72it/s, Batch Loss=3.21]


Epoch 6/20, Average Loss: 4.0230


Epoch 7/20: 100%|██████████| 259/259 [00:00<00:00, 311.21it/s, Batch Loss=3.17]


Epoch 7/20, Average Loss: 3.9993


Epoch 8/20: 100%|██████████| 259/259 [00:00<00:00, 309.02it/s, Batch Loss=2.98]


Epoch 8/20, Average Loss: 3.9850


Epoch 9/20: 100%|██████████| 259/259 [00:00<00:00, 296.47it/s, Batch Loss=3.22]


Epoch 9/20, Average Loss: 3.9703


Epoch 10/20: 100%|██████████| 259/259 [00:00<00:00, 271.23it/s, Batch Loss=3.22]


Epoch 10/20, Average Loss: 3.9622


Epoch 11/20: 100%|██████████| 259/259 [00:00<00:00, 283.52it/s, Batch Loss=3.08]


Epoch 11/20, Average Loss: 3.9536


Epoch 12/20: 100%|██████████| 259/259 [00:00<00:00, 306.61it/s, Batch Loss=3.13]


Epoch 12/20, Average Loss: 3.9487


Epoch 13/20: 100%|██████████| 259/259 [00:00<00:00, 306.76it/s, Batch Loss=3.06]


Epoch 13/20, Average Loss: 3.9452


Epoch 14/20: 100%|██████████| 259/259 [00:00<00:00, 301.44it/s, Batch Loss=3.04]


Epoch 14/20, Average Loss: 3.9390


Epoch 15/20: 100%|██████████| 259/259 [00:00<00:00, 296.73it/s, Batch Loss=3.14]


Epoch 15/20, Average Loss: 3.9376


Epoch 16/20: 100%|██████████| 259/259 [00:00<00:00, 300.42it/s, Batch Loss=3.11]


Epoch 16/20, Average Loss: 3.9343


Epoch 17/20: 100%|██████████| 259/259 [00:00<00:00, 301.69it/s, Batch Loss=3.04]


Epoch 17/20, Average Loss: 3.9288


Epoch 18/20: 100%|██████████| 259/259 [00:00<00:00, 300.97it/s, Batch Loss=3.45]


Epoch 18/20, Average Loss: 3.9286


Epoch 19/20: 100%|██████████| 259/259 [00:00<00:00, 303.57it/s, Batch Loss=3.3] 


Epoch 19/20, Average Loss: 3.9273


Epoch 20/20: 100%|██████████| 259/259 [00:00<00:00, 306.36it/s, Batch Loss=3.26]


Epoch 20/20, Average Loss: 3.9195


Generating Modified Embeddings: 100%|██████████| 259/259 [00:00<00:00, 1153.00it/s]


Modified embeddings saved!


In [8]:
############################## Using CLIP model to bring EEG embeddings and Text Embeddings Closer( for vae only )#####################################

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm  # For progress bars

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load and normalize embeddings
eeg_embeddings = np.load('/kaggle/input/embeddings/eeg_embeddings.npy')
text_embeddings = np.load('/kaggle/input/embeddings/generated_captions_vae_embeddings.npy')

# Normalize embeddings
eeg_embeddings = torch.tensor(eeg_embeddings / np.linalg.norm(eeg_embeddings, axis=1, keepdims=True), dtype=torch.float32)
text_embeddings = torch.tensor(text_embeddings / np.linalg.norm(text_embeddings, axis=1, keepdims=True), dtype=torch.float32)

# Transfer embeddings to GPU
eeg_embeddings = eeg_embeddings.to(device)
text_embeddings = text_embeddings.to(device)

# Create dataset and dataloader
dataset = TensorDataset(eeg_embeddings, text_embeddings)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Define CLIP-like model
class CLIPModel(nn.Module):
    def __init__(self, eeg_dim, text_dim, projection_dim):
        super(CLIPModel, self).__init__()
        self.eeg_encoder = nn.Linear(eeg_dim, projection_dim)
        self.text_encoder = nn.Linear(text_dim, projection_dim)

    def forward(self, eeg, text):
        eeg_proj = self.eeg_encoder(eeg)
        text_proj = self.text_encoder(text)
        return eeg_proj, text_proj

# Loss function: Contrastive loss
def contrastive_loss(eeg_proj, text_proj, temperature=0.07):
    logits = torch.matmul(eeg_proj, text_proj.T) / temperature
    labels = torch.arange(logits.size(0)).to(logits.device)
    loss_eeg = nn.CrossEntropyLoss()(logits, labels)
    loss_text = nn.CrossEntropyLoss()(logits.T, labels)
    return (loss_eeg + loss_text) / 2

# Model, optimizer, and training setup
eeg_dim = eeg_embeddings.size(1)
text_dim = text_embeddings.size(1)
projection_dim = 512
model = CLIPModel(eeg_dim, text_dim, projection_dim).to(device)  # Transfer model to GPU
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop with progress monitoring
epochs = 20
model.train()
for epoch in range(epochs):
    epoch_loss = 0
    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}") as progress_bar:
        for eeg_batch, text_batch in progress_bar:
            optimizer.zero_grad()
            eeg_proj, text_proj = model(eeg_batch, text_batch)
            loss = contrastive_loss(eeg_proj, text_proj)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            progress_bar.set_postfix({"Batch Loss": loss.item()})
    print(f"Epoch {epoch+1}/{epochs}, Average Loss: {epoch_loss / len(dataloader):.4f}")

# Save modified embeddings
model.eval()
modified_eeg_embeddings = []
modified_text_embeddings = []
with torch.no_grad():
    with tqdm(dataloader, desc="Generating Modified Embeddings") as progress_bar:
        for eeg_batch, text_batch in progress_bar:
            eeg_proj, text_proj = model(eeg_batch, text_batch)
            modified_eeg_embeddings.append(eeg_proj.cpu())
            modified_text_embeddings.append(text_proj.cpu())

# Combine and save embeddings
modified_eeg_embeddings = torch.cat(modified_eeg_embeddings, dim=0).numpy()
modified_text_embeddings = torch.cat(modified_text_embeddings, dim=0).numpy()

np.save('modified_eeg_vae_embeddings.npy', modified_eeg_embeddings)
np.save('modified_text_vae_embeddings.npy', modified_text_embeddings)

print("Modified embeddings saved!")

####################################################################################################################################

Using device: cuda


Epoch 1/20: 100%|██████████| 259/259 [00:00<00:00, 323.14it/s, Batch Loss=3.33]


Epoch 1/20, Average Loss: 4.2304


Epoch 2/20: 100%|██████████| 259/259 [00:00<00:00, 324.94it/s, Batch Loss=3.33]


Epoch 2/20, Average Loss: 4.1557


Epoch 3/20: 100%|██████████| 259/259 [00:00<00:00, 312.44it/s, Batch Loss=3.33]


Epoch 3/20, Average Loss: 4.1557


Epoch 4/20: 100%|██████████| 259/259 [00:00<00:00, 262.43it/s, Batch Loss=3.33]


Epoch 4/20, Average Loss: 4.1557


Epoch 5/20: 100%|██████████| 259/259 [00:00<00:00, 292.56it/s, Batch Loss=3.33]


Epoch 5/20, Average Loss: 4.2273


Epoch 6/20: 100%|██████████| 259/259 [00:00<00:00, 315.60it/s, Batch Loss=3.33]


Epoch 6/20, Average Loss: 4.1557


Epoch 7/20: 100%|██████████| 259/259 [00:00<00:00, 319.07it/s, Batch Loss=3.33]


Epoch 7/20, Average Loss: 4.1557


Epoch 8/20: 100%|██████████| 259/259 [00:00<00:00, 325.41it/s, Batch Loss=3.33]


Epoch 8/20, Average Loss: 4.1557


Epoch 9/20: 100%|██████████| 259/259 [00:00<00:00, 326.95it/s, Batch Loss=3.33]


Epoch 9/20, Average Loss: 4.1557


Epoch 10/20: 100%|██████████| 259/259 [00:00<00:00, 320.95it/s, Batch Loss=3.33]


Epoch 10/20, Average Loss: 4.1557


Epoch 11/20: 100%|██████████| 259/259 [00:00<00:00, 317.98it/s, Batch Loss=3.33]


Epoch 11/20, Average Loss: 4.1557


Epoch 12/20: 100%|██████████| 259/259 [00:00<00:00, 297.99it/s, Batch Loss=3.33]


Epoch 12/20, Average Loss: 4.1557


Epoch 13/20: 100%|██████████| 259/259 [00:00<00:00, 325.65it/s, Batch Loss=3.34]


Epoch 13/20, Average Loss: 4.1811


Epoch 14/20: 100%|██████████| 259/259 [00:00<00:00, 319.62it/s, Batch Loss=3.33]


Epoch 14/20, Average Loss: 4.1572


Epoch 15/20: 100%|██████████| 259/259 [00:00<00:00, 324.67it/s, Batch Loss=3.33]


Epoch 15/20, Average Loss: 4.1557


Epoch 16/20: 100%|██████████| 259/259 [00:00<00:00, 308.22it/s, Batch Loss=3.33]


Epoch 16/20, Average Loss: 4.1557


Epoch 17/20: 100%|██████████| 259/259 [00:00<00:00, 316.65it/s, Batch Loss=3.33]


Epoch 17/20, Average Loss: 4.1559


Epoch 18/20: 100%|██████████| 259/259 [00:00<00:00, 321.11it/s, Batch Loss=3.34]


Epoch 18/20, Average Loss: 4.1645


Epoch 19/20: 100%|██████████| 259/259 [00:00<00:00, 322.93it/s, Batch Loss=3.33]


Epoch 19/20, Average Loss: 4.1573


Epoch 20/20: 100%|██████████| 259/259 [00:00<00:00, 323.62it/s, Batch Loss=3.33]


Epoch 20/20, Average Loss: 4.1561


Generating Modified Embeddings: 100%|██████████| 259/259 [00:00<00:00, 1154.22it/s]


Modified embeddings saved!


In [9]:
'''
The case 1 embeddings avaerage loss is lesser so using that only 
'''

'\nThe case 1 embeddings avaerage loss is lesser so using that only \n'