In [15]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
'''
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

"\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n"

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [17]:
# Dataset for EEG-Text Embeddings
class EEGTextDataset(Dataset):
    """Dataset of index-aligned (EEG embedding, text embedding) pairs.

    Item ``i`` is the tuple ``(eeg_embeddings[i], text_embeddings[i])`` with
    both elements converted to ``torch.float32`` tensors.

    Args:
        eeg_embeddings: Indexable, sized collection of EEG embeddings.
        text_embeddings: Indexable, sized collection of text embeddings; entry
            ``i`` is treated as the partner of ``eeg_embeddings[i]``.

    Raises:
        ValueError: If the two collections do not have the same length.
    """

    def __init__(self, eeg_embeddings, text_embeddings):
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError part-way through an epoch, or silently drop trailing pairs.
        if len(eeg_embeddings) != len(text_embeddings):
            raise ValueError(
                "EEG and text embeddings must have the same number of samples "
                f"(got {len(eeg_embeddings)} and {len(text_embeddings)})"
            )
        self.eeg_embeddings = eeg_embeddings
        self.text_embeddings = text_embeddings

    def __len__(self):
        """Return the number of EEG/text pairs."""
        return len(self.eeg_embeddings)

    def __getitem__(self, idx):
        """Return pair ``idx`` as two freshly created float32 tensors."""
        eeg = torch.tensor(self.eeg_embeddings[idx], dtype=torch.float32)
        text = torch.tensor(self.text_embeddings[idx], dtype=torch.float32)
        return eeg, text

In [18]:
# CLIP-like Model for EEG-Text Alignment
class EEGTextCLIP(nn.Module):
    """Two linear heads that map EEG and text embeddings into one shared space.

    Args:
        eeg_dim: Size of the last dimension of the EEG input.
        text_dim: Size of the last dimension of the text input.
        projection_dim: Size of the shared output space for both heads.
    """

    def __init__(self, eeg_dim, text_dim, projection_dim):
        super().__init__()
        self.eeg_projection = nn.Linear(eeg_dim, projection_dim)
        self.text_projection = nn.Linear(text_dim, projection_dim)

    def forward(self, eeg_embeddings, text_embeddings):
        """Project both inputs and L2-normalise them along the last dimension.

        Returns:
            Tuple ``(eeg_proj, text_proj)``; each has ``projection_dim`` as its
            last dimension.
        """
        # Project EEG and text embeddings into the same space
        eeg_proj = self.eeg_projection(eeg_embeddings)
        text_proj = self.text_projection(text_embeddings)

        # normalize() divides by max(norm, eps), so an all-zero projection
        # stays zero instead of becoming NaN as it would with a plain
        # `x / x.norm(...)` division.
        eeg_proj = nn.functional.normalize(eeg_proj, p=2, dim=-1)
        text_proj = nn.functional.normalize(text_proj, p=2, dim=-1)

        return eeg_proj, text_proj

In [19]:
# Contrastive Loss for EEG-Text Alignment
def contrastive_loss(eeg_proj, text_proj, temperature=0.07):
    """
    Symmetric cross-entropy loss over the EEG-vs-text similarity matrix.

    Row ``i`` of ``eeg_proj`` is treated as the match for row ``i`` of
    ``text_proj``. The matrix product of the two inputs is divided by
    ``temperature`` and scored in both directions (EEG->text and text->EEG);
    the mean of the two cross-entropy values is returned.
    """
    similarity = torch.mm(eeg_proj, text_proj.t())
    scaled = similarity / temperature

    # The target class for row i is column i (the diagonal).
    targets = torch.arange(len(eeg_proj)).to(scaled.device)

    criterion = nn.CrossEntropyLoss()
    eeg_side = criterion(scaled, targets)
    text_side = criterion(scaled.t(), targets)

    return (eeg_side + text_side) / 2

In [20]:
# Training Loop
def train_clip_model(model, dataloader, optimizer, num_epochs=10, temperature=0.07):
    """Train ``model`` with the contrastive loss and report the loss per epoch.

    Args:
        model: Module called as ``model(eeg_batch, text_batch)`` and returning
            the two projected batches.
        dataloader: Iterable yielding ``(eeg_embeddings, text_embeddings)``
            tensor batches.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of full passes over ``dataloader``.
        temperature: Temperature forwarded to ``contrastive_loss``.

    Returns:
        List with one entry per epoch: the *sum* of the per-batch losses in
        that epoch (the same value that is printed), not the mean.
    """
    # Take the device from the model itself so the function does not depend
    # on a notebook-global `device` having been defined by another cell.
    device = next(model.parameters()).device

    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        for eeg_embeddings, text_embeddings in dataloader:
            eeg_embeddings, text_embeddings = eeg_embeddings.to(device), text_embeddings.to(device)

            optimizer.zero_grad()
            eeg_proj, text_proj = model(eeg_embeddings, text_embeddings)

            loss = contrastive_loss(eeg_proj, text_proj, temperature)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss:.4f}")
        epoch_losses.append(total_loss)

    return epoch_losses

In [21]:
# Main Function
if __name__ == "__main__":
    # Seed PyTorch's global RNG so that weight initialisation and DataLoader
    # shuffling are repeatable from one run of this cell to the next.
    torch.manual_seed(42)

    # Load EEG and Text embeddings from .npy files
    eeg_embeddings_path = "/kaggle/input/modified-eeg-embeddings/modified_eeg_embeddings.npy"
    text_embeddings_path = "/kaggle/input/modified-eeg-embeddings/modified_text_embeddings.npy"

    eeg_embeddings = np.load(eeg_embeddings_path)  # Shape: (num_samples, eeg_dim)
    text_embeddings = np.load(text_embeddings_path)  # Shape: (num_samples, text_dim)

    # Ensure EEG and Text embeddings have the same number of samples
    assert len(eeg_embeddings) == len(text_embeddings), "Mismatch in number of EEG and Text embeddings!"

    # Dataset and DataLoader
    batch_size = 32
    dataset = EEGTextDataset(eeg_embeddings, text_embeddings)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model Parameters (input sizes are read from the loaded arrays)
    eeg_dim = eeg_embeddings.shape[1]  # Dimensionality of EEG embeddings
    text_dim = text_embeddings.shape[1]  # Dimensionality of Text embeddings
    projection_dim = 256  # Joint embedding space dimensionality

    # Model, Optimizer, and Training
    # `device`, `model` and `optimizer` stay at module level on purpose:
    # later cells in this notebook read them.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EEGTextCLIP(eeg_dim, text_dim, projection_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Train the model
    train_clip_model(model, dataloader, optimizer, num_epochs=500)

Epoch 1/500, Loss: 1716.1617
Epoch 2/500, Loss: 1660.7190
Epoch 3/500, Loss: 1640.3037
Epoch 4/500, Loss: 1626.3068
Epoch 5/500, Loss: 1619.0632
Epoch 6/500, Loss: 1613.9856
Epoch 7/500, Loss: 1608.6914
Epoch 8/500, Loss: 1606.4810
Epoch 9/500, Loss: 1602.4582
Epoch 10/500, Loss: 1600.7444
Epoch 11/500, Loss: 1597.6215
Epoch 12/500, Loss: 1597.6631
Epoch 13/500, Loss: 1592.6261
Epoch 14/500, Loss: 1592.9706
Epoch 15/500, Loss: 1591.3341
Epoch 16/500, Loss: 1589.6319
Epoch 17/500, Loss: 1588.3974
Epoch 18/500, Loss: 1587.6748
Epoch 19/500, Loss: 1585.3223
Epoch 20/500, Loss: 1584.8685
Epoch 21/500, Loss: 1584.6960
Epoch 22/500, Loss: 1582.4317
Epoch 23/500, Loss: 1582.7033
Epoch 24/500, Loss: 1582.6707
Epoch 25/500, Loss: 1581.0385
Epoch 26/500, Loss: 1577.8963
Epoch 27/500, Loss: 1579.3810
Epoch 28/500, Loss: 1577.3377
Epoch 29/500, Loss: 1579.0590
Epoch 30/500, Loss: 1576.7116
Epoch 31/500, Loss: 1577.8069
Epoch 32/500, Loss: 1577.0839
Epoch 33/500, Loss: 1574.6722
Epoch 34/500, Loss:

In [13]:
# Persist only the parameters (state_dict) of the in-memory `model` to
# "clip_model.pth", a path relative to the current working directory.
# `model` must already exist in the kernel when this cell runs.
torch.save(model.state_dict(), "clip_model.pth")

In [15]:
import numpy as np
from sklearn.model_selection import train_test_split

# Function to split data
def split_and_save_data(eeg_embeddings_path, text_embeddings_path, test_size=0.2, random_seed=42, output_dir="."):
    """
    Splits EEG and Text embeddings into train and test sets and saves them as .npy files.

    The two arrays are split together, so row ``i`` of each train/test EEG
    array still corresponds to row ``i`` of the matching text array.

    Args:
        eeg_embeddings_path (str): Path to the original EEG embeddings .npy file.
        text_embeddings_path (str): Path to the original Text embeddings .npy file.
        test_size (float): Fraction of data to use as the test set.
        random_seed (int): Random seed for reproducibility.
        output_dir (str): Directory the four .npy files are written to; it is
            created if missing. Defaults to the current working directory.

    Returns:
        None

    Raises:
        AssertionError: If the two arrays have different numbers of rows.
    """
    import os  # local import: keeps this cell self-contained

    # Load the embeddings
    eeg_embeddings = np.load(eeg_embeddings_path)
    text_embeddings = np.load(text_embeddings_path)

    # Ensure both have the same number of samples
    assert len(eeg_embeddings) == len(text_embeddings), "Mismatch in number of EEG and Text embeddings!"

    # Split the data into train and test sets
    eeg_train, eeg_test, text_train, text_test = train_test_split(
        eeg_embeddings, text_embeddings, test_size=test_size, random_state=random_seed
    )

    # Save the splits to .npy files
    os.makedirs(output_dir, exist_ok=True)
    np.save(os.path.join(output_dir, "train_eeg_embeddings.npy"), eeg_train)
    np.save(os.path.join(output_dir, "test_eeg_embeddings.npy"), eeg_test)
    np.save(os.path.join(output_dir, "train_text_embeddings.npy"), text_train)
    np.save(os.path.join(output_dir, "test_text_embeddings.npy"), text_test)

    print("Data split completed and saved as .npy files:")
    print(f"Train EEG: {eeg_train.shape}, Test EEG: {eeg_test.shape}")
    print(f"Train Text: {text_train.shape}, Test Text: {text_test.shape}")


# Example Usage
# Runs the train/test split on the embeddings under /kaggle/input/embeddings/.
# NOTE(review): the training cell in this notebook loads its arrays from
# /kaggle/input/modified-eeg-embeddings/ and trains on the whole array, not on
# the train split written here - confirm that the held-out test files do not
# overlap with the data the saved model was trained on.
if __name__ == "__main__":
    # Input paths for the original embeddings
    original_eeg_embeddings_path = "/kaggle/input/embeddings/eeg_embeddings.npy"  # Path to your original EEG embeddings
    original_text_embeddings_path = "/kaggle/input/embeddings/generated_captions_embeddings.npy"  # Path to your original Text embeddings

    # Split and save data (20% of the rows go to the test set)
    split_and_save_data(original_eeg_embeddings_path, original_text_embeddings_path, test_size=0.2)

Data split completed and saved as .npy files:
Train EEG: (13232, 50), Test EEG: (3308, 50)
Train Text: (13232, 384), Test Text: (3308, 384)


In [17]:
## Testing 

import torch
from torch.utils.data import DataLoader
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Function to test the model
def test_clip_model(model, dataloader):
    """Project every batch with ``model`` and return all pairwise similarities.

    Args:
        model: Module called as ``model(eeg_batch, text_batch)`` and returning
            the two projected batches.
        dataloader: Iterable yielding ``(eeg_embeddings, text_embeddings)``
            tensor batches.

    Returns:
        2-D NumPy array whose entry ``[i, j]`` is the cosine similarity
        between the i-th projected EEG sample and the j-th projected text
        sample, in the order the dataloader yielded them.
    """
    # Take the device from the model itself so the function does not depend
    # on a notebook-global `device` having been defined by another cell.
    device = next(model.parameters()).device

    model.eval()  # Set the model to evaluation mode

    all_eeg_proj = []
    all_text_proj = []

    with torch.no_grad():  # No gradient calculation needed for testing
        for eeg_embeddings, text_embeddings in dataloader:
            eeg_embeddings, text_embeddings = eeg_embeddings.to(device), text_embeddings.to(device)

            # Forward pass
            eeg_proj, text_proj = model(eeg_embeddings, text_embeddings)

            # Collect projections (moved to CPU so they can become NumPy arrays)
            all_eeg_proj.append(eeg_proj.cpu().numpy())
            all_text_proj.append(text_proj.cpu().numpy())

    # Concatenate all projections
    all_eeg_proj = np.concatenate(all_eeg_proj, axis=0)
    all_text_proj = np.concatenate(all_text_proj, axis=0)

    # Compute cosine similarity between EEG and Text embeddings
    similarities = cosine_similarity(all_eeg_proj, all_text_proj)

    return similarities


# Main Testing Script
if __name__ == "__main__":
    # Resolve the compute device here so this cell does not rely on the
    # training cell having defined `device` earlier in the same kernel.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load EEG and Text embeddings for testing
    test_eeg_embeddings_path = "/kaggle/working/test_eeg_embeddings.npy"  # Path to EEG embeddings for testing
    test_text_embeddings_path = "/kaggle/working/test_text_embeddings.npy"  # Path to Text embeddings for testing

    test_eeg_embeddings = np.load(test_eeg_embeddings_path)  # Shape: (num_test_samples, eeg_dim)
    test_text_embeddings = np.load(test_text_embeddings_path)  # Shape: (num_test_samples, text_dim)

    # Ensure test EEG and Text embeddings have the same number of samples
    assert len(test_eeg_embeddings) == len(test_text_embeddings), "Mismatch in number of test EEG and Text embeddings!"

    # Dataset and DataLoader (no shuffling, so row i of the similarity matrix
    # and column i refer to the same test pair)
    batch_size = 32
    test_dataset = EEGTextDataset(test_eeg_embeddings, test_text_embeddings)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Load the trained model
    eeg_dim = test_eeg_embeddings.shape[1]  # Dimensionality of EEG embeddings
    text_dim = test_text_embeddings.shape[1]  # Dimensionality of Text embeddings
    projection_dim = 256  # Joint embedding space dimensionality

    model = EEGTextCLIP(eeg_dim, text_dim, projection_dim).to(device)

    # Load the trained weights (update the file path as needed).
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # kernel; weights_only=True restricts unpickling to tensor data, which is
    # all a state_dict contains.
    checkpoint_path = "/kaggle/working/clip_model.pth"  # Path to the saved trained model checkpoint
    model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))

    # Test the model
    similarities = test_clip_model(model, test_dataloader)

    # Analyze the results
    print("Cosine Similarity Matrix:")
    print(similarities)

    # Optionally, you can evaluate performance metrics
    diagonal_similarities = np.diag(similarities)  # Similarities for matching EEG-Text pairs
    mean_similarity = np.mean(diagonal_similarities)

    print(f"Mean Cosine Similarity for Matching EEG-Text Pairs: {mean_similarity:.4f}")


  model.load_state_dict(torch.load(checkpoint_path))


Cosine Similarity Matrix:
[[ 0.00976474  0.04244883  0.03584838 ...  0.05399805 -0.02907151
   0.04380765]
 [ 0.12674272  0.02088922  0.08711144 ...  0.04154975  0.03713179
  -0.04295124]
 [ 0.05789004  0.02035929  0.10008927 ...  0.0952059  -0.01581091
  -0.04530226]
 ...
 [-0.03378122 -0.05738352 -0.02055039 ...  0.05802902 -0.02703595
   0.07298591]
 [ 0.03478    -0.00320802  0.09120285 ... -0.03052808  0.07630415
  -0.00502096]
 [ 0.09873109  0.04619924  0.03355896 ...  0.0002348   0.02351372
  -0.0135108 ]]
Mean Cosine Similarity for Matching EEG-Text Pairs: 0.0754


In [None]:
import torch
import numpy as np

# Assuming 'EEGtoTextModel' class is already defined as in the training code

# Function to load the trained model
def load_trained_model(model_path, input_dim, hidden_dim):
    """Build an ``EEGtoTextModel`` and load saved weights into it.

    NOTE(review): ``EEGtoTextModel`` is not defined in the visible part of
    this notebook; it must be defined or imported before this is called.

    Args:
        model_path: Path to a file written with ``torch.save(state_dict, ...)``.
        input_dim: Forwarded to ``EEGtoTextModel(input_dim=...)``.
        hidden_dim: Forwarded to ``EEGtoTextModel(hidden_dim=...)``.

    Returns:
        The model in evaluation mode, with the saved weights loaded.
    """
    # Initialize the model
    model = EEGtoTextModel(input_dim=input_dim, hidden_dim=hidden_dim)

    # Load the trained weights. map_location="cpu" lets a checkpoint saved on
    # a GPU load on a CPU-only machine (the caller moves the model to its
    # device afterwards); weights_only=True restricts unpickling to tensors.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    return model

# Function to map EEG embedding to text embedding
def map_eeg_to_text(eeg_embedding, model, device):
    """Run one EEG embedding through ``model`` and return the output as NumPy.

    The input is converted to a float32 tensor, moved to ``device`` and given
    a leading batch dimension; the model is called without gradient tracking
    and the batch dimension is squeezed off the result again.
    """
    single = torch.tensor(eeg_embedding, dtype=torch.float32)
    batch = single.to(device).unsqueeze(0)  # leading batch dimension of 1

    with torch.no_grad():
        prediction = model(batch)

    return prediction.squeeze(0).cpu().numpy()

# Paths and parameters
model_path = "/kaggle/working/clip_model.pth"  # Replace with your trained model's path
eeg_embedding_file = "single_eeg_embedding.npy"  # Path to EEG embedding (.npy format)
input_dim = 512  # EEG embedding dimension
hidden_dim = 512  # Text embedding dimension
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the trained model
model = load_trained_model(model_path, input_dim, hidden_dim).to(device)

# Load the EEG embedding
eeg_embedding = np.load(eeg_embedding_file)  # Assuming shape is (512,)

# Generate the corresponding text embedding
text_embedding = map_eeg_to_text(eeg_embedding, model, device)

# Save or print the resulting text embedding
print("Generated Text Embedding:", text_embedding)

# Optional: Save the text embedding to a file
np.save("generated_text_embedding.npy", text_embedding)

In [6]:
import numpy as np
# Load the full EEG embedding array and print its first row.
# NOTE(review): the output recorded under this cell is `50`, which does not
# look like a printed row - presumably left over from an earlier version of
# the cell (e.g. a length check); re-run to refresh.
eeg_embedding = np.load('/kaggle/input/embeddings/eeg_embeddings.npy')
print(eeg_embedding[0])

50


In [7]:
# Function to load the trained model
def load_trained_model(model_path, input_dim, hidden_dim):
    """Build an ``EEGtoTextModel`` and load saved weights into it.

    NOTE(review): ``EEGtoTextModel`` is not defined in the visible part of
    this notebook; it must be defined or imported before this is called.

    Args:
        model_path: Path to a file written with ``torch.save(state_dict, ...)``.
        input_dim: Forwarded to ``EEGtoTextModel(input_dim=...)``.
        hidden_dim: Forwarded to ``EEGtoTextModel(hidden_dim=...)``.

    Returns:
        The model in evaluation mode, with the saved weights loaded.
    """
    # Initialize the model
    model = EEGtoTextModel(input_dim=input_dim, hidden_dim=hidden_dim)

    # Load the trained weights. map_location="cpu" lets a checkpoint saved on
    # a GPU load on a CPU-only machine (the caller moves the model to its
    # device afterwards); weights_only=True restricts unpickling to tensors.
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()  # Set the model to evaluation mode
    return model

# Function to map EEG embedding to text embedding
def map_eeg_to_text(eeg_embedding, model, device):
    """Run one EEG embedding through ``model`` and return the output as NumPy.

    The input is converted to a float32 tensor, moved to ``device`` and given
    a leading batch dimension; the model is called without gradient tracking
    and the batch dimension is squeezed off the result again.
    """
    single = torch.tensor(eeg_embedding, dtype=torch.float32)
    batch = single.to(device).unsqueeze(0)  # leading batch dimension of 1

    with torch.no_grad():
        prediction = model(batch)

    return prediction.squeeze(0).cpu().numpy()

In [9]:
import torch
import numpy as np

# Inference script: load a saved model and map the first EEG embedding of the
# dataset to a text embedding.
# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'EEGtoTextModel' is not defined" - that class is not defined
# in the visible notebook, so load_trained_model cannot run as written.
model_path = "/kaggle/working/clip_model.pth"  # Replace with your trained model's path
eeg_embedding_file = "/kaggle/input/embeddings/eeg_embeddings.npy"  # Path to EEG embedding (.npy format)

# NOTE(review): the split cell's recorded output shows this EEG file has
# 50-dim rows and the text embeddings 384-dim rows, so the two 512 values
# below look inconsistent with the data - confirm.
input_dim = 512  # EEG embedding dimension
hidden_dim = 512  # Text embedding dimension
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the trained model
model = load_trained_model(model_path, input_dim, hidden_dim).to(device)

# Load the EEG embedding (first row of the array only)
eeg_embedding = np.load(eeg_embedding_file)[0] 

# Generate the corresponding text embedding
text_embedding = map_eeg_to_text(eeg_embedding, model, device)

# Save or print the resulting text embedding
print("Generated Text Embedding:", text_embedding)

# Optional: Save the text embedding to a file
np.save("generated_text_embedding.npy", text_embedding)

NameError: name 'EEGtoTextModel' is not defined