In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# training and Validation
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
# Load data from pickled files
file_path = "/content/drive/MyDrive/BV/text_embeddings_final.pkl"
with open(file_path, "rb") as file:
    story_embeddings = pickle.load(file)

file_path = "/content/drive/MyDrive/BV/wave_embeddings.pkl"
with open(file_path, "rb") as file:
    wave_embeddings = pickle.load(file)

old_key = 'the_black_willow'
new_key = 'The_Black_Willow'
story_embeddings[new_key] = story_embeddings[old_key]
del story_embeddings[old_key]

old_key = 'The_Black_Widow'
new_key = 'The_Black_Willow'
wave_embeddings[new_key] = wave_embeddings[old_key]
del wave_embeddings[old_key]


# Here we process wave arrays and story tensors, converting them into PyTorch tensors, and stacking them. Preprocessing is done before loading the data into the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

result_dict = {}
for story_key, wave_array_list in wave_embeddings.items():
    story_tensor = story_embeddings[story_key]
    for wave_array in wave_array_list:
        result_dict[tuple(wave_array)] = story_tensor

source_embeddings = list(result_dict.keys())
source_embeddings = [torch.tensor(t) for t in source_embeddings]
source_embeddings = torch.stack(source_embeddings).to(device)

target_embeddings = list(result_dict.values())
target_embeddings = [torch.tensor(t) for t in target_embeddings]
target_embeddings = torch.stack(target_embeddings).to(device)


# Split data into train, test, and validation sets
source_train, source_temp, target_train, target_temp = train_test_split(source_embeddings, target_embeddings, test_size=0.2, random_state=42)
source_val, source_test, target_val, target_test = train_test_split(source_temp, target_temp, test_size=0.5, random_state=42)


# Define a module for Multi-Head Self-Attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.head_dim = d_model // num_heads
        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.output_linear = nn.Linear(d_model, d_model)

    def split_heads(self, x, batch_size):
        x = x.view(batch_size, -1, self.num_heads, self.head_dim)
        return x.permute(0, 2, 1, 3)

    def forward(self, query, key, value, mask):
        batch_size = query.size(0)
        q = self.split_heads(self.q_linear(query), batch_size)
        k = self.split_heads(self.k_linear(key), batch_size)
        v = self.split_heads(self.v_linear(value), batch_size)
        scores = torch.matmul(q, k.transpose(-2, -1)) / (self.head_dim ** 0.5)
        scores = scores.masked_fill(mask == 0, float("-1e20"))
        attn_weights = F.softmax(scores, dim=-1)
        attn_output = torch.matmul(attn_weights, v)
        attn_output = attn_output.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.d_model)
        output = self.output_linear(attn_output)
        return output

# Define a module for the Decoder Layer
class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.multihead_attention = MultiHeadSelfAttention(d_model, num_heads)
        self.feedforward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Linear(d_ff, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, target_emb, source_emb, target_mask):
        attn_output = self.multihead_attention(target_emb, target_emb, target_emb, target_mask)
        target_emb = target_emb + self.dropout(attn_output)
        target_emb = self.norm1(target_emb)
        ff_output = self.feedforward(target_emb)
        target_emb = target_emb + self.dropout(ff_output)
        target_emb = self.norm2(target_emb)
        return target_emb

# Define the Decoder module with multiple layers
class Decoder(nn.Module):
    def __init__(self, num_layers, d_model, num_heads, d_ff, dropout=0.1):
        super(Decoder, self).__init__()
        self.num_layers = num_layers
        self.layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])

    def forward(self, target_emb, source_emb, target_mask):
        for layer in self.layers:
            target_emb = layer(target_emb, source_emb, target_mask)
        return target_emb

# Define hyperparameters
num_layers = 12  # Number of decoder layers
d_model = 512
num_heads = 8
d_ff = 2048
dropout = 0.5
output_embeddings_dict = {}
# Instantiate the decoder with multiple layers
decoder = Decoder(num_layers, d_model, num_heads, d_ff, dropout).to(device)
# Training loop
batch_size = 1
num_epochs = 30
loss_function = nn.MSELoss()
optimizer = optim.Adam(decoder.parameters(), lr=0.001)

best_epoch = -1
best_mae = float('inf')
best_embeddings = None

for epoch in tqdm(range(num_epochs)):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    total_loss = 0.0
    total_mae = 0.0

    # Train phase
    decoder.train()
    for instance_idx in range(target_train.shape[0]):
        single_instance_target_emb = target_train[instance_idx:instance_idx+1, :]
        single_instance_source_emb = source_train[instance_idx:instance_idx+1, :]
        sequence_length = single_instance_target_emb.size(0)
        target_mask = torch.ones(batch_size, num_heads, sequence_length, sequence_length).to(device)

        single_instance_output_emb = decoder(single_instance_target_emb, single_instance_source_emb, target_mask)

        loss = loss_function(single_instance_output_emb, single_instance_target_emb.expand_as(single_instance_output_emb))
        total_loss += loss.item()

        mae = torch.mean(torch.abs(single_instance_output_emb - single_instance_target_emb))
        total_mae += mae.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = total_loss / target_train.shape[0]
    epoch_mae = total_mae / target_train.shape[0]

    print(f"Train - Epoch Loss (MSE): {epoch_loss:.4f}, Epoch MAE: {epoch_mae:.4f}")

    # Validation phase
    decoder.eval()
    total_val_mae = 0.0
    with torch.no_grad():
        for val_idx in range(target_val.shape[0]):
            val_target_emb = target_val[val_idx:val_idx+1, :]
            val_source_emb = source_val[val_idx:val_idx+1, :]
            val_sequence_length = val_target_emb.size(0)
            val_target_mask = torch.ones(batch_size, num_heads, val_sequence_length, val_sequence_length).to(device)

            val_output_emb = decoder(val_target_emb, val_source_emb, val_target_mask)

            val_mae = torch.mean(torch.abs(val_output_emb - val_target_emb))
            total_val_mae += val_mae.item()

    val_epoch_mae = total_val_mae / target_val.shape[0]
    print(f"Validation - Epoch MAE: {val_epoch_mae:.4f}")

    if val_epoch_mae < best_mae:
        best_mae = val_epoch_mae
        best_epoch = epoch
        best_embeddings = output_embeddings_dict

print(f"Epoch with Least MAE on Validation: {best_epoch + 1}, Least MAE: {best_mae:.4f}")


  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 1/30
Train - Epoch Loss (MSE): 0.7852, Epoch MAE: 0.5081
Validation - Epoch MAE: 0.4318
Epoch 2/30
Train - Epoch Loss (MSE): 0.4592, Epoch MAE: 0.2851
Validation - Epoch MAE: 0.1654
Epoch 3/30
Train - Epoch Loss (MSE): 0.2123, Epoch MAE: 0.1721
Validation - Epoch MAE: 0.0988
Epoch 4/30
Train - Epoch Loss (MSE): 0.0931, Epoch MAE: 0.1299
Validation - Epoch MAE: 0.0835
Epoch 5/30
Train - Epoch Loss (MSE): 0.0644, Epoch MAE: 0.1336
Validation - Epoch MAE: 0.0717
Epoch 6/30
Train - Epoch Loss (MSE): 0.0571, Epoch MAE: 0.1167
Validation - Epoch MAE: 0.0591
Epoch 7/30
Train - Epoch Loss (MSE): 0.0420, Epoch MAE: 0.1010
Validation - Epoch MAE: 0.0654
Epoch 8/30
Train - Epoch Loss (MSE): 0.0429, Epoch MAE: 0.1083
Validation - Epoch MAE: 0.0776
Epoch 9/30
Train - Epoch Loss (MSE): 0.0231, Epoch MAE: 0.0847
Validation - Epoch MAE: 0.0562
Epoch 10/30
Train - Epoch Loss (MSE): 0.0255, Epoch MAE: 0.0861
Validation - Epoch MAE: 0.0574
Epoch 11/30
Train - Epoch Loss (MSE): 0.0169, Epoch MAE: 0.

In [4]:
# Displaing the decoder architecture
decoder

Decoder(
  (layers): ModuleList(
    (0-11): 12 x DecoderLayer(
      (multihead_attention): MultiHeadSelfAttention(
        (q_linear): Linear(in_features=512, out_features=512, bias=True)
        (k_linear): Linear(in_features=512, out_features=512, bias=True)
        (v_linear): Linear(in_features=512, out_features=512, bias=True)
        (output_linear): Linear(in_features=512, out_features=512, bias=True)
      )
      (feedforward): Sequential(
        (0): Linear(in_features=512, out_features=2048, bias=True)
        (1): ReLU()
        (2): Linear(in_features=2048, out_features=512, bias=True)
      )
      (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.5, inplace=False)
    )
  )
)

In [5]:
decoder.eval()
total_test_mae = 0.0
device = torch.device("cuda:0")  # Specify the device

# Initializing empty tensors on the GPU
output_test_embeddings = torch.empty(0, device=device)
predicted_test_embeddings = torch.empty(0, device=device)

with torch.no_grad():
    for test_idx in range(target_test.shape[0]):
        test_target_emb = target_test[test_idx:test_idx+1, :].to(device)
        print(test_target_emb.shape)
        test_source_emb = source_test[test_idx:test_idx+1, :].to(device)
        print(test_source_emb.shape)
        test_sequence_length = test_target_emb.size(0)
        print(test_sequence_length)
        test_target_mask = torch.ones(batch_size, num_heads, test_sequence_length, test_sequence_length).to(device)
        print(test_target_mask.shape)
        test_output_emb = decoder(test_target_emb, test_source_emb, test_target_mask)

        output_test_embeddings = torch.cat((output_test_embeddings, test_target_emb), dim=0)
        predicted_test_embeddings = torch.cat((predicted_test_embeddings, test_output_emb), dim=0)
        print(predicted_test_embeddings)
        test_mae = torch.mean(torch.abs(test_output_emb - test_target_emb))
        total_test_mae += test_mae.item()

test_avg_mae = total_test_mae / target_test.shape[0]
print(f"Test MAE: {test_avg_mae:.4f}")

# Display some test embeddings and predictions
for i in range(5):  # Displaying for the first 5 instances
    print(f"Instance {i+1}")
    print("Actual Test Embedding:", output_test_embeddings[i])
    print("Predicted Test Embedding:", predicted_test_embeddings[i])
    print()


torch.Size([1, 512])
torch.Size([1, 512])
1
torch.Size([1, 8, 1, 1])
tensor([[[ 1.3015e-01,  3.4010e-02, -3.8032e-01, -7.3006e-02, -7.1380e-02,
           5.0573e-03, -7.9301e-02, -5.7942e-02, -9.0780e-02, -1.3429e-02,
          -2.8902e-02,  5.9447e-03,  1.2710e-01, -5.9190e-03,  1.3107e-01,
          -7.2216e-02,  3.2451e-02,  5.3888e-02,  2.2500e-02, -6.9930e-02,
           5.3800e-02,  4.7045e-02, -5.1027e-02,  2.4559e-02, -1.5396e-01,
          -3.1931e-02, -1.9677e-02, -4.5927e-02,  2.7980e-02, -5.9914e-02,
          -1.6586e-02,  1.6148e-02, -7.2353e-02, -2.5947e-02,  1.5338e-02,
          -5.9690e-02, -8.3962e-02, -5.2255e-03,  7.3880e-02,  1.7581e-02,
          -6.5262e-02, -3.0013e-02,  6.6443e-02,  4.8191e-02,  7.5019e-03,
          -4.7023e-02, -5.6095e-03,  4.6076e-02, -1.3617e-01, -1.1183e-01,
          -1.3935e-02, -3.9219e-02,  1.5452e-02,  4.1362e-02,  1.2479e-01,
          -6.7275e-03, -2.5949e-02,  3.6933e-02, -4.9813e-02,  2.6830e-02,
           2.7478e-02, -5.0450e

In [6]:
import pickle
# Save the data to a pickle file
with open('/content/drive/MyDrive/BV/data.pickle', 'wb') as f:
    pickle.dump(predicted_test_embeddings, f)

In [7]:
predicted_test_embeddings = predicted_test_embeddings.squeeze(1)
predicted_test_embeddings.shape

torch.Size([20, 512])

In [8]:
output_test_embeddings.shape

torch.Size([20, 512])

In [9]:
# Normalize the embeddings to unit length
import torch.nn.functional as F

target_embeddings_normalized = F.normalize(predicted_test_embeddings, p=2, dim=1)
output_embeddings_normalized = F.normalize(output_test_embeddings, p=2, dim=1)

# Calculate cosine similarity scores
cosine_similarities = F.cosine_similarity(output_test_embeddings, predicted_test_embeddings, dim=1)

# Print cosine similarity scores
print(cosine_similarities.mean(dim =0))

tensor(0.5329, device='cuda:0')


In [10]:
# Load data from pickled files
file_path = "/content/drive/MyDrive/BV/text_embeddings.pkl"
with open(file_path, "rb") as file:
    d = pickle.load(file)
d

{'cable_spool_fort': {'<s>': tensor(0.1645),
  'The': tensor(0.0574),
  'ĠCable': tensor(-0.3775),
  'ĠSp': tensor(-0.0997),
  'ool': tensor(-0.0473),
  'ĠFort': tensor(-0.0448),
  'Ġby': tensor(-0.0589),
  'ĠBill': tensor(0.0053),
  'ĠGlover': tensor(-0.0518),
  'Ġ"': tensor(-0.1101),
  'ĠHey': tensor(0.1310),
  'Ġ,': tensor(-0.0666),
  'ĠRoy': tensor(-0.1023),
  'Ġ?': tensor(0.0441),
  'ĠWhat': tensor(0.1653),
  'ĠYou': tensor(-0.2052),
  'Ġsuck': tensor(-0.0393),
  'Ġ.': tensor(0.1915),
  'ĠChad': tensor(0.0533),
  'Ġsaid': tensor(-0.1395),
  'ĠHe': tensor(-0.0153),
  'Ġwished': tensor(-0.0922),
  'Ġwould': tensor(-0.1465),
  'Ġn': tensor(-0.0822),
  "'t": tensor(-0.0798),
  'Ġfall': tensor(-0.1053),
  'Ġfor': tensor(-0.0229),
  'Ġthat': tensor(-0.0051),
  'Ġgag': tensor(-0.1052),
  'Ġevery': tensor(-0.1373),
  'Ġtime': tensor(-0.0600),
  'Ġget': tensor(0.0820),
  'Ġme': tensor(-0.0889),
  'Ġa': tensor(-0.0495),
  'Ġbig': tensor(-0.1019),
  'Ġrock': tensor(-0.0025),
  'Ġsto': tensor

In [11]:
flattened_dict = {}

for story_name, token_dict in d.items():
    for token, value in token_dict.items():
        flattened_dict[token] = value

print(flattened_dict)

{'<s>': tensor(0.1682), 'The': tensor(0.0945), 'ĠCable': tensor(-0.3775), 'ĠSp': tensor(-0.0997), 'ool': tensor(-0.0473), 'ĠFort': tensor(-0.0448), 'Ġby': tensor(-0.0089), 'ĠBill': tensor(-0.1301), 'ĠGlover': tensor(-0.1803), 'Ġ"': tensor(-0.1240), 'ĠHey': tensor(0.1310), 'Ġ,': tensor(0.3252), 'ĠRoy': tensor(-0.1023), 'Ġ?': tensor(-0.0082), 'ĠWhat': tensor(0.2625), 'ĠYou': tensor(-0.1184), 'Ġsuck': tensor(-0.0393), 'Ġ.': tensor(0.0736), 'ĠChad': tensor(0.0533), 'Ġsaid': tensor(0.0898), 'ĠHe': tensor(0.0475), 'Ġwished': tensor(-0.0922), 'Ġwould': tensor(-0.1465), 'Ġn': tensor(0.2647), "'t": tensor(-0.0068), 'Ġfall': tensor(-0.1053), 'Ġfor': tensor(0.1108), 'Ġthat': tensor(-0.0842), 'Ġgag': tensor(-0.1052), 'Ġevery': tensor(-0.0559), 'Ġtime': tensor(-0.0582), 'Ġget': tensor(-0.1706), 'Ġme': tensor(0.0165), 'Ġa': tensor(-0.0083), 'Ġbig': tensor(-0.1019), 'Ġrock': tensor(-0.0025), 'Ġsto': tensor(-0.1193), 'oped': tensor(0.0128), 'Ġto': tensor(0.2275), 'Ġpick': tensor(-0.0258), 'Ġup': tenso

In [12]:
values = flattened_dict.values()

# Find the range of values
min_value = min(values)
max_value = max(values)

value_range = max_value - min_value

In [14]:
predicted_test_embeddings=torch.clamp(predicted_test_embeddings.cpu(),min_value,max_value)

In [24]:
#This code defines a function called find_closest_tokens_numeric which uses absolute value to find the closest tokens (words or items) in a given dictionary to the target embeddings.
#The code then applies this function to a set of predicted test embeddings to generate lists of closest tokens for each row of embeddings.
from scipy.spatial.distance import cosine

def find_closest_tokens_numeric(target, dictionary):
    closest_tokens = []

    for row in target.detach().cpu().numpy():
        row_closest_tokens = []

        for value in row:
            closest_token = min(dictionary, key=lambda x: abs(value - dictionary[x]))
            row_closest_tokens.append(closest_token)

        closest_tokens.append(row_closest_tokens)

    return closest_tokens
# Convert tensor values to lists of closest tokens using cosine similarity
word_lists1 = find_closest_tokens_numeric(predicted_test_embeddings.cpu(), flattened_dict)

print(word_lists1)

  0%|          | 0/20 [00:00<?, ?it/s]

[['<s>', 'The', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'The', 'ĠCable', '<s>', 'ĠCable', '<s>', '<s>', 'The', 'ĠFort', '<s>', '<s>', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', 'ĠSp', 'ĠCable', '<s>', 'ĠCable', 'ĠSp', '<s>', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', '<s>', 'ĠCable', 'ĠSp', '<s>', '<s>', '<s>', 'ĠCable', 'ĠCable', 'The', 'ĠCable', 'ĠCable', 'ĠSp', 'ĠCable', '<s>', 'The', '<s>', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', 'The', '<s>', 'ĠCable', 'ĠCable', 'ool', '<s>', '<s>', '<s>', 'The', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'ĠSp', '<s>', '<s>', 'ĠSp', 'The', '<s>', 'ĠCable', 'ĠCable', '<s>', '<s>', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', 'The', 'ĠCable', 'ĠCable', 'The', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', '<s>', 'ĠSp', 'ĠSp', 'ool', 'ĠCable', 'ĠCable', 'ool', 'ĠCable', '<s>', 'ĠCable', '<s>', '<s>', '<s>', 'ĠCable', '<s>', 'ĠCable', '<s>', 'ĠCable', '<s>', 'The', 'ĠCable'

In [25]:
# Convert tensor values to lists of closest tokens using absolute distance similarity
word_lists2 = find_closest_tokens_numeric(output_test_embeddings.cpu(), flattened_dict)

print(word_lists2)

  0%|          | 0/20 [00:00<?, ?it/s]

  dist = 1.0 - uv / np.sqrt(uu * vv)


[['<s>', '<s>', 'ĠCable', 'ĠSp', 'The', '<s>', '<s>', 'ool', 'ĠCable', 'The', 'ĠSp', '<s>', 'The', 'ool', 'The', 'ĠCable', '<s>', 'ĠSp', 'ĠSp', 'ĠSp', '<s>', '<s>', '<s>', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', 'ĠSp', '<s>', 'ĠCable', 'ĠCable', '<s>', '<s>', 'The', 'The', '<s>', 'ĠCable', '<s>', 'ĠCable', 'The', 'ĠCable', 'ĠCable', 'Ġ,', 'ĠCable', 'ĠCable', '<s>', '<s>', '<s>', '<s>', 'The', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'The', 'ĠSp', 'The', '<s>', 'ĠCable', '<s>', 'The', 'ĠCable', 'ĠCable', '<s>', '<s>', 'The', 'ĠSp', 'ĠCable', 'ĠCable', 'ĠSp', '<s>', '<s>', 'ĠCable', 'ĠSp', '<s>', '<s>', 'ĠCable', 'ĠSp', 'ĠCable', '<s>', '<s>', 'The', '<s>', '<s>', '<s>', '<s>', '<s>', 'ool', 'ĠCable', '<s>', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', '<s>', '<s>', '<s>', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'ĠCable', 'ĠCable', '<s>', '<s>', '<s>', 'ĠCable', 'ĠCable', 'ĠCable', '<s>', 'The', '<s>', '<s>', 'ĠCable', '<s>', 'ĠCable', 'ool', 'ĠSp', '<s>', '<s>'

In [26]:
import re
actual = []
predicted=[]
for i in range(len(word_lists2)):
  pred = " ".join(word_lists1[i])
  #pred = re.sub(r"[^a-zA-Z\s]", "", pred)
  #pred = pred.split()
  act = " ".join(word_lists2[i])
  #act = re.sub(r"[^a-zA-Z\s]", "", act)
  #act = act.split()
  predicted.append(pred)
  actual.append(act)

In [27]:
pip install rouge_score



In [28]:
# this code calculates ROUGE scores for pairs of candidate and reference texts using the provided token lists (word_lists1 and word_lists2).
# It accumulates the scores for each metric and calculates the average scores, then prints out the average ROUGE scores for ROUGE-1, ROUGE-2, and ROUGE-L metrics.
#These scores provide insights into the quality of the generated text compared to the reference text in terms of content overlap and linguistic similarity.
from rouge_score import rouge_scorer

def calculate_rouge_scores(candidate, reference):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(candidate, reference)
    return scores

total_rouge_scores = {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}
num_pairs = len(word_lists1)

for i in range(num_pairs):
    candidate = " ".join(word_lists1[i])
    reference = " ".join(word_lists2[i])
    rouge_scores = calculate_rouge_scores(candidate, reference)

    for metric in ['rouge1', 'rouge2', 'rougeL']:
        total_rouge_scores[metric] += rouge_scores[metric].fmeasure

average_rouge_scores = {metric: total_score / num_pairs for metric, total_score in total_rouge_scores.items()}

print("Average ROUGE Scores:")
for metric, score in average_rouge_scores.items():
    print(f"{metric}: {score:.4f}")


Average ROUGE Scores:
rouge1: 0.5565
rouge2: 0.4809
rougeL: 0.4138


In [None]:
model_path = "//content/drive/MyDrive/BV/BRAIN2TEXT.pth"
torch.save(decoder, model_path)

print(f"Model saved to {model_path}")

Model saved to //content/drive/MyDrive/BV/BRAIN2TEXT.pth


In [None]:
import torch
import torch.nn as nn

# Define the path to the saved model
model_path = "//content/drive/MyDrive/BV/BRAIN2TEXT.pth"

# Load the saved model
loaded_decoder = torch.load(model_path)

# Make sure to set the model to evaluation mode if needed
loaded_decoder.eval()

print(f"Decoder model loaded from {model_path}")


Decoder model loaded from //content/drive/MyDrive/BV/BRAIN2TEXT.pth


In [None]:
loaded_decoder

Decoder(
  (layers): ModuleList(
    (0-11): 12 x DecoderLayer(
      (multihead_attention): MultiHeadSelfAttention(
        (q_linear): Linear(in_features=512, out_features=512, bias=True)
        (k_linear): Linear(in_features=512, out_features=512, bias=True)
        (v_linear): Linear(in_features=512, out_features=512, bias=True)
        (output_linear): Linear(in_features=512, out_features=512, bias=True)
      )
      (feedforward): Sequential(
        (0): Linear(in_features=512, out_features=2048, bias=True)
        (1): ReLU()
        (2): Linear(in_features=2048, out_features=512, bias=True)
      )
      (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.5, inplace=False)
    )
  )
)