In [1]:
import torch
import torch.nn as nn
import random

def generate_random_network():
    layers = []
    input_channels = random.randint(1, 3)
    input_size = random.randint(32, 224)
    output_size = random.randint(1, 10)
    num_layers = random.randint(2, 5)

    current_channels = input_channels
    current_size = input_size

    for i in range(num_layers):
        # Randomly choose layer type
        layer_type = random.choice(['linear', 'conv1d', 'conv2d'] if i < num_layers - 1 else ['linear'])

        if layer_type == 'linear':
            if i == 0:
                in_features = current_channels * current_size * current_size
            else:
                in_features = get_output_features(layers[-1], current_channels, current_size)
            out_features = random.randint(10, 200) if i < num_layers - 1 else output_size
            layers.append(nn.Linear(in_features, out_features))
            current_channels = 1
            current_size = out_features
        elif layer_type == 'conv1d':
            out_channels = random.randint(16, 64)
            layers.append(nn.Conv1d(current_channels, out_channels, kernel_size=3, padding=1))
            current_channels = out_channels
        elif layer_type == 'conv2d':
            out_channels = random.randint(16, 64)
            layers.append(nn.Conv2d(current_channels, out_channels, kernel_size=3, padding=1))
            current_channels = out_channels

        # Randomly add normalization
        if random.choice([True, False]):
            if layer_type == 'linear':
                layers.append(nn.BatchNorm1d(out_features))
            elif layer_type == 'conv1d':
                layers.append(nn.BatchNorm1d(out_channels))
            elif layer_type == 'conv2d':
                layers.append(nn.BatchNorm2d(out_channels))

        # Randomly add activation functions
        if random.choice([True, False]):
            layers.append(random.choice([nn.ReLU(), nn.LeakyReLU(), nn.Tanh(), nn.Sigmoid()]))

        # Randomly add dropout
        if random.choice([True, False]):
            layers.append(nn.Dropout(p=random.uniform(0.1, 0.5)))

    # Ensure the last layer is Linear and outputs the correct size
    if not isinstance(layers[-1], nn.Linear) or layers[-1].out_features != output_size:
        in_features = get_output_features(layers[-1], current_channels, current_size)
        layers.append(nn.Linear(in_features, output_size))

    return nn.Sequential(*layers)

def get_output_features(layer, current_channels, current_size):
    if isinstance(layer, nn.Linear):
        return layer.out_features
    elif isinstance(layer, (nn.Conv1d, nn.Conv2d)):
        return layer.out_channels
    elif isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d)):
        return layer.num_features
    elif isinstance(layer, (nn.ReLU, nn.LeakyReLU, nn.Tanh, nn.Sigmoid, nn.Dropout)):
        return current_channels * current_size * current_size
    else:
        raise ValueError(f"Unsupported layer type: {type(layer)}")

# Test the function
try:
    model = generate_random_network()
    print(model)
except Exception as e:
    print(f"An error occurred: {e}")

Sequential(
  (0): Conv1d(2, 37, kernel_size=(3,), stride=(1,), padding=(1,))
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=587412, out_features=10, bias=True)
  (3): Tanh()
  (4): Linear(in_features=100, out_features=10, bias=True)
)


In [2]:
def generate_description(model):
    layers = list(model.children())

    # Determine input shape
    first_layer = layers[0]
    if isinstance(first_layer, nn.Linear):
        input_shape = f"b,{first_layer.in_features}"
    elif isinstance(first_layer, nn.Conv1d):
        input_shape = f"b,{first_layer.in_channels},w"
    elif isinstance(first_layer, nn.Conv2d):
        input_shape = f"b,{first_layer.in_channels},h,w"
    else:
        input_shape = "unknown"

    # Determine output shape
    last_layer = next((layer for layer in reversed(layers) if isinstance(layer, nn.Linear)), None)
    if last_layer:
        output_shape = f"b,{last_layer.out_features}"
    else:
        output_shape = "unknown"

    description = f"This neural network takes an input of shape ({input_shape}) "
    description += f"and produces an output of shape ({output_shape}). "
    description += f"It consists of {len(layers)} layers, including "

    layer_descriptions = []
    for layer in layers:
        if isinstance(layer, nn.Linear):
            layer_descriptions.append(f"a Linear layer ({layer.in_features} -> {layer.out_features})")
        elif isinstance(layer, nn.Conv1d):
            layer_descriptions.append(f"a 1D Convolutional layer ({layer.in_channels} -> {layer.out_channels}, kernel_size={layer.kernel_size[0]})")
        elif isinstance(layer, nn.Conv2d):
            layer_descriptions.append(f"a 2D Convolutional layer ({layer.in_channels} -> {layer.out_channels}, kernel_size={layer.kernel_size})")
        elif isinstance(layer, nn.BatchNorm1d):
            layer_descriptions.append(f"a 1D Batch Normalization (num_features={layer.num_features})")
        elif isinstance(layer, nn.BatchNorm2d):
            layer_descriptions.append(f"a 2D Batch Normalization (num_features={layer.num_features})")
        elif isinstance(layer, nn.ReLU):
            layer_descriptions.append("a ReLU activation")
        elif isinstance(layer, nn.LeakyReLU):
            layer_descriptions.append(f"a Leaky ReLU activation (negative_slope={layer.negative_slope:.2f})")
        elif isinstance(layer, nn.Tanh):
            layer_descriptions.append("a Tanh activation")
        elif isinstance(layer, nn.Sigmoid):
            layer_descriptions.append("a Sigmoid activation")
        elif isinstance(layer, nn.Dropout):
            layer_descriptions.append(f"a Dropout layer (p={layer.p:.2f})")
        else:
            layer_descriptions.append(f"an unknown layer type: {type(layer).__name__}")

    description += ", ".join(layer_descriptions) + "."
    return description

# Example usage:
model = generate_random_network()
description = generate_description(model)
print(description)

This neural network takes an input of shape (b,3,w) and produces an output of shape (b,5). It consists of 11 layers, including a 1D Convolutional layer (3 -> 34, kernel_size=3), a Dropout layer (p=0.31), a Linear layer (1240354 -> 142), a 2D Convolutional layer (1 -> 24, kernel_size=(3, 3)), a 2D Batch Normalization (num_features=24), a Tanh activation, a Dropout layer (p=0.32), a Linear layer (483936 -> 5), a 1D Batch Normalization (num_features=5), a ReLU activation, a Linear layer (25 -> 5).


In [3]:
import gc
def generate_dataset(num_samples):
    dataset = []
    for _ in range(num_samples):
        model = generate_random_network()
        description = generate_description(model)
        dataset.append((str(model), description))
        # Explicitly delete variables and run garbage collection
        del model
        del description
        gc.collect()
    return dataset

# Generate samples dataset
# sample_df = generate_dataset(1000)

In [4]:
sample_df[0]

('Sequential(\n  (0): Linear(in_features=34225, out_features=191, bias=True)\n  (1): BatchNorm1d(191, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  (2): LeakyReLU(negative_slope=0.01)\n  (3): Linear(in_features=36481, out_features=8, bias=True)\n  (4): Tanh()\n  (5): Linear(in_features=64, out_features=8, bias=True)\n)',
 'This neural network takes an input of shape (b,34225) and produces an output of shape (b,8). It consists of 6 layers, including a Linear layer (34225 -> 191), a 1D Batch Normalization (num_features=191), a Leaky ReLU activation (negative_slope=0.01), a Linear layer (36481 -> 8), a Tanh activation, a Linear layer (64 -> 8).')

In [5]:
type(sample_df[0])

tuple

In [6]:
import pickle

def save_dataset(dataset, filename):
    with open(filename, 'wb') as f:
        pickle.dump(dataset, f)

# save_dataset(sample_df, 'neural_network_dataset.pkl')

In [7]:
!ls -al

total 366812
drwxr-xr-x 1 root root      4096 Jul 13 10:45 .
drwxr-xr-x 1 root root      4096 Jul 13 09:30 ..
-rw-r--r-- 1 root root 374524731 Jul 13 11:08 checkpoint.pt
drwxr-xr-x 4 root root      4096 Jul 11 13:21 .config
drwx------ 5 root root      4096 Jul 13 10:45 drive
-rw-r--r-- 1 root root   1061749 Jul 13 11:19 neural_network_dataset.pkl
drwxr-xr-x 1 root root      4096 Jul 11 13:22 sample_data


In [8]:
!pip install transformers



In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class TransformerSeq2Seq(nn.Module):
    def __init__(self, input_dim, output_dim, d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1):
        super(TransformerSeq2Seq, self).__init__()
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, dropout)
        self.fc_out = nn.Linear(d_model, output_dim)
        self.src_tok_emb = nn.Embedding(input_dim, d_model)
        self.tgt_tok_emb = nn.Embedding(output_dim, d_model)
        self.positional_encoding = nn.Embedding(5000, d_model)

    def forward(self, src, tgt):
        src_seq_len, N = src.shape
        tgt_seq_len, N = tgt.shape

        src_positions = (
            torch.arange(0, src_seq_len).unsqueeze(1).expand(src_seq_len, N).to(src.device)
        )
        tgt_positions = (
            torch.arange(0, tgt_seq_len).unsqueeze(1).expand(tgt_seq_len, N).to(tgt.device)
        )

        embed_src = self.src_tok_emb(src) + self.positional_encoding(src_positions)
        embed_tgt = self.tgt_tok_emb(tgt) + self.positional_encoding(tgt_positions)

        transformer_out = self.transformer(embed_src, embed_tgt)
        out = self.fc_out(transformer_out)

        return out



In [5]:
from transformers import BertTokenizer

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Example input and output
example_input = "Sequential((0): Linear(in_features=38809, out_features=26, bias=True) (1): LeakyReLU(negative_slope=0.01) (2): Linear(in_features=676, out_features=183, bias=True) (3): BatchNorm1d(183, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): LeakyReLU(negative_slope=0.01) (5): Conv2d(1, 25, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): Dropout(p=0.11890518865721189, inplace=False) (7): Linear(in_features=837225, out_features=8, bias=True))"
example_output = "This neural network takes an input of shape (b,38809) and produces an output of shape (b,8). It consists of 8 layers, including a Linear layer (38809 -> 26), a Leaky ReLU activation (negative_slope=0.01), a Linear layer (676 -> 183), a 1D Batch Normalization (num_features=183), a Leaky ReLU activation (negative_slope=0.01), a 2D Convolutional layer (1 -> 25, kernel_size=(3, 3)), a Dropout layer (p=0.12), a Linear layer (837225 -> 8)."

# Tokenize the input and output
input_tokens = tokenizer.tokenize(example_input)
output_tokens = tokenizer.tokenize(example_output)

print("Input Tokens:", input_tokens)
print("Output Tokens:", output_tokens)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Input Tokens: ['sequential', '(', '(', '0', ')', ':', 'linear', '(', 'in', '_', 'features', '=', '38', '##80', '##9', ',', 'out', '_', 'features', '=', '26', ',', 'bias', '=', 'true', ')', '(', '1', ')', ':', 'leak', '##yre', '##lu', '(', 'negative', '_', 'slope', '=', '0', '.', '01', ')', '(', '2', ')', ':', 'linear', '(', 'in', '_', 'features', '=', '67', '##6', ',', 'out', '_', 'features', '=', '183', ',', 'bias', '=', 'true', ')', '(', '3', ')', ':', 'batch', '##nor', '##m', '##1', '##d', '(', '183', ',', 'eps', '=', '1', '##e', '-', '05', ',', 'momentum', '=', '0', '.', '1', ',', 'af', '##fine', '=', 'true', ',', 'track', '_', 'running', '_', 'stats', '=', 'true', ')', '(', '4', ')', ':', 'leak', '##yre', '##lu', '(', 'negative', '_', 'slope', '=', '0', '.', '01', ')', '(', '5', ')', ':', 'con', '##v', '##2', '##d', '(', '1', ',', '25', ',', 'kernel', '_', 'size', '=', '(', '3', ',', '3', ')', ',', 'stride', '=', '(', '1', ',', '1', ')', ',', 'pad', '##ding', '=', '(', '1', ',', '

In [6]:
# Convert tokens to input IDs
input_ids = tokenizer.convert_tokens_to_ids(input_tokens)
output_ids = tokenizer.convert_tokens_to_ids(output_tokens)

print("Input IDs:", input_ids)
print("Output IDs:", output_ids)


Input IDs: [25582, 1006, 1006, 1014, 1007, 1024, 7399, 1006, 1999, 1035, 2838, 1027, 4229, 17914, 2683, 1010, 2041, 1035, 2838, 1027, 2656, 1010, 13827, 1027, 2995, 1007, 1006, 1015, 1007, 1024, 17271, 16363, 7630, 1006, 4997, 1035, 9663, 1027, 1014, 1012, 5890, 1007, 1006, 1016, 1007, 1024, 7399, 1006, 1999, 1035, 2838, 1027, 6163, 2575, 1010, 2041, 1035, 2838, 1027, 18677, 1010, 13827, 1027, 2995, 1007, 1006, 1017, 1007, 1024, 14108, 12131, 2213, 2487, 2094, 1006, 18677, 1010, 20383, 1027, 1015, 2063, 1011, 5709, 1010, 11071, 1027, 1014, 1012, 1015, 1010, 21358, 23460, 1027, 2995, 1010, 2650, 1035, 2770, 1035, 26319, 1027, 2995, 1007, 1006, 1018, 1007, 1024, 17271, 16363, 7630, 1006, 4997, 1035, 9663, 1027, 1014, 1012, 5890, 1007, 1006, 1019, 1007, 1024, 9530, 2615, 2475, 2094, 1006, 1015, 1010, 2423, 1010, 16293, 1035, 2946, 1027, 1006, 1017, 1010, 1017, 1007, 1010, 18045, 1027, 1006, 1015, 1010, 1015, 1007, 1010, 11687, 4667, 1027, 1006, 1015, 1010, 1015, 1007, 1007, 1006, 1020, 10

In [7]:
from transformers import BertTokenizer
import torch
from torch.utils.data import Dataset, DataLoader

class SyntheticDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        input_text, output_text = self.data[idx]

        # Tokenize input and output texts
        input_encoding = self.tokenizer(input_text, return_tensors='pt', max_length=self.max_length, truncation=True, padding='max_length')
        output_encoding = self.tokenizer(output_text, return_tensors='pt', max_length=self.max_length, truncation=True, padding='max_length')

        # Extract input and output IDs
        input_ids = input_encoding['input_ids'].squeeze(0)  # Remove batch dimension
        output_ids = output_encoding['input_ids'].squeeze(0)  # Remove batch dimension

        return input_ids, output_ids

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Example data
example_input = "Sequential((0): Linear(in_features=38809, out_features=26, bias=True) (1): LeakyReLU(negative_slope=0.01) (2): Linear(in_features=676, out_features=183, bias=True) (3): BatchNorm1d(183, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): LeakyReLU(negative_slope=0.01) (5): Conv2d(1, 25, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): Dropout(p=0.11890518865721189, inplace=False) (7): Linear(in_features=837225, out_features=8, bias=True))"
example_output = "This neural network takes an input of shape (b,38809) and produces an output of shape (b,8). It consists of 8 layers, including a Linear layer (38809 -> 26), a Leaky ReLU activation (negative_slope=0.01), a Linear layer (676 -> 183), a 1D Batch Normalization (num_features=183), a Leaky ReLU activation (negative_slope=0.01), a 2D Convolutional layer (1 -> 25, kernel_size=(3, 3)), a Dropout layer (p=0.12), a Linear layer (837225 -> 8)."

# Create dataset
data = [(example_input, example_output)]
# data = [(str(sample_df[-1][0]), sample_df[-1][1])]
# data = [(str(sample_df[i][0]), sample_df[i][1]) for i in range(len(sample_df))]
dataset = SyntheticDataset(data, tokenizer)

# Create DataLoader
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Print the first batch to verify
for batch in dataloader:
    src, tgt = batch
    print("Source Shape:", src.shape)
    print("Target Shape:", tgt.shape)
    break


Source Shape: torch.Size([1, 512])
Target Shape: torch.Size([1, 512])


In [14]:
str(sample_df[-1][0])


'Sequential(\n  (0): Conv2d(1, 29, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n  (1): Conv1d(29, 29, kernel_size=(3,), stride=(1,), padding=(1,))\n  (2): BatchNorm1d(29, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  (3): ReLU()\n  (4): Dropout(p=0.3357099483812358, inplace=False)\n  (5): Linear(in_features=290000, out_features=160, bias=True)\n  (6): Linear(in_features=160, out_features=4, bias=True)\n)'

In [34]:
##clean gpu
!nvidia-smi

Sat Jul 13 11:13:43 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   78C    P0              33W /  70W |  12381MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [35]:
torch.cuda.synchronize()

In [33]:
# Clear GPU memory
torch.cuda.empty_cache()

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = TransformerSeq2Seq(len(tokenizer.vocab), len(tokenizer.vocab)).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()

def train(model, dataloader, optimizer, criterion, num_epochs=10):
    model.train()

    for epoch in range(num_epochs):
        epoch_loss = 0

        for src, tgt in dataloader:
            src, tgt = src.to(device), tgt.to(device)

            optimizer.zero_grad()

            output = model(src, tgt[:-1, :])
            output = output.reshape(-1, output.shape[2])
            tgt = tgt[1:, :].reshape(-1)

            loss = criterion(output, tgt)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f'Epoch {epoch+1}, Loss: {epoch_loss / len(dataloader)}')

# Train the model
# train(model, dataloader, optimizer, criterion, num_epochs=20)




In [29]:
24*42

1008

In [16]:
import os

def train_with_checkpointing(model, dataloader, optimizer, criterion, num_epochs=10, patience=3, checkpoint_path='checkpoint.pt'):
    model.train()
    best_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        epoch_loss = 0

        for src, tgt in dataloader:
            src, tgt = src.to(device), tgt.to(device)

            optimizer.zero_grad()

            output = model(src, tgt[:-1, :])
            output = output.reshape(-1, output.shape[2])
            tgt = tgt[1:, :].reshape(-1)

            loss = criterion(output, tgt)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_epoch_loss = epoch_loss / len(dataloader)
        print(f'Epoch {epoch+1}, Loss: {avg_epoch_loss}')

        # Checkpointing
        if avg_epoch_loss < best_loss:
            best_loss = avg_epoch_loss
            torch.save(model.state_dict(), checkpoint_path)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                print('Early stopping')
                break

# Train the model with checkpointing
# train_with_checkpointing(model, dataloader, optimizer, criterion, num_epochs=20, patience=5)


Epoch 1, Loss: 2.7872646517223783
Epoch 2, Loss: 1.448282868143112
Epoch 3, Loss: 0.7340554594993591
Epoch 4, Loss: 0.31667683521906537
Epoch 5, Loss: 0.226724345769201
Epoch 6, Loss: 0.1719598188286736
Epoch 7, Loss: 0.15356939274167258
Epoch 8, Loss: 0.13517426782184178
Epoch 9, Loss: 0.12769145021835962
Epoch 10, Loss: 0.12258641835716036
Epoch 11, Loss: 0.11372723962579455
Epoch 12, Loss: 0.10256994013038892
Epoch 13, Loss: 0.09916347513596217
Epoch 14, Loss: 0.09907156607461354
Epoch 15, Loss: 0.10461718435325319
Epoch 16, Loss: 0.09511972697717803
Epoch 17, Loss: 0.09591614833426854
Epoch 18, Loss: 0.09148200991607848
Epoch 19, Loss: 0.08968120842935547
Epoch 20, Loss: 0.08350930646771476


In [18]:
!ls -al

total 4015984
drwxr-xr-x 1 root root       4096 Jul 13 10:23 .
drwxr-xr-x 1 root root       4096 Jul 13 09:30 ..
-rw-r--r-- 1 root root  374524731 Jul 13 10:42 checkpoint.pt
drwxr-xr-x 4 root root       4096 Jul 11 13:21 .config
-rw-r--r-- 1 root root 3737814002 Jul 13 09:33 neural_network_dataset.pkl
drwxr-xr-x 1 root root       4096 Jul 11 13:22 sample_data


In [17]:
!cp /content/checkpoint.pt /content/drive/MyDrive/ShodhAI/checkpoints

In [None]:
from transformers import BertTokenizer
from sklearn.metrics import f1_score
import torch
from torch.utils.data import DataLoader
import numpy as np


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def compute_f1_score(predictions, targets):
    # Tokenize predictions and targets
    pred_tokens = [pred.split() for pred in predictions]
    target_tokens = [tgt.split() for tgt in targets]

    # Flatten the lists
    pred_flat = [token for sent in pred_tokens for token in sent]
    target_flat = [token for sent in target_tokens for token in sent]

    # Get unique tokens
    unique_tokens = list(set(pred_flat + target_flat))

    # Create label encodings
    label_encoder = {token: i for i, token in enumerate(unique_tokens)}

    # Encode predictions and targets
    pred_encoded = [label_encoder[token] for token in pred_flat]
    target_encoded = [label_encoder[token] for token in target_flat]

    # Compute F1 score
    return f1_score(target_encoded, pred_encoded, average='weighted')

def test_model(model, tokenizer, test_data, batch_size=32):

    # Prepare DataLoader for testing
    test_dataset = SyntheticDataset(test_data, tokenizer)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    # Set model to evaluation mode
    model.eval()

    predictions = []
    targets = []

    with torch.no_grad():
        for src, tgt in test_dataloader:
            src, tgt = src.to(device), tgt.to(device)

            # Generate predictions
            output = model(src, tgt[:-1, :])  # Remove last token from target for prediction
            predicted_ids = torch.argmax(output, dim=-1)

            # Convert predicted IDs to tokens
            predicted_tokens = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
            target_tokens = tokenizer.batch_decode(tgt, skip_special_tokens=True)

            # Store predictions and targets
            predictions.extend(predicted_tokens)
            targets.extend(target_tokens)

    # Compute F1 score
    f1 = compute_f1_score(predictions, targets)

    return predictions, f1

def main():
    # Initialize tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    test_data = generate_dataset(10) # [(example source, example_traget)]

    # Initialize and load your TransformerSeq2Seq model
    model = TransformerSeq2Seq(input_dim=len(tokenizer.vocab), output_dim=len(tokenizer.vocab))
    model.load_state_dict(torch.load('/content/drive/MyDrive/ShodhAI/checkpoints/checkpoint.pt', map_location= torch.device('cpu')))  # Load your trained model checkpoint
    model.to(device)

    # Test the model
    predictions, f1_score = test_model(model, tokenizer, test_data)

    # Print predictions and F1 score
    for idx, prediction in enumerate(predictions):
        print(f"Example {idx + 1} Prediction:", prediction)

    print(f"F1 Score: {f1_score:.4f}")

if __name__ == "__main__":
    main()

In [None]:
from transformers import BertTokenizer
from sklearn.metrics import f1_score
import torch
from torch.utils.data import DataLoader
import numpy as np

def compute_f1_score(predictions, targets):
    # Ensure predictions and targets have the same length
    min_len = min(len(predictions), len(targets))
    predictions = predictions[:min_len]
    targets = targets[:min_len]

    # Tokenize predictions and targets
    pred_tokens = [pred.split() for pred in predictions]
    target_tokens = [tgt.split() for tgt in targets]

    # Flatten the lists
    pred_flat = [token for sent in pred_tokens for token in sent]
    target_flat = [token for sent in target_tokens for token in sent]

    # Get unique tokens
    unique_tokens = list(set(pred_flat + target_flat))

    # Create label encodings
    label_encoder = {token: i for i, token in enumerate(unique_tokens)}

    # Encode predictions and targets
    pred_encoded = [label_encoder[token] for token in pred_flat]
    target_encoded = [label_encoder[token] for token in target_flat]

    # Compute F1 score
    return f1_score(target_encoded, pred_encoded, average='weighted')

def test_model(model, tokenizer, test_data, batch_size=32):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare DataLoader for testing
    test_dataset = SyntheticDataset(test_data, tokenizer)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    # Set model to evaluation mode
    model.eval()

    predictions = []
    targets = []

    with torch.no_grad():
        for src, tgt in test_dataloader:
            src, tgt = src.to(device), tgt.to(device)

            # Generate predictions
            output = model(src, tgt[:-1, :])  # Remove last token from target for prediction
            predicted_ids = torch.argmax(output, dim=-1)

            # Convert predicted IDs to tokens
            predicted_tokens = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
            target_tokens = tokenizer.batch_decode(tgt, skip_special_tokens=True)

            # Store predictions and targets
            predictions.extend(predicted_tokens)
            targets.extend(target_tokens)

    # Compute F1 score
    f1 = compute_f1_score(predictions, targets)

    return predictions, targets, f1

def main():
    # Initialize tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    test_data = generate_dataset(10) # [(example source, example_target)]

    # Initialize and load your TransformerSeq2Seq model
    model = TransformerSeq2Seq(input_dim=len(tokenizer.vocab), output_dim=len(tokenizer.vocab))
    model.load_state_dict(torch.load('/content/drive/MyDrive/ShodhAI/checkpoints/checkpoint.pt', map_location= torch.device('cpu')))  # Load your trained model checkpoint
    model.to('cpu')

    # Test the model
    predictions, targets, f1_score = test_model(model, tokenizer, test_data)

    # Print predictions, targets, and F1 score
    for idx, (pred, target) in enumerate(zip(predictions, targets)):
        print(f"Example {idx + 1}:")
        print(f"  Prediction: {pred}")
        print(f"  Target:     {target}")
        print()

    print(f"F1 Score: {f1_score:.4f}")
    print(f"Number of predictions: {len(predictions)}")
    print(f"Number of targets: {len(targets)}")

if __name__ == "__main__":
    main()