In [1]:
pip install torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable





pip install datasets

In [None]:
import torch
# Sanity check of the environment: report the installed PyTorch version and
# whether torch can see a CUDA device.
print(torch.__version__)
print("CUDA Available:", torch.cuda.is_available())

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from datasets import load_dataset
from collections import Counter
import numpy as np
import random

# Load Shakespeare dataset
dataset = load_dataset("tiny_shakespeare", trust_remote_code=True)

# Convert text to lowercase and split into words
# Only the first entry of the train split's 'text' column is used; tokens are
# whitespace-separated, so punctuation stays attached to the words.
text = dataset['train']['text'][0].lower().split()

# Build vocabulary
# `vocab` is a list ordered from most to least frequent word, so a smaller index
# means a more frequent word.
word_counts = Counter(text)
vocab = sorted(word_counts, key=word_counts.get, reverse=True)
word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for word, idx in word2idx.items()}

# Convert words to indices
indexed_text = [word2idx[word] for word in text]

# Create training sequences (e.g., sequence length = 5)
# Each sample is a window of SEQ_LEN consecutive word indices (X) and the index of
# the word that immediately follows it (Y).
SEQ_LEN = 5
X, Y = [], []
for i in range(len(indexed_text) - SEQ_LEN):
    X.append(indexed_text[i:i+SEQ_LEN])
    Y.append(indexed_text[i+SEQ_LEN])

X = torch.tensor(X)
Y = torch.tensor(Y)

# Train-test split (80-20)
# The split is positional: the first 80% of the windows are the training set.
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]


In [None]:
# Provisional vocabulary size: one more than the largest target index in Y_train.
# NOTE(review): VOCAB_SIZE is reassigned in the model-parameter section of the
# training cell further down, so this value is only used until then.
VOCAB_SIZE = Y_train.max().item() + 1


In [None]:
# VOCAB_SIZE = Y_train.max().item() + 1
# print(f"Unique token indices: {sorted(set(Y_train.tolist()))[-10:]}")  # Last 10 indices
# print(f"Max index in dataset: {Y_train.max().item()}, VOCAB_SIZE: {VOCAB_SIZE}")

In [None]:
# print(f"Max index in Y_train: {Y_train.max().item()}, Min index: {Y_train.min().item()}, VOCAB_SIZE: {VOCAB_SIZE}")
# Y_train = torch.clamp(Y_train, min=0, max=VOCAB_SIZE - 1).long()

# assert Y_train.max().item() < VOCAB_SIZE, f"Error: Y_train contains {Y_train.max().item()} which is out of range!"

In [None]:
# print(f"Max index in Y_train: {Y_train.max().item()}, Min index: {Y_train.min().item()}, VOCAB_SIZE: {VOCAB_SIZE}")
# # Y_train = toch.clamp(Y_train, min=0, max=VOCAB_SIZE - 1).long()

# assert Y_train.max().item() < VOCAB_SIZE, f"Error: Y_train contains {Y_train.max().item()} which is out of range!"

In [None]:
# invalid_indices = batch_Y[batch_Y >= VOCAB_SIZE]
# if len(invalid_indices) > 0:
#     print(f"Invalid indices: {invalid_indices}")
# batch_Y = torch.clamp(batch_Y, min=0, max=VOCAB_SIZE - 1).long()

In [None]:
# batch_Y = torch.clamp(batch_Y, min=0, max=VOCAB_SIZE - 1).long() 

In [None]:
# print(f"Max target index: {batch_Y.max().item()}")
# print(f"VOCAB_SIZE: {VOCAB_SIZE}")
# assert batch_Y.max().item() < VOCAB_SIZE, f"Index {batch_Y.max().item()} is out of bounds! Max index is {VOCAB_SIZE - 1}."

In [None]:
# batch_X = torch.clamp(batch_X, min=0, max=VOCAB_SIZE - 1)
# print(f"VOCAB_SIZE: {VOCAB_SIZE}")
# print(f"Max index in batch_X: {batch_X.max().item()}")
# print(f"Min index in batch_X: {batch_X.min().item()}")

In [None]:
# print(f"Max index in batch_X: {batch_X.max().item()}")
# print(f"Min index in batch_X: {batch_X.min().item()}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Define the RNN model
class VanillaRNN(nn.Module):
    """Next-word predictor: embedding -> single-layer RNN -> linear projection.

    Args:
        vocab_size: Number of rows in the embedding table and number of output logits.
        embedding_dim: Width of each word embedding.
        hidden_dim: Width of the RNN hidden state.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        # Submodules are created in this order so that the global RNG is consumed
        # in the same sequence during weight initialisation.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Return one row of vocabulary logits per input sequence.

        `x` holds word indices and is indexed as (batch, time) below; only the RNN
        output at the final time step is projected to logits.
        """
        embedded = self.embedding(x)
        hidden_states, _final_hidden = self.rnn(embedded)
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)

# Model parameters
# VOCAB_SIZE sizes both the embedding table and the output layer of the model built
# below; ShakespeareDataset clamps every index to VOCAB_SIZE - 1, so all words ranked
# beyond the cap share the last index.
VOCAB_SIZE = min(10000, len(vocab))  # Limit vocab size to 10,000 words
EMBEDDING_DIM = 128
HIDDEN_DIM = 256
BATCH_SIZE = 32
EPOCHS = 15
LR = 0.001
train_losses = []  # mean training loss per epoch, appended by the training loop

# Ensure tensors are in correct data types
# NOTE(review): the training loop in this cell iterates over train_loader, not over
# X_train / Y_train; these tensors are only used again by the later fine-tuning cell.
X_train = X_train.long()  # Ensure it's integer-based (word indices)
Y_train = Y_train.long()  # Ensure labels are class indices

# Create dataset and DataLoader
SEQ_LENGTH = 5  # Number of words to use as context
BATCH_SIZE = 32  # same value as assigned above (duplicate assignment)

class ShakespeareDataset(Dataset):
    """Sliding-window next-word dataset over a sequence of word indices.

    Sample ``i`` is ``(context, target)`` where ``context`` holds the ``seq_length``
    indices starting at position ``i`` and ``target`` is the index that follows them.
    Every index is clamped into ``[0, vocab_size - 1]`` so it is always a valid row
    of an embedding table with ``vocab_size`` entries.

    Args:
        indexed_text: Sequence of integer word indices.
        seq_length: Number of context words per sample.
        vocab_size: Size of the model vocabulary used for clamping. When omitted,
            the notebook-level ``VOCAB_SIZE`` is read once, at construction time.
    """

    def __init__(self, indexed_text, seq_length, vocab_size=None):
        if vocab_size is None:
            vocab_size = VOCAB_SIZE  # notebook default; looked up only when not passed in

        # Clamp the whole corpus once instead of once per window.
        tokens = torch.clamp(
            torch.tensor(indexed_text, dtype=torch.long), min=0, max=vocab_size - 1
        )

        # Each entry is (1-D int64 context, 0-dim int64 target). The tensors are views
        # into `tokens`; the default DataLoader collate copies them when batching.
        # A corpus shorter than seq_length yields an empty range and thus no samples.
        self.data = [
            (tokens[start:start + seq_length], tokens[start + seq_length])
            for start in range(len(tokens) - seq_length)
        ]

    def __len__(self):
        """Number of (context, target) windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(context, target)`` pair at position ``idx``."""
        return self.data[idx]


# Build the windowed dataset and split it 80/20 at random into train and test sets.
dataset = ShakespeareDataset(indexed_text, SEQ_LENGTH)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# random_split draws from the global torch RNG; no seed is set in this notebook,
# so the split differs between runs.
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize model, loss function, and optimizer
model = VanillaRNN(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

# Snapshot of the embedding table before any training step (NumPy copy on CPU),
# kept for the random-vs-learned comparison later in the notebook.
random_embeddings = model.embedding.weight.data.clone().cpu().numpy()

# Training loop with batch processing
for epoch in range(EPOCHS):
    total_loss = 0
    for batch_X, batch_Y in train_loader:
        optimizer.zero_grad()
        output = model(batch_X)
        loss = criterion(output, batch_Y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch losses (each batch counts equally, whatever its size).
    avg_loss = total_loss / len(train_loader)
    train_losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.4f}")

# Snapshot of the embedding table after this training loop.
learned_embeddings = model.embedding.weight.data.clone().cpu().numpy()

# Save the model
# Only the state_dict is written, to a path relative to the working directory.
torch.save(model.state_dict(), "shakespeare_rnn.pth")


In [None]:
# `vocab` is the frequency-sorted word list built during data preparation, so these
# mappings reproduce the indices the training data was encoded with.
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}

# NOTE(review): this sets VOCAB_SIZE to the full vocabulary, whereas the model above
# was constructed with min(10000, len(vocab)) embeddings/outputs — the two can differ.
VOCAB_SIZE = len(word_to_idx)  # Update vocab size


In [None]:
import torch.nn.functional as F

def generate_text(seed_text, model, word_to_idx, idx_to_word, num_words=10, temperature=1.0, seq_length=None):
    """Sample a continuation of ``seed_text`` from a next-word model.

    Args:
        seed_text: Text to start from; lower-cased and split on whitespace.
        model: Module mapping a (1, time) tensor of word indices to (1, vocab) logits.
            It is switched to eval mode.
        word_to_idx: Word -> index mapping. Unknown words map to index 0.
        idx_to_word: Index -> word mapping used to decode sampled indices.
        num_words: Number of words to append.
        temperature: Softmax temperature; must be > 0. Values below 1 sharpen the
            distribution, values above 1 flatten it.
        seq_length: Number of trailing words fed to the model at each step. Defaults
            to the notebook-level ``SEQ_LENGTH``.

    Returns:
        The seed words followed by the sampled words, joined by single spaces.

    Raises:
        ValueError: If ``temperature`` is not positive or ``seed_text`` has no words.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")
    words = seed_text.lower().split()
    if not words:
        raise ValueError("seed_text must contain at least one word")
    if seq_length is None:
        seq_length = SEQ_LENGTH

    model.eval()

    # The model's embedding table may be smaller than word_to_idx (capped vocabulary).
    # Clamp indices into the table, mirroring the clamp applied to the training data,
    # instead of letting the embedding lookup raise IndexError.
    embedding = getattr(model, "embedding", None)
    table_size = getattr(embedding, "num_embeddings", None)

    # Build inputs on whatever device the model currently lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for _ in range(num_words):
        input_seq = [word_to_idx.get(word, 0) for word in words[-seq_length:]]
        if table_size is not None:
            input_seq = [min(idx, table_size - 1) for idx in input_seq]
        input_tensor = torch.tensor([input_seq], dtype=torch.long, device=device)

        with torch.no_grad():
            output = model(input_tensor)

            # Apply temperature scaling, then sample from the resulting distribution
            probabilities = F.softmax(output / temperature, dim=1)
            next_word_idx = torch.multinomial(probabilities, num_samples=1).item()

        words.append(idx_to_word[next_word_idx])

    return " ".join(words)

# Generate ten words from a five-word seed; temperature 0.8 is slightly sharper than
# the raw softmax distribution (moderate values are roughly 0.7-1.2).
generated_sentence = generate_text("to be or not to", model, word_to_idx, idx_to_word, num_words=10, temperature=0.8)
print("\nGenerated Text:", generated_sentence)


Evaluate the performance

In [None]:
def compute_perplexity(loader, model):
    """Return the perplexity of ``model`` over every sample yielded by ``loader``.

    Perplexity is ``exp`` of the mean per-sample cross-entropy. The loss is summed
    over samples and divided by the sample count, so a smaller final batch is not
    over-weighted. The function uses its own loss object rather than a notebook
    global.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches.
        model: Module returning class logits for ``inputs``; switched to eval mode.

    Returns:
        The perplexity as a Python float, or ``inf`` if ``loader`` yields no samples.
    """
    model.eval()
    summed_loss_fn = nn.CrossEntropyLoss(reduction="sum")
    total_loss = 0.0
    total_targets = 0
    with torch.no_grad():
        for batch_X, batch_Y in loader:
            output = model(batch_X)
            total_loss += summed_loss_fn(output, batch_Y).item()
            total_targets += batch_Y.size(0)

    if total_targets == 0:
        return float("inf")

    avg_loss = total_loss / total_targets
    perplexity = torch.exp(torch.tensor(avg_loss)).item()
    return perplexity

# Perplexity on the training split and on the held-out split of the random split.
train_perplexity = compute_perplexity(train_loader, model)
test_perplexity = compute_perplexity(test_loader, model)

print(f"\nTrain Perplexity: {train_perplexity:.2f}")
print(f"Test Perplexity: {test_perplexity:.2f}")

In [None]:
def compute_word_accuracy(model, test_loader):
    """Return top-1 next-word accuracy of ``model`` on ``test_loader`` as a percentage.

    Args:
        model: Module returning class logits for a batch of inputs; switched to
            eval mode.
        test_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Percentage (0-100) of targets equal to the arg-max prediction, or ``0.0``
        when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_X, batch_Y in test_loader:
            predicted_indices = model(batch_X).argmax(dim=1)
            correct += (predicted_indices == batch_Y).sum().item()
            total += batch_Y.size(0)

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return (correct / total) * 100

# Compute and print word-level (top-1 next-word) accuracy on the held-out split
word_accuracy = compute_word_accuracy(model, test_loader)
print(f"Word-Level Accuracy: {word_accuracy:.2f}%")

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset

# Continue training the existing model on (X_train, Y_train), on the GPU when available.
# NOTE(review): X_train is the positional first 80% of the windows, while test_loader
# comes from a random split of all windows, so some test windows are trained on here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Mixed precision and loss scaling are CUDA features; keep them off on CPU so the
# loop runs in plain float32 without warnings.
use_amp = device.type == "cuda"

# Move model and data to the selected device
X_train = X_train.to(device)
Y_train = Y_train.to(device)
model = model.to(device)
model.train()

# Hyperparameters
EPOCHS = 10
BATCH_SIZE = 16
# Clamp to the vocabulary the model was actually built with, so inputs and targets
# mean the same thing as during the first training run. A smaller hard-coded cap
# would silently relabel every word above it.
VOCAB_SIZE = model.embedding.num_embeddings
LEARNING_RATE = 0.001

# Ensure input values are within the model's index range
X_train = torch.clamp(X_train, min=0, max=VOCAB_SIZE - 1)
Y_train = torch.clamp(Y_train, min=0, max=VOCAB_SIZE - 1)

# Define DataLoader
dataset = TensorDataset(X_train, Y_train)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Enable gradient checkpointing if the model exposes it
if hasattr(model, "gradient_checkpointing_enable"):
    model.gradient_checkpointing_enable()

# Loss scaler for mixed precision; a disabled scaler passes everything through unchanged
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

# Release cached GPU memory before training
if use_amp:
    torch.cuda.empty_cache()

# Mean training loss per epoch
losses = []

# Training loop
for epoch in range(EPOCHS):
    epoch_loss = 0.0
    model.train()  # make sure training mode is active at the start of every epoch

    for X_batch, Y_batch in dataloader:
        X_batch, Y_batch = X_batch.to(device), Y_batch.to(device)

        optimizer.zero_grad()

        with torch.cuda.amp.autocast(enabled=use_amp):  # Mixed precision (CUDA only)
            output = model(X_batch)
            loss = criterion(output, Y_batch)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    losses.append(avg_loss)
    print(f'Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.4f}')

# Set to evaluation mode for inference
model.eval()

# Plot loss curve
plt.plot(range(1, EPOCHS + 1), losses, marker='o', linestyle='-')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.grid()
plt.show()


Compare learned embeddings with randomly initialized ones.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Cosine similarity between each word's initial (random) and trained embedding,
# averaged over the vocabulary. Row i is compared with row i only: averaging the full
# all-pairs matrix would mostly compare unrelated words and needs a
# (vocab x vocab) intermediate array.
dot_products = np.sum(random_embeddings * learned_embeddings, axis=1)
norm_products = np.linalg.norm(random_embeddings, axis=1) * np.linalg.norm(learned_embeddings, axis=1)
# Guard against a zero-length row so the division cannot produce NaN.
similarity = float(np.mean(dot_products / np.maximum(norm_products, 1e-12)))
print(f"Average Cosine Similarity between Random & Learned Embeddings: {similarity:.4f}")

In [None]:


import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

# Define device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ensure the model is on the correct device
# nn.Module.to moves the parameters in place, so the return value is not needed here.
model.to(device)

# Function to calculate word-level accuracy
def calculate_word_level_accuracy(model, data_loader):
    """Return the fraction of targets that ``model`` predicts exactly (top-1).

    Batches are moved to the device the model's parameters live on, so the function
    does not depend on a notebook-level ``device`` variable.

    Args:
        model: Module returning class logits for a batch of inputs; switched to
            eval mode.
        data_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Accuracy as a fraction in [0, 1]; ``0`` when no samples were seen.
    """
    # A model without parameters has no device of its own; fall back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0

    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        for batch_X, batch_Y in data_loader:
            batch_X, batch_Y = batch_X.to(target_device), batch_Y.to(target_device)
            predictions = torch.argmax(model(batch_X), dim=1)  # predicted word index
            correct += (predictions == batch_Y).sum().item()
            total += batch_Y.size(0)

    return correct / total if total > 0 else 0  # Accuracy as a fraction

# Function to calculate perplexity
def calculate_perplexity(model, data_loader):
    """Return ``exp`` of the mean per-sample cross-entropy of ``model`` on ``data_loader``.

    Batches are moved to the device the model's parameters live on, so the function
    does not depend on a notebook-level ``device`` variable.

    Args:
        model: Module returning class logits for a batch of inputs; switched to
            eval mode.
        data_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        The perplexity; ``inf`` when no samples were seen.
    """
    # A model without parameters has no device of its own; fall back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss = 0
    total_words = 0
    criterion = nn.CrossEntropyLoss()

    model.eval()
    with torch.no_grad():
        for batch_X, batch_Y in data_loader:
            batch_X, batch_Y = batch_X.to(target_device), batch_Y.to(target_device)
            loss = criterion(model(batch_X), batch_Y)
            # criterion returns the batch mean; weight it by the batch size so the
            # final average is per sample.
            total_loss += loss.item() * batch_Y.size(0)
            total_words += batch_Y.size(0)

    avg_loss = total_loss / total_words if total_words > 0 else float('inf')
    return np.exp(avg_loss)  # Perplexity = exp(average loss)

# Convert embeddings to torch tensors and move them to the correct device
random_embeddings_tensor = torch.tensor(random_embeddings, device=device)
learned_embeddings_tensor = torch.tensor(learned_embeddings, device=device)

# Copy random embeddings to model
# Only the embedding table is swapped; the RNN and output layer keep their trained weights.
model.embedding.weight.data.copy_(random_embeddings_tensor)

# Calculate accuracy and perplexity for random embeddings
random_accuracy = calculate_word_level_accuracy(model, test_loader)
random_perplexity = calculate_perplexity(model, test_loader)

# Copy learned embeddings to model
# NOTE(review): learned_embeddings was captured right after the first training loop,
# i.e. before the fine-tuning cell ran. After this line the model keeps that older
# embedding table together with the fine-tuned RNN/output weights — confirm intended.
model.embedding.weight.data.copy_(learned_embeddings_tensor)

# Calculate accuracy and perplexity for learned embeddings
learned_accuracy = calculate_word_level_accuracy(model, test_loader)
learned_perplexity = calculate_perplexity(model, test_loader)

# Print Results
# Accuracy is printed as a fraction in [0, 1], not a percentage.
print(f"Random Embeddings - Word-Level Accuracy: {random_accuracy:.4f}, Perplexity: {random_perplexity:.4f}")
print(f"Learned Embeddings - Word-Level Accuracy: {learned_accuracy:.4f}, Perplexity: {learned_perplexity:.4f}")


RNN model using pretrained word embeddings

In [None]:
pip install torch torchvision torchaudio gensim numpy matplotlib seaborn

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from datasets import load_dataset
import torch.nn.functional as F
import math

# =====================
# 1. Load Shakespeare Dataset
# =====================
# trust_remote_code=True matches the load_dataset call at the top of the notebook.
dataset = load_dataset("tiny_shakespeare", trust_remote_code=True)
train_text = dataset['train']['text']
train_text = " ".join(train_text)  # Convert list of sentences into one large text corpus

# Tokenize text into words (lower-cased, whitespace-separated)
words = train_text.lower().split()
# Sort the unique words before numbering them: iteration order of a set of strings
# changes between interpreter sessions (string hashing is randomised), which would
# give every kernel restart a different word -> index mapping.
vocab = {word: idx for idx, word in enumerate(sorted(set(words)))}
VOCAB_SIZE = len(vocab)

# Convert words to indices
indexed_text = [vocab[word] for word in words]

# =====================
# 2. Prepare Sequences for Training
# =====================
SEQ_LENGTH = 5  # Context size: how many preceding words predict the next one


class ShakespeareDataset(Dataset):
    """Sliding windows over a list of word indices for next-word prediction.

    ``self.data`` stores plain Python pairs ``(context_indices, target_index)``;
    conversion to tensors happens lazily in ``__getitem__``.
    """

    def __init__(self, indexed_text, seq_length):
        window_count = len(indexed_text) - seq_length
        self.data = [
            (indexed_text[start:start + seq_length], indexed_text[start + seq_length])
            for start in range(window_count)
        ]

    def __len__(self):
        """Number of available windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(context, target)`` for window ``idx`` as freshly built tensors."""
        context, target = self.data[idx]
        return torch.tensor(context), torch.tensor(target)

# Build the windowed dataset; this rebinds `dataset`, which held the raw text dataset.
dataset = ShakespeareDataset(indexed_text, SEQ_LENGTH)

# Split dataset into train & validation
# 80/20 random split drawn from the global torch RNG (no seed is set in this cell).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# =====================
# 3. Load Pretrained Word2Vec Embeddings
# =====================
# Loaded through gensim's downloader API.
word2vec = api.load("word2vec-google-news-300")  # 300-dim embeddings
EMBEDDING_DIM = 300  # must equal the width of the word2vec vectors copied below
HIDDEN_DIM = 128
EPOCHS = 10
LR = 0.0025

# Prepare pretrained embeddings matrix
# Every row starts as uniform noise in [-0.1, 0.1); rows whose word exists in word2vec
# are overwritten with the pretrained vector, the others keep the random values.
embedding_matrix = np.random.uniform(-0.1, 0.1, (VOCAB_SIZE, EMBEDDING_DIM))
for word, idx in vocab.items():
    if word in word2vec:
        embedding_matrix[idx] = word2vec[word]

# =====================
# 4. Define the RNN Model
# =====================
class VanillaRNN(nn.Module):
    """Embedding -> single-layer RNN -> linear next-word classifier.

    Args:
        vocab_size: Number of embedding rows and of output logits.
        embedding_dim: Width of each word embedding.
        hidden_dim: Width of the RNN hidden state.
        pretrained_embeddings: Optional array-like of embedding weights. When given,
            it replaces the randomly initialised table (converted to float32) and
            the table is frozen.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, pretrained_embeddings=None):
        super().__init__()
        # The embedding is always constructed first, even when its weights are
        # replaced right after, so RNG consumption is the same in both modes.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        if pretrained_embeddings is not None:
            frozen_weights = torch.tensor(pretrained_embeddings, dtype=torch.float32)
            # requires_grad=False keeps the optimizer from updating the table.
            self.embedding.weight = nn.Parameter(frozen_weights, requires_grad=False)

        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Return vocabulary logits computed from the RNN output at the last time step."""
        embedded = self.embedding(x)
        hidden_states, _final_hidden = self.rnn(embedded)
        return self.fc(hidden_states[:, -1, :])

# =====================
# 5. Train & Evaluate Models
# =====================
# Two models with identical architecture: one with a trainable, randomly initialised
# embedding table, one whose table is filled from `embedding_matrix` and frozen.
models = {
    "Random Embeddings": VanillaRNN(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM),
    "Pretrained Word2Vec": VanillaRNN(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, pretrained_embeddings=embedding_matrix)
}

# Loss Function & Optimizer
def train_model(model, train_loader, val_loader, epochs=None, lr=None):
    """Train ``model`` with Adam and cross-entropy, validating after every epoch.

    Args:
        model: Module mapping a batch of inputs to class logits.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of passes over ``train_loader``. Defaults to the notebook-level
            ``EPOCHS``.
        lr: Adam learning rate. Defaults to the notebook-level ``LR``.

    Returns:
        ``(train_losses, val_losses)``: two lists with one mean per-sample loss per
        epoch. An entry is ``nan`` when the corresponding loader yielded no samples.
        The model is left in eval mode.
    """
    epochs = EPOCHS if epochs is None else epochs
    lr = LR if lr is None else lr

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        running_loss, seen = 0.0, 0
        for batch_X, batch_Y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_Y)
            loss.backward()
            optimizer.step()
            # criterion yields the batch mean; weight by batch size for a per-sample mean
            running_loss += loss.item() * batch_Y.size(0)
            seen += batch_Y.size(0)

        avg_loss = running_loss / seen if seen else float("nan")
        train_losses.append(avg_loss)

        # ---- validation pass (no gradient tracking) ----
        model.eval()
        val_running, val_seen = 0.0, 0
        with torch.no_grad():
            for batch_X, batch_Y in val_loader:
                val_running += criterion(model(batch_X), batch_Y).item() * batch_Y.size(0)
                val_seen += batch_Y.size(0)

        avg_val_loss = val_running / val_seen if val_seen else float("nan")
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

    return train_losses, val_losses

# Train both models & Store Loss Curves
# loss_curves[name] is the (train_losses, val_losses) tuple returned by train_model.
# Note that the loop variable rebinds the notebook-level name `model`.
loss_curves = {}
for name, model in models.items():
    print(f"\nTraining {name}...")
    loss_curves[name] = train_model(model, train_loader, val_loader)

# =====================
# 6. Evaluate Model Performance
# =====================
# def evaluate_model(model, val_loader):
#     model.eval()
#     correct = 0
#     total = 0
#     loss = 0
#     perplexity = 0
#     all_preds, all_labels = [], []

#     with torch.no_grad():
#         for batch_X, batch_Y in val_loader:
#             output = model(batch_X)
#             loss += F.cross_entropy(output, batch_Y).item()
#             preds = torch.argmax(output, dim=1)

#             # Calculate Accuracy
#             correct += (preds == batch_Y).sum().item()
#             total += batch_Y.size(0)

#             # Store predictions for confusion matrix
#             all_preds.extend(preds.cpu().numpy())
#             all_labels.extend(batch_Y.cpu().numpy())

#             # Perplexity Calculation
#             log_probs = F.log_softmax(output, dim=1)
#             perplexity += torch.exp(-log_probs.gather(1, batch_Y.view(-1, 1)).mean()).item()

#     accuracy = correct / total
#     avg_loss = loss / len(val_loader)
#     avg_perplexity = perplexity / len(val_loader)

#     return avg_loss, accuracy, avg_perplexity, all_preds, all_labels

# # Compute evaluation metrics
# metrics = {}
# for name, model in models.items():
#     print(f"\nEvaluating {name}...")
#     metrics[name] = evaluate_model(model, val_loader)

# # =====================
# # 7. Plot Loss Curve
# # =====================
# plt.figure(figsize=(10, 5))
# for name, losses in loss_curves.items():
#     plt.plot(losses, label=name)
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.title("Training Loss Curve")
# plt.legend()
# plt.show()

# # =====================
# # 8. Confusion Matrix
# # =====================
# for name, (loss, accuracy, perplexity, preds, labels) in metrics.items():
#     print(f"\n{name} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}, Perplexity: {perplexity:.4f}")

#     cm = confusion_matrix(labels, preds)
#     plt.figure(figsize=(8, 6))
#     sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=list(vocab.keys())[:10], yticklabels=list(vocab.keys())[:10])
#     plt.xlabel("Predicted")
#     plt.ylabel("Actual")
#     plt.title(f"Confusion Matrix for {name}")
#     plt.show()

import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import gc

# Define model evaluation function
def evaluate_model(model, val_loader):
    """Evaluate a next-word model on ``val_loader``.

    Args:
        model: Module mapping a batch of inputs to class logits; switched to eval mode.
        val_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        ``(avg_loss, accuracy, perplexity, predictions, labels)`` where ``avg_loss``
        is the mean per-sample cross-entropy, ``accuracy`` a fraction in [0, 1],
        ``perplexity`` is ``exp(avg_loss)`` (``inf`` only if that genuinely overflows),
        and ``predictions`` / ``labels`` are flat lists of predicted and true class
        indices. With no samples the result is ``(inf, 0.0, inf, [], [])``.
    """
    predictions = []
    labels = []

    criterion = torch.nn.CrossEntropyLoss()

    model.eval()  # Set model to evaluation mode
    total_loss = 0.0
    total_correct = 0
    total_words = 0

    with torch.no_grad():
        for batch_X, batch_Y in val_loader:
            output = model(batch_X)
            # criterion yields the batch mean; weight by batch size for a per-sample mean
            total_loss += criterion(output, batch_Y).item() * batch_Y.size(0)
            total_words += batch_Y.size(0)

            preds = torch.argmax(output, dim=1)
            predictions.extend(preds.cpu().numpy())
            labels.extend(batch_Y.cpu().numpy())

            total_correct += (preds == batch_Y).sum().item()

    if total_words == 0:
        return float('inf'), 0.0, float('inf'), predictions, labels

    avg_loss = total_loss / total_words
    accuracy = total_correct / total_words
    # exp() of a float64 only overflows for arguments above ~709. Let NumPy return
    # inf in that case rather than cutting off at an arbitrary small loss value.
    with np.errstate(over="ignore"):
        perplexity = float(np.exp(avg_loss))

    return avg_loss, accuracy, perplexity, predictions, labels

# Evaluate multiple models
# metrics[name] is the 5-tuple returned by evaluate_model:
# (avg_loss, accuracy, perplexity, predictions, labels).
metrics = {}
for name, model in models.items():
    print(f"\nEvaluating {name}...")
    metrics[name] = evaluate_model(model, val_loader)

# =====================
# 7. Plot Loss Curve
# =====================
# loss_curves[name] is the (train_losses, val_losses) pair returned by train_model.
# Each series is drawn separately: passing the pair straight to plt.plot would be read
# as a 2 x N array and produce N two-point lines instead of two curves.
plt.figure(figsize=(10, 5))
for name, (train_curve, val_curve) in loss_curves.items():
    plt.plot(range(1, len(train_curve) + 1), train_curve, label=f"{name} (train)")
    plt.plot(range(1, len(val_curve) + 1), val_curve, linestyle="--", label=f"{name} (val)")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Curves")
plt.legend()
plt.show()

# =====================
# 8. Confusion Matrix
# =====================
# Index -> word lookup for the axis tick labels.
index_to_word = {idx: word for word, idx in vocab.items()}

for name, (loss, accuracy, perplexity, preds, labels) in metrics.items():
    print(f"\n{name} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}, Perplexity: {perplexity:.4f}")

    # Ensure predictions and labels are flattened
    preds = np.array(preds).flatten()
    labels = np.array(labels).flatten()

    if labels.size == 0:
        # Nothing was evaluated for this model, so there is nothing to plot.
        continue

    # Restrict the matrix to the top_n most frequent true classes. Passing them via
    # `labels=` makes scikit-learn build only a top_n x top_n matrix (instead of one
    # row/column per class that occurs) and fixes the row/column order, so the tick
    # labels below line up with the cells.
    top_n = 10
    label_counts = np.bincount(labels)
    top_classes = [
        int(cls)
        for cls in np.argsort(-label_counts, kind="stable")[:top_n]
        if label_counts[cls] > 0
    ]
    cm = confusion_matrix(labels, preds, labels=top_classes)
    tick_words = [index_to_word[cls] for cls in top_classes]

    plt.figure(figsize=(8, 6))
    sns.heatmap(
        cm, annot=True, fmt='d', cmap='Blues',
        xticklabels=tick_words,
        yticklabels=tick_words
    )
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix for {name}")
    plt.show()

    # Clean up memory after each evaluation
    del preds, labels
    gc.collect()



Sentences generated from word2vec

In [None]:
import torch
import torch.nn.functional as F
import numpy as np

def generate_text(model, start_text, vocab, reverse_vocab, max_length=10, temperature=1.0, seq_length=None):
    """Sample up to ``max_length`` words that continue ``start_text``.

    Args:
        model: Module mapping a (1, time) tensor of word indices to (1, vocab) logits;
            switched to eval mode.
        start_text: Seed text; lower-cased and split on whitespace. Words missing
            from ``vocab`` stay in the returned text but are not fed to the model.
        vocab: Word -> index mapping.
        reverse_vocab: Index -> word mapping used to decode sampled indices.
        max_length: Maximum number of words to append.
        temperature: Softmax temperature; must be > 0.
        seq_length: Context length fed to the model at each step. Defaults to the
            notebook-level ``SEQ_LENGTH``.

    Returns:
        The seed words followed by the generated words, joined by single spaces.

    Raises:
        ValueError: If ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")
    if seq_length is None:
        seq_length = SEQ_LENGTH

    model.eval()
    words = start_text.lower().split()
    input_seq = [vocab[word] for word in words if word in vocab]
    generated_words = words[:]  # Start with the given words

    with torch.no_grad():
        for _ in range(max_length):
            if len(input_seq) < seq_length:
                # Left-pad with index 0 so the model always sees seq_length positions
                padded_seq = [0] * (seq_length - len(input_seq)) + input_seq
            else:
                padded_seq = input_seq[-seq_length:]  # Keep the most recent words

            input_tensor = torch.tensor([padded_seq], dtype=torch.int64)
            output = model(input_tensor).squeeze(0)  # Raw logits for the next word

            # Temperature-scaled softmax in float64, renormalised afterwards:
            # np.random.choice rejects `p` unless it sums to 1 within a tight tolerance,
            # which float32 softmax over a large vocabulary does not reliably meet.
            probabilities = F.softmax(output.double() / temperature, dim=-1).cpu().numpy()
            probabilities = probabilities / probabilities.sum()

            # Sample next word based on probability distribution
            predicted_index = int(np.random.choice(len(probabilities), p=probabilities))

            # Convert index to word
            if predicted_index in reverse_vocab:
                generated_words.append(reverse_vocab[predicted_index])
                input_seq.append(predicted_index)
            else:
                break  # Stop if the sampled index has no word

    return " ".join(generated_words)

# Reverse vocabulary (index → word mapping)
reverse_vocab = {idx: word for word, idx in vocab.items()}

# Generate text using the trained model with Word2Vec and temperature
# Both models get the same two-word seed and the same temperature; sampling is random,
# so the output changes from run to run.
start_text = "to be"
generated_text_word2vec = generate_text(models["Pretrained Word2Vec"], start_text, vocab, reverse_vocab, temperature=0.8)
print("Generated Text (Word2Vec):", generated_text_word2vec)

# Generate text using the randomly initialized embeddings model with temperature
generated_text_random = generate_text(models["Random Embeddings"], start_text, vocab, reverse_vocab, temperature=0.8)
print("Generated Text (Random Embeddings):", generated_text_random)
