<a href="https://colab.research.google.com/github/MasterNathan01/4106/blob/main/WatersNathan_801283595_HW5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
!pip install torchinfo

#Problem 1 (10)
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import time
import torchinfo

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

text = '''Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology.'''

sequence_length = 10
input_sequences = [text[i:i + sequence_length] for i in range(len(text) - sequence_length)]
next_characters = [text[i + sequence_length] for i in range(len(text) - sequence_length)]

unique_characters = sorted(list(set(text)))
char_to_index = {char: idx for idx, char in enumerate(unique_characters)}

X_tensor = torch.tensor([[char_to_index[char] for char in seq] for seq in input_sequences], dtype=torch.long)
y_tensor = torch.tensor([char_to_index[label] for label in next_characters], dtype=torch.long)
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

class CharacterLevelTransformer(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_size, num_encoder_layers, num_attention_heads):
        super(CharacterLevelTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(embedding_dim, num_attention_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.output_layer = nn.Linear(embedding_dim, output_size)

    def forward(self, input_sequence):
        embedded = self.embedding(input_sequence)
        embedded = embedded.permute(1, 0, 2)
        transformer_output = self.transformer_encoder(embedded)
        last_output = transformer_output[-1, :, :]
        return self.output_layer(last_output)

embedding_dim = 128
num_encoder_layers = 3
num_attention_heads = 2
learning_rate = 0.001
num_epochs = 50


model = CharacterLevelTransformer(
    vocab_size=len(unique_characters),
    embedding_dim=embedding_dim,
    output_size=len(unique_characters),
    num_encoder_layers=num_encoder_layers,
    num_attention_heads=num_attention_heads
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


sample_input = torch.randint(0, len(unique_characters), (batch_size, sequence_length)).to(device)
summary = torchinfo.summary(model, input_data=sample_input)
print(summary)


total_training_start = time.time()
for epoch in range(num_epochs):
    model.train()
    epoch_start = time.time()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()


    model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for val_x, val_y in val_loader:
            val_x, val_y = val_x.to(device), val_y.to(device)
            val_outputs = model(val_x)
            loss = criterion(val_outputs, val_y)
            val_loss += loss.item()
            predictions = val_outputs.argmax(dim=1)
            correct += (predictions == val_y).sum().item()
            total += val_y.size(0)

    if (epoch + 1) % 5 == 0:
        epoch_time = time.time() - epoch_start
        avg_train_loss = total_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = correct / total
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}, "
              f"Time: {epoch_time:.2f}s")

total_training_end = time.time()
print(f"Total Training Time: {total_training_end - total_training_start:.2f} seconds")

Using Device: cuda
Layer (type:depth-idx)                        Output Shape              Param #
CharacterLevelTransformer                     [64, 45]                  --
├─Embedding: 1-1                              [64, 10, 128]             5,760
├─TransformerEncoder: 1-2                     [10, 64, 128]             --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [10, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-2      [10, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-3      [10, 64, 128]             593,024
├─Linear: 1-3                                 [64, 45]                  5,805
Total params: 1,790,637
Trainable params: 1,790,637
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 16.55
Input size (MB): 0.01
Forward/backward pass size (MB): 38.03
Params size (MB): 6.37
Estimated Total Size (MB): 44.41




Epoch 5, Train Loss: 2.1820, Val Loss: 2.3450, Accuracy: 0.3166, Time: 0.27s
Epoch 10, Train Loss: 1.7661, Val Loss: 2.4630, Accuracy: 0.3396, Time: 0.28s
Epoch 15, Train Loss: 1.3546, Val Loss: 2.6383, Accuracy: 0.3669, Time: 0.27s
Epoch 20, Train Loss: 0.9777, Val Loss: 2.9458, Accuracy: 0.3690, Time: 0.27s
Epoch 25, Train Loss: 0.7313, Val Loss: 3.2068, Accuracy: 0.3501, Time: 0.27s
Epoch 30, Train Loss: 0.5677, Val Loss: 3.4068, Accuracy: 0.3438, Time: 0.29s
Epoch 35, Train Loss: 0.5091, Val Loss: 3.7381, Accuracy: 0.3375, Time: 0.28s
Epoch 40, Train Loss: 0.4042, Val Loss: 3.9802, Accuracy: 0.3249, Time: 0.27s
Epoch 45, Train Loss: 0.3617, Val Loss: 4.2139, Accuracy: 0.3229, Time: 0.27s
Epoch 50, Train Loss: 0.3314, Val Loss: 4.0908, Accuracy: 0.3312, Time: 0.26s
Total Training Time: 13.66 seconds


In [30]:
!pip install torchinfo

#Problem 1 (20)
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import time
import torchinfo

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

text = '''Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology.'''

sequence_length = 20
input_sequences = [text[i:i + sequence_length] for i in range(len(text) - sequence_length)]
next_characters = [text[i + sequence_length] for i in range(len(text) - sequence_length)]

unique_characters = sorted(list(set(text)))
char_to_index = {char: idx for idx, char in enumerate(unique_characters)}

X_tensor = torch.tensor([[char_to_index[char] for char in seq] for seq in input_sequences], dtype=torch.long)
y_tensor = torch.tensor([char_to_index[label] for label in next_characters], dtype=torch.long)
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

class CharacterLevelTransformer(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_size, num_encoder_layers, num_attention_heads):
        super(CharacterLevelTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(embedding_dim, num_attention_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.output_layer = nn.Linear(embedding_dim, output_size)

    def forward(self, input_sequence):
        embedded = self.embedding(input_sequence)
        embedded = embedded.permute(1, 0, 2)
        transformer_output = self.transformer_encoder(embedded)
        last_output = transformer_output[-1, :, :]
        return self.output_layer(last_output)

embedding_dim = 128
num_encoder_layers = 3
num_attention_heads = 2
learning_rate = 0.001
num_epochs = 50


model = CharacterLevelTransformer(
    vocab_size=len(unique_characters),
    embedding_dim=embedding_dim,
    output_size=len(unique_characters),
    num_encoder_layers=num_encoder_layers,
    num_attention_heads=num_attention_heads
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


sample_input = torch.randint(0, len(unique_characters), (batch_size, sequence_length)).to(device)
summary = torchinfo.summary(model, input_data=sample_input)
print(summary)


total_training_start = time.time()
for epoch in range(num_epochs):
    model.train()
    epoch_start = time.time()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()


    model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for val_x, val_y in val_loader:
            val_x, val_y = val_x.to(device), val_y.to(device)
            val_outputs = model(val_x)
            loss = criterion(val_outputs, val_y)
            val_loss += loss.item()
            predictions = val_outputs.argmax(dim=1)
            correct += (predictions == val_y).sum().item()
            total += val_y.size(0)

    if (epoch + 1) % 5 == 0:
        epoch_time = time.time() - epoch_start
        avg_train_loss = total_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = correct / total
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}, "
              f"Time: {epoch_time:.2f}s")

total_training_end = time.time()
print(f"Total Training Time: {total_training_end - total_training_start:.2f} seconds")

Using Device: cuda
Layer (type:depth-idx)                        Output Shape              Param #
CharacterLevelTransformer                     [64, 45]                  --
├─Embedding: 1-1                              [64, 20, 128]             5,760
├─TransformerEncoder: 1-2                     [20, 64, 128]             --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [20, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-2      [20, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-3      [20, 64, 128]             593,024
├─Linear: 1-3                                 [64, 45]                  5,805
Total params: 1,790,637
Trainable params: 1,790,637
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 32.36
Input size (MB): 0.01
Forward/backward pass size (MB): 76.04
Params size (MB): 6.37
Estimated Total Size (MB): 82.43
Epoch 5, Train Loss: 2.3204, Val Loss: 2.5551,

In [31]:
!pip install torchinfo

#Problem 1 (30)
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import time
import torchinfo

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

text = '''Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology.'''

sequence_length = 30
input_sequences = [text[i:i + sequence_length] for i in range(len(text) - sequence_length)]
next_characters = [text[i + sequence_length] for i in range(len(text) - sequence_length)]

unique_characters = sorted(list(set(text)))
char_to_index = {char: idx for idx, char in enumerate(unique_characters)}

X_tensor = torch.tensor([[char_to_index[char] for char in seq] for seq in input_sequences], dtype=torch.long)
y_tensor = torch.tensor([char_to_index[label] for label in next_characters], dtype=torch.long)
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

class CharacterLevelTransformer(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_size, num_encoder_layers, num_attention_heads):
        super(CharacterLevelTransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        encoder_layer = nn.TransformerEncoderLayer(embedding_dim, num_attention_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.output_layer = nn.Linear(embedding_dim, output_size)

    def forward(self, input_sequence):
        embedded = self.embedding(input_sequence)
        embedded = embedded.permute(1, 0, 2)
        transformer_output = self.transformer_encoder(embedded)
        last_output = transformer_output[-1, :, :]
        return self.output_layer(last_output)

embedding_dim = 128
num_encoder_layers = 3
num_attention_heads = 2
learning_rate = 0.001
num_epochs = 50


model = CharacterLevelTransformer(
    vocab_size=len(unique_characters),
    embedding_dim=embedding_dim,
    output_size=len(unique_characters),
    num_encoder_layers=num_encoder_layers,
    num_attention_heads=num_attention_heads
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


sample_input = torch.randint(0, len(unique_characters), (batch_size, sequence_length)).to(device)
summary = torchinfo.summary(model, input_data=sample_input)
print(summary)


total_training_start = time.time()
for epoch in range(num_epochs):
    model.train()
    epoch_start = time.time()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()


    model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for val_x, val_y in val_loader:
            val_x, val_y = val_x.to(device), val_y.to(device)
            val_outputs = model(val_x)
            loss = criterion(val_outputs, val_y)
            val_loss += loss.item()
            predictions = val_outputs.argmax(dim=1)
            correct += (predictions == val_y).sum().item()
            total += val_y.size(0)

    if (epoch + 1) % 5 == 0:
        epoch_time = time.time() - epoch_start
        avg_train_loss = total_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = correct / total
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}, "
              f"Time: {epoch_time:.2f}s")

total_training_end = time.time()
print(f"Total Training Time: {total_training_end - total_training_start:.2f} seconds")

Using Device: cuda
Layer (type:depth-idx)                        Output Shape              Param #
CharacterLevelTransformer                     [64, 45]                  --
├─Embedding: 1-1                              [64, 30, 128]             5,760
├─TransformerEncoder: 1-2                     [30, 64, 128]             --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [30, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-2      [30, 64, 128]             593,024
│    │    └─TransformerEncoderLayer: 3-3      [30, 64, 128]             593,024
├─Linear: 1-3                                 [64, 45]                  5,805
Total params: 1,790,637
Trainable params: 1,790,637
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 48.17
Input size (MB): 0.02
Forward/backward pass size (MB): 114.06
Params size (MB): 6.37
Estimated Total Size (MB): 120.44
Epoch 5, Train Loss: 2.3372, Val Loss: 2.661

In [40]:
# PROBLEM 2(20)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

#URL form Shakespeareloader.py
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
raw_text = response.text


sequence_len = 20
trimmed_text = raw_text[:sequence_len * (len(raw_text) // sequence_len)]
vocab = sorted(list(set(trimmed_text)))
char_to_index = {char: idx for idx, char in enumerate(vocab)}
index_to_char = {idx: char for idx, char in enumerate(vocab)}

encoded = [char_to_index[char] for char in trimmed_text]

#input-output pairs
input_sequences = []
target_characters = []
for i in range(len(encoded) - sequence_len):
    input_sequences.append(encoded[i:i+sequence_len])
    target_characters.append(encoded[i+sequence_len])

input_tensor = torch.tensor(input_sequences, dtype=torch.long)
target_tensor = torch.tensor(target_characters, dtype=torch.long)


class ShakespeareDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

#datasets and loaders
batch_size = 128
full_dataset = ShakespeareDataset(input_tensor, target_tensor)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Tx model
class TransformerCharModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, output_dim, model_type='Transformer',
                 num_layers=2, num_heads=2, ff_dim=256, dropout=0.1):
        super(TransformerCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        if model_type == 'Transformer':
            encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads,
                                                       dim_feedforward=ff_dim, dropout=dropout)
            self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Unsupported model type.")
        self.fc = nn.Linear(embed_dim, output_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        encoded_output = self.encoder(embedded)
        return self.fc(encoded_output[:, -1, :])  # Last time step

#training function
def train_model(model_type, train_loader, val_loader, device):
    model = TransformerCharModel(
        vocab_size=len(vocab),
        embed_dim=model_hidden_dim,
        output_dim=len(vocab),
        model_type=model_type,
        num_layers=num_encoder_layers,
        num_heads=num_attention_heads,
        ff_dim=feedforward_dim,
        dropout=model_dropout
    ).to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0
        for batch_inputs, batch_targets in train_loader:
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
            optimizer.zero_grad()
            logits = model(batch_inputs)
            loss = loss_fn(logits, batch_targets)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch_inputs.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)

        #validation
        model.eval()
        total_val_loss = 0.0
        correct_preds = 0
        total_preds = 0
        with torch.no_grad():
            for val_inputs, val_targets in val_loader:
                val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
                val_logits = model(val_inputs)
                loss = loss_fn(val_logits, val_targets)
                total_val_loss += loss.item() * val_inputs.size(0)
                predictions = torch.argmax(val_logits, dim=1)
                correct_preds += (predictions == val_targets).sum().item()
                total_preds += val_targets.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_accuracy = correct_preds / total_preds

        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}")

    end_time = time.time()
    training_duration = end_time - start_time

    return avg_train_loss, avg_val_loss, val_accuracy, training_duration

#parameters
model_hidden_dim = 512
num_encoder_layers = 4
num_attention_heads = 4
feedforward_dim = 256
model_dropout = 0.1
learning_rate = 0.001
num_epochs = 25

print("\nTraining for Sequence Length = 20")
results = {}
for arch in ['Transformer']:
    print(f"\nTraining {arch} model...")
    final_loss, final_val_loss, final_val_acc, total_time = train_model(arch, train_loader, val_loader, device)
    results[arch] = {
        'train_loss': final_loss,
        'val_loss': final_val_loss,
        'val_accuracy': final_val_acc,
        'execution_time': total_time
    }

print("\nFinal Results:")
for arch, metrics in results.items():
    print(f"\n{arch} Model:")
    print(f"Train Loss: {metrics['train_loss']:.4f}")
    print(f"Val Loss: {metrics['val_loss']:.4f}")
    print(f"Val Accuracy: {metrics['val_accuracy']:.4f}")
    print(f"Time: {metrics['execution_time']:.2f} sec")


Using Device: cuda

Training for Sequence Length = 20

Training Transformer model...




Epoch 1, Train Loss: 2.6869, Val Loss: 2.6956, Val Accuracy: 0.2394
Epoch 2, Train Loss: 2.7034, Val Loss: 2.6695, Val Accuracy: 0.2342
Epoch 3, Train Loss: 2.6784, Val Loss: 2.6233, Val Accuracy: 0.2404
Epoch 4, Train Loss: 2.6540, Val Loss: 2.6464, Val Accuracy: 0.2417
Epoch 5, Train Loss: 2.7019, Val Loss: 2.7339, Val Accuracy: 0.2189
Epoch 6, Train Loss: 2.7890, Val Loss: 2.7104, Val Accuracy: 0.2199
Epoch 7, Train Loss: 2.7055, Val Loss: 2.6884, Val Accuracy: 0.2263
Epoch 8, Train Loss: 2.7101, Val Loss: 2.6803, Val Accuracy: 0.2293
Epoch 9, Train Loss: 2.7093, Val Loss: 2.7196, Val Accuracy: 0.2216
Epoch 10, Train Loss: 2.7246, Val Loss: 2.7048, Val Accuracy: 0.2257
Epoch 11, Train Loss: 2.6860, Val Loss: 2.6505, Val Accuracy: 0.2462
Epoch 12, Train Loss: 2.6898, Val Loss: 2.6768, Val Accuracy: 0.2238
Epoch 13, Train Loss: 2.6914, Val Loss: 2.6851, Val Accuracy: 0.2237
Epoch 14, Train Loss: 2.7124, Val Loss: 2.7393, Val Accuracy: 0.2258
Epoch 15, Train Loss: 2.7507, Val Loss: 2.7

In [41]:
# PROBLEM 2(30)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

#URL form Shakespeareloader.py
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
raw_text = response.text


sequence_len = 30
trimmed_text = raw_text[:sequence_len * (len(raw_text) // sequence_len)]
vocab = sorted(list(set(trimmed_text)))
char_to_index = {char: idx for idx, char in enumerate(vocab)}
index_to_char = {idx: char for idx, char in enumerate(vocab)}

encoded = [char_to_index[char] for char in trimmed_text]

#input-output pairs
input_sequences = []
target_characters = []
for i in range(len(encoded) - sequence_len):
    input_sequences.append(encoded[i:i+sequence_len])
    target_characters.append(encoded[i+sequence_len])

input_tensor = torch.tensor(input_sequences, dtype=torch.long)
target_tensor = torch.tensor(target_characters, dtype=torch.long)


class ShakespeareDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

#datasets and loaders
batch_size = 128
full_dataset = ShakespeareDataset(input_tensor, target_tensor)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Tx model
class TransformerCharModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, output_dim, model_type='Transformer',
                 num_layers=2, num_heads=2, ff_dim=256, dropout=0.1):
        super(TransformerCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        if model_type == 'Transformer':
            encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads,
                                                       dim_feedforward=ff_dim, dropout=dropout)
            self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Unsupported model type.")
        self.fc = nn.Linear(embed_dim, output_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        encoded_output = self.encoder(embedded)
        return self.fc(encoded_output[:, -1, :])  # Last time step

#training function
def train_model(model_type, train_loader, val_loader, device):
    model = TransformerCharModel(
        vocab_size=len(vocab),
        embed_dim=model_hidden_dim,
        output_dim=len(vocab),
        model_type=model_type,
        num_layers=num_encoder_layers,
        num_heads=num_attention_heads,
        ff_dim=feedforward_dim,
        dropout=model_dropout
    ).to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0
        for batch_inputs, batch_targets in train_loader:
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
            optimizer.zero_grad()
            logits = model(batch_inputs)
            loss = loss_fn(logits, batch_targets)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch_inputs.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)

        #validation
        model.eval()
        total_val_loss = 0.0
        correct_preds = 0
        total_preds = 0
        with torch.no_grad():
            for val_inputs, val_targets in val_loader:
                val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
                val_logits = model(val_inputs)
                loss = loss_fn(val_logits, val_targets)
                total_val_loss += loss.item() * val_inputs.size(0)
                predictions = torch.argmax(val_logits, dim=1)
                correct_preds += (predictions == val_targets).sum().item()
                total_preds += val_targets.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_accuracy = correct_preds / total_preds

        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}")

    end_time = time.time()
    training_duration = end_time - start_time

    return avg_train_loss, avg_val_loss, val_accuracy, training_duration

#parameters
model_hidden_dim = 512
num_encoder_layers = 4
num_attention_heads = 4
feedforward_dim = 256
model_dropout = 0.1
learning_rate = 0.001
num_epochs = 25

print("\nTraining for Sequence Length = 20")
results = {}
for arch in ['Transformer']:
    print(f"\nTraining {arch} model...")
    final_loss, final_val_loss, final_val_acc, total_time = train_model(arch, train_loader, val_loader, device)
    results[arch] = {
        'train_loss': final_loss,
        'val_loss': final_val_loss,
        'val_accuracy': final_val_acc,
        'execution_time': total_time
    }

print("\nFinal Results:")
for arch, metrics in results.items():
    print(f"\n{arch} Model:")
    print(f"Train Loss: {metrics['train_loss']:.4f}")
    print(f"Val Loss: {metrics['val_loss']:.4f}")
    print(f"Val Accuracy: {metrics['val_accuracy']:.4f}")
    print(f"Time: {metrics['execution_time']:.2f} sec")


Using Device: cuda

Training for Sequence Length = 20

Training Transformer model...
Epoch 1, Train Loss: 2.7392, Val Loss: 2.6927, Val Accuracy: 0.2355
Epoch 2, Train Loss: 2.7566, Val Loss: 2.7290, Val Accuracy: 0.2274
Epoch 3, Train Loss: 2.7498, Val Loss: 2.7184, Val Accuracy: 0.2318
Epoch 4, Train Loss: 2.7418, Val Loss: 2.7196, Val Accuracy: 0.2278
Epoch 5, Train Loss: 2.7187, Val Loss: 2.7158, Val Accuracy: 0.2218
Epoch 6, Train Loss: 2.7727, Val Loss: 2.7966, Val Accuracy: 0.2099
Epoch 7, Train Loss: 2.8011, Val Loss: 2.7650, Val Accuracy: 0.2154
Epoch 8, Train Loss: 2.7749, Val Loss: 2.7364, Val Accuracy: 0.2084
Epoch 9, Train Loss: 2.7212, Val Loss: 2.6791, Val Accuracy: 0.2251
Epoch 10, Train Loss: 2.6866, Val Loss: 2.6600, Val Accuracy: 0.2366
Epoch 11, Train Loss: 2.6827, Val Loss: 2.6960, Val Accuracy: 0.2234
Epoch 12, Train Loss: 2.7086, Val Loss: 2.6841, Val Accuracy: 0.2291
Epoch 13, Train Loss: 2.6841, Val Loss: 2.6574, Val Accuracy: 0.2236
Epoch 14, Train Loss: 2.702

In [46]:
# PROBLEM 2(50)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

#URL form Shakespeareloader.py
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
raw_text = response.text


sequence_len = 50
trimmed_text = raw_text[:sequence_len * (len(raw_text) // sequence_len)]
vocab = sorted(list(set(trimmed_text)))
char_to_index = {char: idx for idx, char in enumerate(vocab)}
index_to_char = {idx: char for idx, char in enumerate(vocab)}

encoded = [char_to_index[char] for char in trimmed_text]

#input-output pairs
input_sequences = []
target_characters = []
for i in range(len(encoded) - sequence_len):
    input_sequences.append(encoded[i:i+sequence_len])
    target_characters.append(encoded[i+sequence_len])

input_tensor = torch.tensor(input_sequences, dtype=torch.long)
target_tensor = torch.tensor(target_characters, dtype=torch.long)


class ShakespeareDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

#datasets and loaders
batch_size = 128
full_dataset = ShakespeareDataset(input_tensor, target_tensor)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

# Tx model
class TransformerCharModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, output_dim, model_type='Transformer',
                 num_layers=2, num_heads=2, ff_dim=256, dropout=0.1):
        super(TransformerCharModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        if model_type == 'Transformer':
            encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads,
                                                       dim_feedforward=ff_dim, dropout=dropout)
            self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Unsupported model type.")
        self.fc = nn.Linear(embed_dim, output_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        encoded_output = self.encoder(embedded)
        return self.fc(encoded_output[:, -1, :])  # Last time step

#training function
def train_model(model_type, train_loader, val_loader, device):
    model = TransformerCharModel(
        vocab_size=len(vocab),
        embed_dim=model_hidden_dim,
        output_dim=len(vocab),
        model_type=model_type,
        num_layers=num_encoder_layers,
        num_heads=num_attention_heads,
        ff_dim=feedforward_dim,
        dropout=model_dropout
    ).to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0.0
        for batch_inputs, batch_targets in train_loader:
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
            optimizer.zero_grad()
            logits = model(batch_inputs)
            loss = loss_fn(logits, batch_targets)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item() * batch_inputs.size(0)

        avg_train_loss = total_train_loss / len(train_loader.dataset)

        #validation
        model.eval()
        total_val_loss = 0.0
        correct_preds = 0
        total_preds = 0
        with torch.no_grad():
            for val_inputs, val_targets in val_loader:
                val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
                val_logits = model(val_inputs)
                loss = loss_fn(val_logits, val_targets)
                total_val_loss += loss.item() * val_inputs.size(0)
                predictions = torch.argmax(val_logits, dim=1)
                correct_preds += (predictions == val_targets).sum().item()
                total_preds += val_targets.size(0)

        avg_val_loss = total_val_loss / len(val_loader.dataset)
        val_accuracy = correct_preds / total_preds

        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}")

    end_time = time.time()
    training_duration = end_time - start_time

    return avg_train_loss, avg_val_loss, val_accuracy, training_duration

#parameters
model_hidden_dim = 512
num_encoder_layers = 4
num_attention_heads = 4
feedforward_dim = 256
model_dropout = 0.1
learning_rate = 0.001
num_epochs = 5

print("\nTraining for Sequence Length = 20")
results = {}
for arch in ['Transformer']:
    print(f"\nTraining {arch} model...")
    final_loss, final_val_loss, final_val_acc, total_time = train_model(arch, train_loader, val_loader, device)
    results[arch] = {
        'train_loss': final_loss,
        'val_loss': final_val_loss,
        'val_accuracy': final_val_acc,
        'execution_time': total_time
    }

print("\nFinal Results:")
for arch, metrics in results.items():
    print(f"\n{arch} Model:")
    print(f"Train Loss: {metrics['train_loss']:.4f}")
    print(f"Val Loss: {metrics['val_loss']:.4f}")
    print(f"Val Accuracy: {metrics['val_accuracy']:.4f}")
    print(f"Time: {metrics['execution_time']:.2f} sec")


Using Device: cuda

Training for Sequence Length = 20

Training Transformer model...
Epoch 1, Train Loss: 3.2992, Val Loss: 3.3193, Val Accuracy: 0.1529
Epoch 2, Train Loss: 3.3172, Val Loss: 3.3167, Val Accuracy: 0.1529
Epoch 3, Train Loss: 3.3158, Val Loss: 3.3170, Val Accuracy: 0.1529
Epoch 4, Train Loss: 3.3152, Val Loss: 3.3156, Val Accuracy: 0.1529
Epoch 5, Train Loss: 3.3148, Val Loss: 3.3150, Val Accuracy: 0.1529

Final Results:

Transformer Model:
Train Loss: 3.3148
Val Loss: 3.3150
Val Accuracy: 0.1529
Time: 3172.28 sec


In [48]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=1, heads=2, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda
Epoch [1/20], Loss: 704.1741, Accuracy: 0.0260
Epoch [2/20], Loss: 597.6162, Accuracy: 0.0465
Epoch [3/20], Loss: 536.9873, Accuracy: 0.0911
Epoch [4/20], Loss: 482.2827, Accuracy: 0.1264
Epoch [5/20], Loss: 430.9078, Accuracy: 0.2063
Epoch [6/20], Loss: 379.9720, Accuracy: 0.3067
Epoch [7/20], Loss: 335.8441, Accuracy: 0.3625
Epoch [8/20], Loss: 291.8489, Accuracy: 0.4201
Epoch [9/20], Loss: 253.4640, Accuracy: 0.5130
Epoch [10/20], Loss: 214.9890, Accuracy: 0.5669
Epoch [11/20], Loss: 183.3171, Accuracy: 0.6078
Epoch [12/20], Loss: 151.7505, Accuracy: 0.6691
Epoch [13/20], Loss: 128.7364, Accuracy: 0.6859
Epoch [14/20], Loss: 107.2682, Accuracy: 0.7249
Epoch [15/20], Loss: 89.2647, Accuracy: 0.7268
Epoch [16/20], Loss: 75.0424, Accuracy: 0.7379
Epoch [17/20], Loss: 62.7058, Accuracy: 0.7714
Epoch [18/20], Loss: 54.2188, Accuracy: 0.7602
Epoch [19/20], Loss: 43.5705, Accuracy: 0.7807
Epoch [20/20], Loss: 41.7328, Accuracy: 0.7714
Evaluation Loss: 0.1224, Evaluation 

In [49]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=2, heads=2, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda
Epoch [1/20], Loss: 702.9015, Accuracy: 0.0260
Epoch [2/20], Loss: 596.5291, Accuracy: 0.0520
Epoch [3/20], Loss: 538.8961, Accuracy: 0.0967
Epoch [4/20], Loss: 484.3067, Accuracy: 0.1450
Epoch [5/20], Loss: 435.6667, Accuracy: 0.2007
Epoch [6/20], Loss: 391.3247, Accuracy: 0.2621
Epoch [7/20], Loss: 341.4072, Accuracy: 0.3364
Epoch [8/20], Loss: 307.6692, Accuracy: 0.4257
Epoch [9/20], Loss: 264.7727, Accuracy: 0.4740
Epoch [10/20], Loss: 227.2709, Accuracy: 0.5316
Epoch [11/20], Loss: 194.2135, Accuracy: 0.5706
Epoch [12/20], Loss: 168.2063, Accuracy: 0.6264
Epoch [13/20], Loss: 141.0146, Accuracy: 0.6617
Epoch [14/20], Loss: 116.4183, Accuracy: 0.6933
Epoch [15/20], Loss: 98.6600, Accuracy: 0.7156
Epoch [16/20], Loss: 80.8100, Accuracy: 0.7472
Epoch [17/20], Loss: 66.4631, Accuracy: 0.7602
Epoch [18/20], Loss: 57.4684, Accuracy: 0.7584
Epoch [19/20], Loss: 48.4208, Accuracy: 0.7677
Epoch [20/20], Loss: 37.2261, Accuracy: 0.7862
Evaluation Loss: 0.1091, Evaluation 

In [50]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=4, heads=2, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda
Epoch [1/20], Loss: 701.8314, Accuracy: 0.0279
Epoch [2/20], Loss: 620.2638, Accuracy: 0.0520
Epoch [3/20], Loss: 590.5836, Accuracy: 0.0446
Epoch [4/20], Loss: 566.5749, Accuracy: 0.0595
Epoch [5/20], Loss: 538.3696, Accuracy: 0.0706
Epoch [6/20], Loss: 509.3865, Accuracy: 0.0911
Epoch [7/20], Loss: 487.4996, Accuracy: 0.1022
Epoch [8/20], Loss: 455.6708, Accuracy: 0.1506
Epoch [9/20], Loss: 427.8321, Accuracy: 0.1617
Epoch [10/20], Loss: 406.2727, Accuracy: 0.1784
Epoch [11/20], Loss: 376.0735, Accuracy: 0.2063
Epoch [12/20], Loss: 342.8286, Accuracy: 0.2435
Epoch [13/20], Loss: 317.9321, Accuracy: 0.2770
Epoch [14/20], Loss: 282.6011, Accuracy: 0.3532
Epoch [15/20], Loss: 259.5415, Accuracy: 0.3569
Epoch [16/20], Loss: 233.4447, Accuracy: 0.4349
Epoch [17/20], Loss: 208.0834, Accuracy: 0.4814
Epoch [18/20], Loss: 183.3682, Accuracy: 0.5279
Epoch [19/20], Loss: 163.9532, Accuracy: 0.5409
Epoch [20/20], Loss: 146.2844, Accuracy: 0.6041
Evaluation Loss: 0.8104, Evalu

In [52]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=1, heads=4, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda




Epoch [1/20], Loss: 700.0375, Accuracy: 0.0223
Epoch [2/20], Loss: 595.2414, Accuracy: 0.0725
Epoch [3/20], Loss: 529.7080, Accuracy: 0.1171
Epoch [4/20], Loss: 476.5407, Accuracy: 0.1617
Epoch [5/20], Loss: 425.3343, Accuracy: 0.2323
Epoch [6/20], Loss: 376.2453, Accuracy: 0.2993
Epoch [7/20], Loss: 327.2179, Accuracy: 0.3773
Epoch [8/20], Loss: 283.1774, Accuracy: 0.4554
Epoch [9/20], Loss: 243.6686, Accuracy: 0.5167
Epoch [10/20], Loss: 209.3154, Accuracy: 0.5576
Epoch [11/20], Loss: 175.2657, Accuracy: 0.6413
Epoch [12/20], Loss: 148.4506, Accuracy: 0.6599
Epoch [13/20], Loss: 123.0121, Accuracy: 0.7063
Epoch [14/20], Loss: 99.2898, Accuracy: 0.7379
Epoch [15/20], Loss: 82.4172, Accuracy: 0.7454
Epoch [16/20], Loss: 69.0804, Accuracy: 0.7454
Epoch [17/20], Loss: 57.6224, Accuracy: 0.7639
Epoch [18/20], Loss: 47.6669, Accuracy: 0.7714
Epoch [19/20], Loss: 42.2299, Accuracy: 0.7807
Epoch [20/20], Loss: 33.3639, Accuracy: 0.7807
Evaluation Loss: 0.1053, Evaluation Accuracy: 0.7881


In [53]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=2, heads=4, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda
Epoch [1/20], Loss: 701.1364, Accuracy: 0.0409
Epoch [2/20], Loss: 592.4689, Accuracy: 0.0613
Epoch [3/20], Loss: 534.8872, Accuracy: 0.0874
Epoch [4/20], Loss: 482.9463, Accuracy: 0.1394
Epoch [5/20], Loss: 433.2986, Accuracy: 0.2045
Epoch [6/20], Loss: 390.0136, Accuracy: 0.2677
Epoch [7/20], Loss: 349.5624, Accuracy: 0.3457
Epoch [8/20], Loss: 304.7755, Accuracy: 0.4312
Epoch [9/20], Loss: 262.9395, Accuracy: 0.4870
Epoch [10/20], Loss: 230.8210, Accuracy: 0.5260
Epoch [11/20], Loss: 194.0881, Accuracy: 0.5911
Epoch [12/20], Loss: 163.9997, Accuracy: 0.6543
Epoch [13/20], Loss: 136.7022, Accuracy: 0.6784
Epoch [14/20], Loss: 116.9175, Accuracy: 0.6952
Epoch [15/20], Loss: 94.9813, Accuracy: 0.7305
Epoch [16/20], Loss: 79.8683, Accuracy: 0.7416
Epoch [17/20], Loss: 64.9256, Accuracy: 0.7584
Epoch [18/20], Loss: 52.7111, Accuracy: 0.7714
Epoch [19/20], Loss: 44.6923, Accuracy: 0.7751
Epoch [20/20], Loss: 34.4606, Accuracy: 0.7844
Evaluation Loss: 0.1057, Evaluation 

In [54]:
#PROBLEM 3
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]


START_TOKEN = 0
END_TOKEN = 1



token_to_index = {"SOS": START_TOKEN, "EOS": END_TOKEN}
for eng, fr in english_to_french:
    for word in eng.split() + fr.split():
        if word not in token_to_index:
            token_to_index[word] = len(token_to_index)

index_to_token = {i: word for word, i in token_to_index.items()}

#custom dataset
class PairedTranslationDataset(Dataset):
    def __init__(self, pairs, token_map):
        self.pairs = pairs
        self.token_map = token_map

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        eng, fr = self.pairs[idx]
        source = [self.token_map[word] for word in eng.split()] + [END_TOKEN]
        target = [self.token_map[word] for word in fr.split()] + [END_TOKEN]
        return torch.tensor(source, dtype=torch.long), torch.tensor(target, dtype=torch.long)

#dataLoader
translation_data = PairedTranslationDataset(english_to_french, token_to_index)
loader = DataLoader(translation_data, batch_size=1, shuffle=True)

#transformer model
class SimpleTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, n_layers=4, heads=4, dropout=0.1):
        super(SimpleTransformer, self).__init__()
        self.src_embed = nn.Embedding(src_vocab_size, embed_size)
        self.tgt_embed = nn.Embedding(tgt_vocab_size, embed_size)
        self.transformer_block = nn.Transformer(
            d_model=embed_size,
            nhead=heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=embed_size * 4,
            dropout=dropout
        )
        self.output_layer = nn.Linear(embed_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_emb = self.src_embed(src).permute(1, 0, 2)
        tgt_emb = self.tgt_embed(tgt).permute(1, 0, 2)

        src_mask = self.create_pad_mask(src)
        tgt_mask = self.create_pad_mask(tgt)
        tgt_seq_mask = self.create_subsequent_mask(tgt)

        transformed = self.transformer_block(
            src_emb, tgt_emb,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.output_layer(transformed).permute(1, 0, 2)

    def create_pad_mask(self, seq):
        return (seq == token_to_index["EOS"])

    def create_subsequent_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

vocab_size = len(token_to_index)
model_dim = 64
transformer_model = SimpleTransformer(vocab_size, vocab_size, model_dim).to(device)

#loss and optim
lr = 0.0005
loss_fn = nn.CrossEntropyLoss(ignore_index=END_TOKEN)
optimizer = optim.Adam(transformer_model.parameters(), lr=lr)

#training loop
epochs = 20
for ep in range(epochs):
    total_loss, correct_preds, total_preds = 0, 0, 0
    transformer_model.train()

    for src_tensor, tgt_tensor in loader:
        src_tensor = src_tensor.to(device)
        tgt_tensor = tgt_tensor.to(device)

        optimizer.zero_grad()
        logits = transformer_model(src_tensor, tgt_tensor[:, :-1])
        logits = logits.reshape(-1, logits.shape[-1])
        target_flat = tgt_tensor[:, 1:].reshape(-1)

        loss = loss_fn(logits, target_flat)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        predicted = torch.argmax(logits, dim=1)
        correct_preds += (predicted == target_flat).sum().item()
        total_preds += target_flat.size(0)

    print(f"Epoch [{ep+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {correct_preds / total_preds:.4f}")

#evaluation
def evaluate(model, dataloader, criterion):
    model.eval()
    eval_loss, correct, total = 0, 0, 0

    with torch.no_grad():
        for src_tensor, tgt_tensor in dataloader:
            src_tensor = src_tensor.to(device)
            tgt_tensor = tgt_tensor.to(device)

            logits = model(src_tensor, tgt_tensor[:, :-1])
            logits = logits.reshape(-1, logits.shape[-1])
            target_flat = tgt_tensor[:, 1:].reshape(-1)

            loss = criterion(logits, target_flat)
            eval_loss += loss.item()

            predicted = torch.argmax(logits, dim=1)
            correct += (predicted == target_flat).sum().item()
            total += target_flat.size(0)

    return eval_loss / len(dataloader), correct / total

loss, acc = evaluate(transformer_model, loader, loss_fn)
print(f"Evaluation Loss: {loss:.4f}, Evaluation Accuracy: {acc:.4f}")

Using Device: cuda
Epoch [1/20], Loss: 701.1386, Accuracy: 0.0204
Epoch [2/20], Loss: 620.1522, Accuracy: 0.0372
Epoch [3/20], Loss: 589.7730, Accuracy: 0.0483
Epoch [4/20], Loss: 560.4815, Accuracy: 0.0651
Epoch [5/20], Loss: 529.5113, Accuracy: 0.0762
Epoch [6/20], Loss: 498.5904, Accuracy: 0.1041
Epoch [7/20], Loss: 465.3093, Accuracy: 0.1264
Epoch [8/20], Loss: 438.2941, Accuracy: 0.1506
Epoch [9/20], Loss: 408.9849, Accuracy: 0.1877
Epoch [10/20], Loss: 374.6556, Accuracy: 0.2323
Epoch [11/20], Loss: 339.4845, Accuracy: 0.2881
Epoch [12/20], Loss: 310.5677, Accuracy: 0.3383
Epoch [13/20], Loss: 283.6783, Accuracy: 0.3848
Epoch [14/20], Loss: 251.6522, Accuracy: 0.4071
Epoch [15/20], Loss: 225.8724, Accuracy: 0.4610
Epoch [16/20], Loss: 193.5464, Accuracy: 0.5130
Epoch [17/20], Loss: 180.1829, Accuracy: 0.5558
Epoch [18/20], Loss: 158.6883, Accuracy: 0.5818
Epoch [19/20], Loss: 135.1771, Accuracy: 0.6283
Epoch [20/20], Loss: 116.5961, Accuracy: 0.6636
Evaluation Loss: 0.6152, Evalu

In [57]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=1, heads=2, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 694.7942, Accuracy: 0.0466
Epoch [2/20], Loss: 584.5519, Accuracy: 0.0830
Epoch [3/20], Loss: 522.9183, Accuracy: 0.1032
Epoch [4/20], Loss: 470.0118, Accuracy: 0.1356
Epoch [5/20], Loss: 414.8649, Accuracy: 0.2308
Epoch [6/20], Loss: 367.1776, Accuracy: 0.2935
Epoch [7/20], Loss: 322.7814, Accuracy: 0.4069
Epoch [8/20], Loss: 284.1753, Accuracy: 0.4717
Epoch [9/20], Loss: 246.7126, Accuracy: 0.5162
Epoch [10/20], Loss: 209.0282, Accuracy: 0.5749
Epoch [11/20], Loss: 178.2862, Accuracy: 0.6093
Epoch [12/20], Loss: 149.8894, Accuracy: 0.6538
Epoch [13/20], Loss: 123.4505, Accuracy: 0.6964
Epoch [14/20], Loss: 104.0893, Accuracy: 0.7085
Epoch [15/20], Loss: 85.8692, Accuracy: 0.7166
Epoch [16/20], Loss: 76.3957, Accuracy: 0.7348
Epoch [17/20], Loss: 64.2792, Accuracy: 0.7449
Epoch [18/20], Loss: 49.1544, Accuracy: 0.7530
Epoch [19/20], Loss: 42.2039, Accuracy: 0.7591
Epoch [20/20], Loss: 40.1879, Accuracy: 0.7571
Evaluation Loss: 0.1142, Evaluation 

In [58]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=2, heads=2, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 691.9666, Accuracy: 0.0688
Epoch [2/20], Loss: 591.4724, Accuracy: 0.0850
Epoch [3/20], Loss: 540.1838, Accuracy: 0.0931
Epoch [4/20], Loss: 486.4024, Accuracy: 0.1437
Epoch [5/20], Loss: 438.6172, Accuracy: 0.1842
Epoch [6/20], Loss: 394.0879, Accuracy: 0.2551
Epoch [7/20], Loss: 349.6095, Accuracy: 0.3138
Epoch [8/20], Loss: 310.4495, Accuracy: 0.3745
Epoch [9/20], Loss: 275.1861, Accuracy: 0.4555
Epoch [10/20], Loss: 242.3601, Accuracy: 0.4980
Epoch [11/20], Loss: 205.6222, Accuracy: 0.5850
Epoch [12/20], Loss: 178.7289, Accuracy: 0.6174
Epoch [13/20], Loss: 149.4508, Accuracy: 0.6640
Epoch [14/20], Loss: 126.9301, Accuracy: 0.6781
Epoch [15/20], Loss: 102.3824, Accuracy: 0.7247
Epoch [16/20], Loss: 85.7864, Accuracy: 0.7389
Epoch [17/20], Loss: 69.0059, Accuracy: 0.7510
Epoch [18/20], Loss: 58.2876, Accuracy: 0.7591
Epoch [19/20], Loss: 48.7675, Accuracy: 0.7571
Epoch [20/20], Loss: 42.1558, Accuracy: 0.7591
Evaluation Loss: 0.1251, Evaluation

In [59]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=4, heads=2, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 690.8254, Accuracy: 0.0749
Epoch [2/20], Loss: 607.4997, Accuracy: 0.0850
Epoch [3/20], Loss: 577.0393, Accuracy: 0.0850
Epoch [4/20], Loss: 544.6888, Accuracy: 0.0891
Epoch [5/20], Loss: 519.4969, Accuracy: 0.1032
Epoch [6/20], Loss: 488.6572, Accuracy: 0.1275
Epoch [7/20], Loss: 455.0635, Accuracy: 0.1518
Epoch [8/20], Loss: 430.9354, Accuracy: 0.1518
Epoch [9/20], Loss: 399.1526, Accuracy: 0.2024
Epoch [10/20], Loss: 368.8929, Accuracy: 0.2267
Epoch [11/20], Loss: 346.9021, Accuracy: 0.2632
Epoch [12/20], Loss: 321.0830, Accuracy: 0.2854
Epoch [13/20], Loss: 286.3260, Accuracy: 0.3563
Epoch [14/20], Loss: 260.3852, Accuracy: 0.3887
Epoch [15/20], Loss: 240.9875, Accuracy: 0.4170
Epoch [16/20], Loss: 219.3859, Accuracy: 0.4636
Epoch [17/20], Loss: 193.2021, Accuracy: 0.5223
Epoch [18/20], Loss: 176.3793, Accuracy: 0.5526
Epoch [19/20], Loss: 157.6054, Accuracy: 0.5789
Epoch [20/20], Loss: 134.2669, Accuracy: 0.6194
Evaluation Loss: 0.7446, Evalu

In [60]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=1, heads=4, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 690.7450, Accuracy: 0.0607
Epoch [2/20], Loss: 579.3171, Accuracy: 0.0810
Epoch [3/20], Loss: 518.3899, Accuracy: 0.1012
Epoch [4/20], Loss: 462.2836, Accuracy: 0.1538
Epoch [5/20], Loss: 410.0228, Accuracy: 0.2429
Epoch [6/20], Loss: 358.8684, Accuracy: 0.3340
Epoch [7/20], Loss: 312.3885, Accuracy: 0.4109
Epoch [8/20], Loss: 270.7301, Accuracy: 0.4777
Epoch [9/20], Loss: 231.4096, Accuracy: 0.5364
Epoch [10/20], Loss: 196.0970, Accuracy: 0.5850
Epoch [11/20], Loss: 163.4647, Accuracy: 0.6336
Epoch [12/20], Loss: 137.9185, Accuracy: 0.6741
Epoch [13/20], Loss: 113.4307, Accuracy: 0.7166
Epoch [14/20], Loss: 94.9808, Accuracy: 0.7328
Epoch [15/20], Loss: 81.3046, Accuracy: 0.7510
Epoch [16/20], Loss: 64.0611, Accuracy: 0.7510
Epoch [17/20], Loss: 51.3043, Accuracy: 0.7611
Epoch [18/20], Loss: 43.4965, Accuracy: 0.7591
Epoch [19/20], Loss: 36.6197, Accuracy: 0.7652
Epoch [20/20], Loss: 31.2553, Accuracy: 0.7692
Evaluation Loss: 0.0909, Evaluation A

In [62]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=2, heads=4, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 693.3194, Accuracy: 0.0628
Epoch [2/20], Loss: 588.4634, Accuracy: 0.0830
Epoch [3/20], Loss: 531.3139, Accuracy: 0.1053
Epoch [4/20], Loss: 474.6070, Accuracy: 0.1478
Epoch [5/20], Loss: 422.5551, Accuracy: 0.2389
Epoch [6/20], Loss: 376.9967, Accuracy: 0.2874
Epoch [7/20], Loss: 332.0455, Accuracy: 0.3704
Epoch [8/20], Loss: 295.1260, Accuracy: 0.4028
Epoch [9/20], Loss: 257.3547, Accuracy: 0.4656
Epoch [10/20], Loss: 223.6361, Accuracy: 0.5364
Epoch [11/20], Loss: 189.5228, Accuracy: 0.5850
Epoch [12/20], Loss: 159.0589, Accuracy: 0.6437
Epoch [13/20], Loss: 129.5732, Accuracy: 0.6802
Epoch [14/20], Loss: 112.0680, Accuracy: 0.7105
Epoch [15/20], Loss: 91.8773, Accuracy: 0.7328
Epoch [16/20], Loss: 74.5482, Accuracy: 0.7470
Epoch [17/20], Loss: 64.2119, Accuracy: 0.7510
Epoch [18/20], Loss: 50.6572, Accuracy: 0.7672
Epoch [19/20], Loss: 41.7924, Accuracy: 0.7611
Epoch [20/20], Loss: 32.1077, Accuracy: 0.7713
Evaluation Loss: 0.0957, Evaluation 

In [63]:
#PROBLEM 4
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using Device: {device}")

english_to_french = [

    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur"),
    ("They ride bicycles", "Ils font du vélo"),
    ("The coffee is hot", "Le café est chaud"),
    ("She wears glasses", "Elle porte des lunettes"),
    ("We visit our grandparents", "Nous rendons visite à nos grands-parents"),
    ("He plays the guitar", "Il joue de la guitare"),
    ("They go shopping", "Ils font du shopping"),
    ("The teacher explains the lesson", "Le professeur explique la leçon"),
    ("She takes the train to work", "Elle prend le train pour aller au travail"),
    ("We bake cookies", "Nous faisons des biscuits"),
    ("He washes his hands", "Il se lave les mains"),
    ("They enjoy the sunset", "Ils apprécient le coucher du soleil"),
    ("The river flows calmly", "La rivière coule calmement"),
    ("She feeds the cat", "Elle nourrit le chat"),
    ("We visit the museum", "Nous visitons le musée"),
    ("He fixes his bicycle", "Il répare son vélo"),
    ("They paint the walls", "Ils peignent les murs"),
    ("The baby sleeps peacefully", "Le bébé dort paisiblement"),
    ("She ties her shoelaces", "Elle attache ses lacets"),
    ("We climb the stairs", "Nous montons les escaliers"),
    ("He shaves in the morning", "Il se rase le matin"),
    ("They set the table", "Ils mettent la table"),
    ("The airplane takes off", "L'avion décolle"),
    ("She waters the plants", "Elle arrose les plantes"),
    ("We practice yoga", "Nous pratiquons le yoga"),
    ("He turns off the light", "Il éteint la lumière"),
    ("They play video games", "Ils jouent aux jeux vidéo"),
    ("The soup smells delicious", "La soupe sent délicieusement bon"),
    ("She locks the door", "Elle ferme la porte à clé"),
    ("We enjoy a picnic", "Nous profitons d'un pique-nique"),
    ("He checks his email", "Il vérifie ses emails"),
    ("They go to the gym", "Ils vont à la salle de sport"),
    ("The moon shines brightly", "La lune brille intensément"),
    ("She catches the bus", "Elle attrape le bus"),
    ("We greet our neighbors", "Nous saluons nos voisins"),
    ("He combs his hair", "Il se peigne les cheveux"),
    ("They wave goodbye", "Ils font un signe d'adieu")

]

# Flip source and target
french_to_english_data = [(fr, en) for en, fr in english_to_french]


# Special tokens
START_IDX = 0
END_IDX = 1

# Create vocab dictionary
vocab_map = {"SOS": START_IDX, "EOS": END_IDX}
for src, tgt in french_to_english_data:
    for word in src.split() + tgt.split():
        if word not in vocab_map:
            vocab_map[word] = len(vocab_map)

# Custom dataset
class FlipTranslationDataset(Dataset):
    def __init__(self, data, vocab):
        self.data = data
        self.vocab = vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        src_sentence, tgt_sentence = self.data[index]
        src_indices = [self.vocab[word] for word in src_sentence.split()] + [END_IDX]
        tgt_indices = [self.vocab[word] for word in tgt_sentence.split()] + [END_IDX]
        return torch.tensor(src_indices, dtype=torch.long), torch.tensor(tgt_indices, dtype=torch.long)

# DataLoader
flip_dataset = FlipTranslationDataset(french_to_english_data, vocab_map)
batcher = DataLoader(flip_dataset, batch_size=1, shuffle=True)

# Transformer model definition
class BasicTransformer(nn.Module):
    def __init__(self, input_vocab, output_vocab, dim_model, layers=4, heads=4, drop=0.1):
        super(BasicTransformer, self).__init__()
        self.src_embedding = nn.Embedding(input_vocab, dim_model)
        self.tgt_embedding = nn.Embedding(output_vocab, dim_model)
        self.network = nn.Transformer(
            d_model=dim_model,
            nhead=heads,
            num_encoder_layers=layers,
            num_decoder_layers=layers,
            dim_feedforward=dim_model * 4,
            dropout=drop
        )
        self.final_layer = nn.Linear(dim_model, output_vocab)

    def forward(self, src_input, tgt_input):
        src_encoded = self.src_embedding(src_input).permute(1, 0, 2)
        tgt_encoded = self.tgt_embedding(tgt_input).permute(1, 0, 2)

        src_mask = self.get_padding_mask(src_input)
        tgt_mask = self.get_padding_mask(tgt_input)
        tgt_seq_mask = self.get_future_mask(tgt_input)

        transformed = self.network(
            src_encoded,
            tgt_encoded,
            src_key_padding_mask=src_mask,
            tgt_key_padding_mask=tgt_mask,
            memory_key_padding_mask=src_mask,
            tgt_mask=tgt_seq_mask
        )

        return self.final_layer(transformed).permute(1, 0, 2)

    def get_padding_mask(self, seq):
        return (seq == vocab_map["EOS"])

    def get_future_mask(self, seq):
        size = seq.size(1)
        mask = torch.triu(torch.ones(size, size), diagonal=1).bool()
        return mask.to(seq.device)

# Model initialization
vocab_size = len(vocab_map)
model_dim = 64
net = BasicTransformer(vocab_size, vocab_size, model_dim).to(device)

# Training settings
lr_rate = 0.0005
loss_function = nn.CrossEntropyLoss(ignore_index=END_IDX)
optimizer = optim.Adam(net.parameters(), lr=lr_rate)
epochs = 20

# Training loop
for epoch in range(epochs):
    net.train()
    epoch_loss, hits, total_tokens = 0, 0, 0

    for src_batch, tgt_batch in batcher:
        src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
        optimizer.zero_grad()

        preds = net(src_batch, tgt_batch[:, :-1])
        preds = preds.reshape(-1, preds.shape[-1])
        gold = tgt_batch[:, 1:].reshape(-1)

        loss = loss_function(preds, gold)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        hits += (preds.argmax(dim=1) == gold).sum().item()
        total_tokens += gold.size(0)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {hits / total_tokens:.4f}")

# Evaluation
def evaluate(net, dataloader, loss_fn):
    net.eval()
    total_loss, correct, count = 0, 0, 0

    with torch.no_grad():
        for x_input, y_target in dataloader:
            x_input, y_target = x_input.to(device), y_target.to(device)
            preds = net(x_input, y_target[:, :-1])
            preds = preds.reshape(-1, preds.shape[-1])
            y_flat = y_target[:, 1:].reshape(-1)

            loss = loss_fn(preds, y_flat)
            total_loss += loss.item()
            correct += (preds.argmax(dim=1) == y_flat).sum().item()
            count += y_flat.size(0)

    return total_loss / len(dataloader), correct / count

final_loss, final_acc = evaluate(net, batcher, loss_function)
print(f"Evaluation Loss: {final_loss:.4f}, Evaluation Accuracy: {final_acc:.4f}")


Using Device: cuda
Epoch [1/20], Loss: 697.5597, Accuracy: 0.0648
Epoch [2/20], Loss: 616.2046, Accuracy: 0.0810
Epoch [3/20], Loss: 589.0688, Accuracy: 0.0830
Epoch [4/20], Loss: 566.4357, Accuracy: 0.0830
Epoch [5/20], Loss: 543.9040, Accuracy: 0.0911
Epoch [6/20], Loss: 519.6746, Accuracy: 0.0992
Epoch [7/20], Loss: 492.4871, Accuracy: 0.1073
Epoch [8/20], Loss: 457.1884, Accuracy: 0.1518
Epoch [9/20], Loss: 432.6189, Accuracy: 0.1721
Epoch [10/20], Loss: 407.4150, Accuracy: 0.1862
Epoch [11/20], Loss: 377.7140, Accuracy: 0.2186
Epoch [12/20], Loss: 347.5901, Accuracy: 0.2409
Epoch [13/20], Loss: 322.3324, Accuracy: 0.3036
Epoch [14/20], Loss: 301.5395, Accuracy: 0.3097
Epoch [15/20], Loss: 271.5202, Accuracy: 0.3725
Epoch [16/20], Loss: 247.7576, Accuracy: 0.4170
Epoch [17/20], Loss: 222.0449, Accuracy: 0.4595
Epoch [18/20], Loss: 201.5146, Accuracy: 0.4757
Epoch [19/20], Loss: 173.5557, Accuracy: 0.5486
Epoch [20/20], Loss: 156.6968, Accuracy: 0.5810
Evaluation Loss: 0.8775, Evalu