In [11]:
from nltk.translate.bleu_score import sentence_bleu

# Example reference (true translation) and hypothesis (model-generated translation)
reference = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', 'that', 'the', 'military', 'will', 'forever', 'heed', 'Party', 'commands']
hypothesis = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party']

# Calculate BLEU score
bleu_score = sentence_bleu(reference, hypothesis)
print("BLEU Score:", bleu_score)


BLEU Score: 8.844844403089352e-232


In [12]:
# Tsk 2
# Install necessary libraries
!pip install torch nltk


Collecting torch
  Downloading torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting filelock (from torch)
  Downloading filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)
Collecting sympy==1.13.1 (from torch)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)
   ---------------------------------------- 0.0/203.0 MB ? eta -:--:--
    --------------------------------------- 4.2/203.0 MB 25.2 MB/s eta 0:00:08
   - -------------------------------------- 9.7/203.0 MB 26.3 MB/s eta 0:00:08
   --- ------------------------------------ 16.3/203.0 MB 28.5 MB/s eta 0:00:07
   ---- -------------------------

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Python312\\share\\man'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
import torch
import torch.nn as nn

# Define the Encoder class
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, hidden_dim)
        self.rnn = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
    
    def forward(self, src):
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.rnn(embedded)
        return outputs, (hidden, cell)

# Define the Decoder without Attention
class DecoderNoAttention(nn.Module):
    def __init__(self, output_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, hidden_dim)
        self.rnn = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, input, hidden, cell):
        embedded = self.embedding(input).unsqueeze(1)  # embedded: [batch_size, 1, hidden_dim]
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.fc(output.squeeze(1))  # prediction: [batch_size, output_dim]
        return prediction, hidden, cell

# Define the Attention Mechanism
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Parameter(torch.rand(hidden_dim))

    def forward(self, hidden, encoder_outputs):
        hidden = hidden.expand(encoder_outputs.size(0), encoder_outputs.size(1), -1)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attn_weights = torch.sum(self.v * energy, dim=2)
        return torch.softmax(attn_weights, dim=1)  # attn_weights: [batch_size, src_len]

# Define the Decoder with Attention
class DecoderWithAttention(nn.Module):
    def __init__(self, output_dim, hidden_dim, attention):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, hidden_dim)
        self.rnn = nn.LSTM(hidden_dim * 2, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.attention = attention

    def forward(self, input, hidden, cell, encoder_outputs):
        # input: [batch_size]
        embedded = self.embedding(input).unsqueeze(1)  # embedded: [batch_size, 1, hidden_dim]
        
        attn_weights = self.attention(hidden[-1].unsqueeze(1), encoder_outputs)  # [batch_size, src_len]
        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs).squeeze(1)  # context: [batch_size, hidden_dim]
        
        rnn_input = torch.cat((embedded.squeeze(1), context), dim=1).unsqueeze(1)  # rnn_input: [batch_size, 1, hidden_dim * 2]
        output, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
        
        prediction = self.fc(torch.cat((output.squeeze(1), context), dim=1))  # prediction: [batch_size, output_dim]
        return prediction, hidden, cell

# Hyperparameters
input_dim = 10  # Vocabulary size of source language
output_dim = 10  # Vocabulary size of target language
hidden_dim = 16  # Hidden dimension for encoder and decoder
batch_size = 2
src_len = 5  # Length of the source sentence
trg_len = 5  # Length of the target sentence

# Instantiate models
encoder = Encoder(input_dim, hidden_dim)
attention = Attention(hidden_dim)
decoder_no_attention = DecoderNoAttention(output_dim, hidden_dim)
decoder_with_attention = DecoderWithAttention(output_dim, hidden_dim, attention)

# Sample data
src = torch.randint(0, input_dim, (batch_size, src_len))  # Randomly generated source batch
trg = torch.randint(0, output_dim, (batch_size, trg_len))  # Randomly generated target batch

# Forward pass through Encoder
encoder_outputs, (hidden, cell) = encoder(src)

# Forward pass through Decoder without Attention
output_no_attention, hidden, cell = decoder_no_attention(trg[:, 0], hidden, cell)
print("Output without Attention:", output_no_attention)

# Forward pass through Decoder with Attention
output_with_attention, hidden, cell = decoder_with_attention(trg[:, 0], hidden, cell, encoder_outputs)
print("Output with Attention:", output_with_attention)

Output without Attention: tensor([[ 0.0862, -0.1857,  0.0156,  0.2860, -0.1255,  0.0380,  0.0161, -0.1708,
          0.0929, -0.0346],
        [ 0.0713,  0.0166,  0.0031,  0.3054,  0.1172,  0.0343, -0.1108, -0.1655,
          0.2278,  0.1083]], grad_fn=<AddmmBackward0>)
Output with Attention: tensor([[ 0.1566, -0.1179, -0.0040, -0.0268, -0.2741, -0.2624, -0.2214, -0.1972,
         -0.0754,  0.0650],
        [ 0.1633, -0.0942, -0.0743,  0.0351, -0.2487, -0.0896, -0.0737, -0.0470,
         -0.0199, -0.0593]], grad_fn=<AddmmBackward0>)


In [42]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
file_path = 'Sentence-pairs-in-English-Hindi-2024-10-19.tsv'
data = pd.read_csv(file_path, delimiter='\t', names=['english', 'hindi'])

# Ensure all text is of type str and handle missing values
data['english'] = data['english'].fillna('').astype(str)
data['hindi'] = data['hindi'].fillna('').astype(str)

# Re-run the tokenizer on the cleaned dataset
english_tokenizer = Tokenizer(filters='')
hindi_tokenizer = Tokenizer(filters='')

english_tokenizer.fit_on_texts(data['english'])
hindi_tokenizer.fit_on_texts(data['hindi'])

# Convert texts to sequences
input_sequences = english_tokenizer.texts_to_sequences(data['english'])
target_sequences = hindi_tokenizer.texts_to_sequences(data['hindi'])

# Pad sequences to ensure uniform length
max_length_input = max(len(seq) for seq in input_sequences)
max_length_target = max(len(seq) for seq in target_sequences)

input_sequences = pad_sequences(input_sequences, maxlen=max_length_input, padding='post')
target_sequences = pad_sequences(target_sequences, maxlen=max_length_target, padding='post')

# Prepare input and output data for training
target_sequences_input = target_sequences[:, :-1]
target_sequences_output = target_sequences[:, 1:]

In [43]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, enc_units):
        super(Encoder, self).__init__()
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(enc_units, return_sequences=True, return_state=True)

    def call(self, x):
        x = self.embedding(x)
        output, state = self.gru(x)
        return output, state

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, dec_units):
        super(Decoder, self).__init__()
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(dec_units, return_sequences=True, return_state=True)
        self.fc = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(dec_units)

    def call(self, x, enc_output, hidden):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        x = self.fc(output)
        return x, state, attention_weights
encoder = Encoder(len(english_tokenizer.word_index) + 1, 256, 512)
decoder = Decoder(len(hindi_tokenizer.word_index) + 1, 256, 512)

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

def loss_function(real, pred):
    mask = tf.math.not_equal(real, 0)
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)

 
@tf.function
def train_step(input_seq, target_seq_in, target_seq_out, encoder, decoder):
    loss = 0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(input_seq)
        dec_hidden = enc_hidden
        
        for t in range(target_seq_out.shape[1]):
            dec_input = tf.expand_dims(target_seq_in[:, t], 1)
            predictions, dec_hidden, _ = decoder(dec_input, enc_output, dec_hidden)
            
            # Squeeze to match target shape
            predictions = tf.squeeze(predictions, axis=1)
            
            # Calculate loss without using .numpy()
            loss += loss_function(target_seq_out[:, t], predictions)

    batch_loss = loss / int(target_seq_out.shape[1])
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    
    return batch_loss

In [44]:
EPOCHS = 10
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((input_sequences, target_sequences))
train_dataset = train_dataset.batch(batch_size, drop_remainder=True)

for epoch in range(EPOCHS):
    total_loss = 0
    for batch, (input_seq, target_seq) in enumerate(train_dataset):
        target_seq_in = target_seq[:, :-1]
        target_seq_out = target_seq[:, 1:]
        
        # Call the train_step function without using .numpy() here
        batch_loss = train_step(input_seq, target_seq_in, target_seq_out, encoder, decoder)
        total_loss += batch_loss  # No need for .numpy() here

    # Convert the total loss to a numpy value only when printing outside @tf.function
    print(f'Epoch {epoch+1}, Loss: {total_loss}')


Epoch 1, Loss: 6.661006927490234
Epoch 2, Loss: 4.750603675842285
Epoch 3, Loss: 3.4511454105377197
Epoch 4, Loss: 3.3393969535827637
Epoch 5, Loss: 3.2187347412109375
Epoch 6, Loss: 3.1251773834228516
Epoch 7, Loss: 3.026987075805664
Epoch 8, Loss: 2.8859214782714844
Epoch 9, Loss: 2.7351441383361816
Epoch 10, Loss: 2.628237009048462
