In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/hinditoeng/train.csv


In [5]:
!pip install evaluate
import re
import warnings
import os
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    TrainerCallback,
)
from datasets import Dataset
import evaluate
import torch
import pandas as pd
import numpy as np
import json

# Disable WANDB
os.environ["WANDB_DISABLED"] = "true"

# Suppress warnings
warnings.filterwarnings("ignore")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU instead.")

# Load dataset
FILENAME = "/kaggle/input/hinditoeng/train.csv"  # Replace with your dataset file name
translation_data = pd.read_csv(FILENAME)
translation_data.columns

GPU not available. Using CPU instead.


Index(['Unnamed: 0', 'hindi', 'english'], dtype='object')

In [1]:
!pip install evaluate
!pip install transformers datasets evaluate torch
!pip install sacrebleu
import re
import warnings
import os
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    TrainerCallback,
)
from datasets import Dataset
import evaluate
import torch
import pandas as pd
import numpy as np
import json

# Disable WANDB
os.environ["WANDB_DISABLED"] = "true"

# Suppress warnings
warnings.filterwarnings("ignore")

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU instead.")

# Load dataset
FILENAME = "/kaggle/input/hinditoeng/train.csv"  # Replace with your dataset file name
translation_data = pd.read_csv(FILENAME)

# Rename columns for consistency
translation_data.rename(columns={"english_sentence": "english", "hindi_sentence": "hindi"}, inplace=True)

# Preprocessing function
def preprocess_text(text):
    if not isinstance(text, str):
        text = str(text)
    text = re.sub(r"[^a-zA-Z\u0900-\u097F\s]", "", text)  # Keep English and Hindi characters
    return text.strip()

# Preprocess text
translation_data["english"] = translation_data["english"].apply(preprocess_text)
translation_data["hindi"] = translation_data["hindi"].apply(preprocess_text)

# Split dataset into train and test
train_size = 0.8
train_data = translation_data.sample(frac=train_size, random_state=42)
test_data = translation_data.drop(train_data.index)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small").to(device)

# Tokenize function
def tokenize_function(examples):
    model_inputs = tokenizer(
        examples["english"], max_length=64, truncation=True, padding="max_length"
    )
    labels = tokenizer(
        examples["hindi"], max_length=64, truncation=True, padding="max_length"
    ).input_ids
    model_inputs["labels"] = labels
    return model_inputs

# Tokenize datasets
train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

# Set dataset format for PyTorch
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir="./results_english_hindi",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,  # Increased for GPU
    per_device_eval_batch_size=16,  # Increased for GPU
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=10,
    predict_with_generate=True,
    logging_dir="./logs_english_hindi",
    logging_steps=10,
    gradient_accumulation_steps=2,  # Accumulate gradients for larger effective batch size
    fp16=True,  # Enable mixed precision
    dataloader_num_workers=2,
    remove_unused_columns=False,
    save_strategy="epoch",
)

# Evaluation metric (BLEU)
metric = evaluate.load("sacrebleu")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    decoded_labels = [[label] for label in decoded_labels]  # BLEU expects nested lists
    bleu_result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    accuracy = np.mean(
        [pred.strip() == ref[0].strip() for pred, ref in zip(decoded_preds, decoded_labels)]
    )
    return {"bleu": bleu_result["score"], "accuracy": accuracy}

# Custom logging callback
class CustomTrainerCallback(TrainerCallback):
    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs:
            print(f"Epoch: {state.epoch}, Logs: {logs}")

# Trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    callbacks=[CustomTrainerCallback()],
)

# Train the model
print("Training the model on the English-Hindi dataset...")
train_result = trainer.train()

# Save training history
history_file = "./results_english_hindi/training_history.json"
with open(history_file, "w") as f:
    json.dump(train_result.metrics, f)
print(f"Training history saved to {history_file}")

# Save the model
model.save_pretrained("./results_english_hindi/model")
tokenizer.save_pretrained("./results_english_hindi/tokenizer")
print("Model and tokenizer saved to './results_english_hindi/'")

# Evaluate the model
results = trainer.evaluate()
print("Evaluation Results:", results)

# Save evaluation results
evaluation_file = "./results_english_hindi/evaluation_results.json"
with open(evaluation_file, "w") as f:
    json.dump(results, f)
print(f"Evaluation results saved to {evaluation_file}")

# Translate function
def translate_english_to_hindi(english_sentence):
    inputs = tokenizer(
        english_sentence, return_tensors="pt", padding=True, truncation=True, max_length=64
    ).to(device)
    outputs = model.generate(inputs.input_ids, max_length=128, num_beams=4, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example translation
english_input = "This is a test sentence for translation."
hindi_translation = translate_english_to_hindi(english_input)
print("English:", english_input)
print("Hindi:", hindi_translation)


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
Collecting sacrebleu
  Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.0.0-py3-none-any.whl.metadata (8.5 kB)
Downloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.0/104.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading portalocker-3.0.0-py3-none-any.whl (19 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-3.0.0 sacrebleu-2.4.

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/81858 [00:00<?, ? examples/s]

Map:   0%|          | 0/20464 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

Training the model on the English-Hindi dataset...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch,Training Loss,Validation Loss,Bleu,Accuracy
0,0.0502,0.046635,0.0,0.943413
2,0.0417,0.044428,0.0,0.943999
4,0.0473,0.04274,3e-06,0.94483
6,0.0492,0.041021,0.0,0.945563
8,0.0417,0.041171,0.0,0.945612
9,0.0383,0.041471,0.0,0.945758


Epoch: 0.003908540160250147, Logs: {'loss': 9.6568, 'grad_norm': 32.969661712646484, 'learning_rate': 1.999374511336982e-05, 'epoch': 0.003908540160250147}
Epoch: 0.007817080320500294, Logs: {'loss': 8.266, 'grad_norm': 32.89570236206055, 'learning_rate': 1.998670836591087e-05, 'epoch': 0.007817080320500294}
Epoch: 0.01172562048075044, Logs: {'loss': 6.7904, 'grad_norm': 91.50592041015625, 'learning_rate': 1.9979671618451916e-05, 'epoch': 0.01172562048075044}
Epoch: 0.015634160641000587, Logs: {'loss': 5.8871, 'grad_norm': 39.18257141113281, 'learning_rate': 1.997185301016419e-05, 'epoch': 0.015634160641000587}
Epoch: 0.019542700801250732, Logs: {'loss': 4.6761, 'grad_norm': 37.833370208740234, 'learning_rate': 1.996403440187647e-05, 'epoch': 0.019542700801250732}
Epoch: 0.02345124096150088, Logs: {'loss': 3.8901, 'grad_norm': 70.52739715576172, 'learning_rate': 1.995621579358874e-05, 'epoch': 0.02345124096150088}
Epoch: 0.027359781121751026, Logs: {'loss': 3.2961, 'grad_norm': 35.2082

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 0.9998045729919874, Logs: {'eval_loss': 0.046634890139102936, 'eval_bleu': 1.0519919183420577e-10, 'eval_accuracy': 0.9434128225175918, 'eval_runtime': 350.4248, 'eval_samples_per_second': 58.398, 'eval_steps_per_second': 3.65, 'epoch': 0.9998045729919874}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 1.0005862810240376, Logs: {'loss': 0.0519, 'grad_norm': 0.15123414993286133, 'learning_rate': 1.8002345582486318e-05, 'epoch': 1.0005862810240376}
Epoch: 1.0044948211842877, Logs: {'loss': 0.05, 'grad_norm': 0.10865063965320587, 'learning_rate': 1.7994526974198593e-05, 'epoch': 1.0044948211842877}
Epoch: 1.0084033613445378, Logs: {'loss': 0.0476, 'grad_norm': 0.14457353949546814, 'learning_rate': 1.798670836591087e-05, 'epoch': 1.0084033613445378}
Epoch: 1.012311901504788, Logs: {'loss': 0.0574, 'grad_norm': 0.2486591786146164, 'learning_rate': 1.7978889757623143e-05, 'epoch': 1.012311901504788}
Epoch: 1.016220441665038, Logs: {'loss': 0.0472, 'grad_norm': 0.13794267177581787, 'learning_rate': 1.797107114933542e-05, 'epoch': 1.016220441665038}
Epoch: 1.0201289818252883, Logs: {'loss': 0.0486, 'grad_norm': 0.22600610554218292, 'learning_rate': 1.7963252541047696e-05, 'epoch': 1.0201289818252883}
Epoch: 1.0240375219855384, Logs: {'loss': 0.0498, 'grad_norm': 0.12839552760124207, '

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 2.0, Logs: {'eval_loss': 0.04548591747879982, 'eval_bleu': 2.906606336236866e-12, 'eval_accuracy': 0.943950351837373, 'eval_runtime': 349.7972, 'eval_samples_per_second': 58.502, 'eval_steps_per_second': 3.656, 'epoch': 2.0}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 2.001172562048075, Logs: {'loss': 0.0572, 'grad_norm': 0.2159370630979538, 'learning_rate': 1.6000781860828775e-05, 'epoch': 2.001172562048075}
Epoch: 2.005081102208325, Logs: {'loss': 0.0488, 'grad_norm': 0.11778285354375839, 'learning_rate': 1.599296325254105e-05, 'epoch': 2.005081102208325}
Epoch: 2.0089896423685754, Logs: {'loss': 0.0451, 'grad_norm': 0.17659012973308563, 'learning_rate': 1.598514464425332e-05, 'epoch': 2.0089896423685754}
Epoch: 2.0128981825288257, Logs: {'loss': 0.0447, 'grad_norm': 0.15343321859836578, 'learning_rate': 1.59773260359656e-05, 'epoch': 2.0128981825288257}
Epoch: 2.0168067226890756, Logs: {'loss': 0.0478, 'grad_norm': 0.12053137272596359, 'learning_rate': 1.5969507427677875e-05, 'epoch': 2.0168067226890756}
Epoch: 2.020715262849326, Logs: {'loss': 0.0487, 'grad_norm': 0.13575226068496704, 'learning_rate': 1.596168881939015e-05, 'epoch': 2.020715262849326}
Epoch: 2.024623803009576, Logs: {'loss': 0.0495, 'grad_norm': 0.13997161388397217, 'lear

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 2.9998045729919873, Logs: {'eval_loss': 0.04442790895700455, 'eval_bleu': 2.9159906177183913e-10, 'eval_accuracy': 0.9439992181391712, 'eval_runtime': 349.9562, 'eval_samples_per_second': 58.476, 'eval_steps_per_second': 3.655, 'epoch': 2.9998045729919873}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 3.0017588430721127, Logs: {'loss': 0.0459, 'grad_norm': 0.16296924650669098, 'learning_rate': 1.3999218139171229e-05, 'epoch': 3.0017588430721127}
Epoch: 3.0056673832323626, Logs: {'loss': 0.0424, 'grad_norm': 0.18742477893829346, 'learning_rate': 1.3991399530883506e-05, 'epoch': 3.0056673832323626}
Epoch: 3.009575923392613, Logs: {'loss': 0.045, 'grad_norm': 0.3165649175643921, 'learning_rate': 1.3983580922595779e-05, 'epoch': 3.009575923392613}
Epoch: 3.013484463552863, Logs: {'loss': 0.0478, 'grad_norm': 0.3389853537082672, 'learning_rate': 1.3975762314308055e-05, 'epoch': 3.013484463552863}
Epoch: 3.017393003713113, Logs: {'loss': 0.0446, 'grad_norm': 0.48451682925224304, 'learning_rate': 1.396794370602033e-05, 'epoch': 3.017393003713113}
Epoch: 3.0213015438733635, Logs: {'loss': 0.0428, 'grad_norm': 0.12960757315158844, 'learning_rate': 1.3960125097732605e-05, 'epoch': 3.0213015438733635}
Epoch: 3.0252100840336134, Logs: {'loss': 0.048, 'grad_norm': 0.13385401666164398, 'le

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 4.0, Logs: {'eval_loss': 0.04340833052992821, 'eval_bleu': 1.2426370595790903e-09, 'eval_accuracy': 0.9444390148553558, 'eval_runtime': 349.2939, 'eval_samples_per_second': 58.587, 'eval_steps_per_second': 3.662, 'epoch': 4.0}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 4.00234512409615, Logs: {'loss': 0.0354, 'grad_norm': 0.3446197807788849, 'learning_rate': 1.1998436278342455e-05, 'epoch': 4.00234512409615}
Epoch: 4.0062536642564, Logs: {'loss': 0.0503, 'grad_norm': 0.27516886591911316, 'learning_rate': 1.1990617670054732e-05, 'epoch': 4.0062536642564}
Epoch: 4.01016220441665, Logs: {'loss': 0.0427, 'grad_norm': 0.1182311400771141, 'learning_rate': 1.1982799061767005e-05, 'epoch': 4.01016220441665}
Epoch: 4.014070744576901, Logs: {'loss': 0.0429, 'grad_norm': 0.1643344610929489, 'learning_rate': 1.1974980453479282e-05, 'epoch': 4.014070744576901}
Epoch: 4.017979284737151, Logs: {'loss': 0.0436, 'grad_norm': 0.11349458247423172, 'learning_rate': 1.1967161845191556e-05, 'epoch': 4.017979284737151}
Epoch: 4.021887824897401, Logs: {'loss': 0.0434, 'grad_norm': 0.10106781125068665, 'learning_rate': 1.1959343236903833e-05, 'epoch': 4.021887824897401}
Epoch: 4.025796365057651, Logs: {'loss': 0.0442, 'grad_norm': 0.1695329248905182, 'learning_rate': 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 5.002931405120187, Logs: {'loss': 0.0469, 'grad_norm': 0.25489482283592224, 'learning_rate': 9.997654417513683e-06, 'epoch': 5.002931405120187}
Epoch: 5.006839945280438, Logs: {'loss': 0.0433, 'grad_norm': 0.12055768072605133, 'learning_rate': 9.989835809225958e-06, 'epoch': 5.006839945280438}
Epoch: 5.010748485440688, Logs: {'loss': 0.0472, 'grad_norm': 0.17062582075595856, 'learning_rate': 9.982017200938235e-06, 'epoch': 5.010748485440688}
Epoch: 5.014657025600938, Logs: {'loss': 0.0502, 'grad_norm': 0.09296009689569473, 'learning_rate': 9.97419859265051e-06, 'epoch': 5.014657025600938}
Epoch: 5.018565565761188, Logs: {'loss': 0.0356, 'grad_norm': 0.22560979425907135, 'learning_rate': 9.966379984362784e-06, 'epoch': 5.018565565761188}
Epoch: 5.022474105921439, Logs: {'loss': 0.0418, 'grad_norm': 0.09843330085277557, 'learning_rate': 9.95856137607506e-06, 'epoch': 5.022474105921439}
Epoch: 5.0263826460816885, Logs: {'loss': 0.041, 'grad_norm': 0.10891970247030258, 'learning_rat

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch: 6.999804572991987, Logs: {'eval_loss': 0.04102117195725441, 'eval_bleu': 0.0, 'eval_accuracy': 0.9455629397967162, 'eval_runtime': 349.5566, 'eval_samples_per_second': 58.543, 'eval_steps_per_second': 3.659, 'epoch': 6.999804572991987}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 7.000195427008013, Logs: {'loss': 0.0401, 'grad_norm': 0.3389039635658264, 'learning_rate': 6.00312744331509e-06, 'epoch': 7.000195427008013}
Epoch: 7.004103967168263, Logs: {'loss': 0.0409, 'grad_norm': 0.1340303272008896, 'learning_rate': 5.995308835027366e-06, 'epoch': 7.004103967168263}
Epoch: 7.008012507328512, Logs: {'loss': 0.0405, 'grad_norm': 0.0989096388220787, 'learning_rate': 5.987490226739641e-06, 'epoch': 7.008012507328512}
Epoch: 7.011921047488763, Logs: {'loss': 0.0407, 'grad_norm': 0.19655492901802063, 'learning_rate': 5.9796716184519165e-06, 'epoch': 7.011921047488763}
Epoch: 7.015829587649013, Logs: {'loss': 0.0439, 'grad_norm': 0.109644815325737, 'learning_rate': 5.971853010164191e-06, 'epoch': 7.015829587649013}
Epoch: 7.019738127809263, Logs: {'loss': 0.0415, 'grad_norm': 0.151803657412529, 'learning_rate': 5.964034401876466e-06, 'epoch': 7.019738127809263}
Epoch: 7.023646667969514, Logs: {'loss': 0.0427, 'grad_norm': 0.16753795742988586, 'learning_rate': 5

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 8.0, Logs: {'eval_loss': 0.04048850014805794, 'eval_bleu': 0.0, 'eval_accuracy': 0.9455629397967162, 'eval_runtime': 348.8835, 'eval_samples_per_second': 58.656, 'eval_steps_per_second': 3.666, 'epoch': 8.0}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 8.00078170803205, Logs: {'loss': 0.0389, 'grad_norm': 0.29725438356399536, 'learning_rate': 4.002345582486317e-06, 'epoch': 8.00078170803205}
Epoch: 8.0046902481923, Logs: {'loss': 0.0496, 'grad_norm': 0.18161101639270782, 'learning_rate': 3.994526974198593e-06, 'epoch': 8.0046902481923}
Epoch: 8.00859878835255, Logs: {'loss': 0.0513, 'grad_norm': 0.16988755762577057, 'learning_rate': 3.986708365910868e-06, 'epoch': 8.00859878835255}
Epoch: 8.0125073285128, Logs: {'loss': 0.0422, 'grad_norm': 0.19423429667949677, 'learning_rate': 3.978889757623144e-06, 'epoch': 8.0125073285128}
Epoch: 8.01641586867305, Logs: {'loss': 0.0427, 'grad_norm': 1.2838157415390015, 'learning_rate': 3.9710711493354185e-06, 'epoch': 8.01641586867305}
Epoch: 8.0203244088333, Logs: {'loss': 0.0401, 'grad_norm': 0.14458313584327698, 'learning_rate': 3.963252541047694e-06, 'epoch': 8.0203244088333}
Epoch: 8.024232948993552, Logs: {'loss': 0.0412, 'grad_norm': 0.10659250617027283, 'learning_rate': 3.9554339327

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 8.999804572991987, Logs: {'eval_loss': 0.0411706306040287, 'eval_bleu': 0.0, 'eval_accuracy': 0.9456118060985145, 'eval_runtime': 348.4833, 'eval_samples_per_second': 58.723, 'eval_steps_per_second': 3.67, 'epoch': 8.999804572991987}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 9.001367989056087, Logs: {'loss': 0.036, 'grad_norm': 0.08481262624263763, 'learning_rate': 2.001563721657545e-06, 'epoch': 9.001367989056087}
Epoch: 9.005276529216337, Logs: {'loss': 0.0417, 'grad_norm': 0.10203515738248825, 'learning_rate': 1.9937451133698206e-06, 'epoch': 9.005276529216337}
Epoch: 9.009185069376588, Logs: {'loss': 0.0394, 'grad_norm': 0.1734808087348938, 'learning_rate': 1.9859265050820955e-06, 'epoch': 9.009185069376588}
Epoch: 9.013093609536838, Logs: {'loss': 0.0424, 'grad_norm': 0.3041340708732605, 'learning_rate': 1.978107896794371e-06, 'epoch': 9.013093609536838}
Epoch: 9.017002149697088, Logs: {'loss': 0.0344, 'grad_norm': 0.17183080315589905, 'learning_rate': 1.970289288506646e-06, 'epoch': 9.017002149697088}
Epoch: 9.020910689857338, Logs: {'loss': 0.0402, 'grad_norm': 0.19109117984771729, 'learning_rate': 1.962470680218921e-06, 'epoch': 9.020910689857338}
Epoch: 9.024819230017588, Logs: {'loss': 0.0369, 'grad_norm': 0.3314269483089447, 'learning_rat

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 9.998045729919875, Logs: {'eval_loss': 0.04147132486104965, 'eval_bleu': 0.0, 'eval_accuracy': 0.9457584050039093, 'eval_runtime': 354.7961, 'eval_samples_per_second': 57.678, 'eval_steps_per_second': 3.605, 'epoch': 9.998045729919875}
Epoch: 9.998045729919875, Logs: {'train_runtime': 8479.2766, 'train_samples_per_second': 96.539, 'train_steps_per_second': 3.017, 'total_flos': 1.3846041493241856e+16, 'train_loss': 0.06780480503150502, 'epoch': 9.998045729919875}
Training history saved to ./results_english_hindi/training_history.json
Model and tokenizer saved to './results_english_hindi/'


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch: 9.998045729919875, Logs: {'eval_loss': 0.04147132486104965, 'eval_bleu': 0.0, 'eval_accuracy': 0.9457584050039093, 'eval_runtime': 346.4635, 'eval_samples_per_second': 59.065, 'eval_steps_per_second': 3.692, 'epoch': 9.998045729919875}
Evaluation Results: {'eval_loss': 0.04147132486104965, 'eval_bleu': 0.0, 'eval_accuracy': 0.9457584050039093, 'eval_runtime': 346.4635, 'eval_samples_per_second': 59.065, 'eval_steps_per_second': 3.692, 'epoch': 9.998045729919875}
Evaluation results saved to ./results_english_hindi/evaluation_results.json
English: This is a test sentence for translation.
Hindi:    
