In [1]:
import pandas as pd
import transformers
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import Dataset
from datasets import load_dataset

In [3]:
translation_dataset = load_dataset('rahular/itihasa')

Using the latest cached version of the dataset since rahular/itihasa couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'Itihasa' at C:\Users\JAGRIT BHARATI\.cache\huggingface\datasets\rahular___itihasa\Itihasa\1.0.0\56645be151b61e1143597f922ccf666b43a5c02b (last modified on Wed Jul  3 22:02:42 2024).


In [6]:
translation_dataset['train'][0]

{'translation': {'en': 'The ascetic Vālmīki asked Nārada, the best of sages and foremost of those conversant with words, ever engaged in austerities and Vedic studies.',
  'sn': 'ॐ तपः स्वाध्यायनिरतं तपस्वी वाग्विदां वरम्। नारदं परिपप्रच्छ वाल्मीकिर्मुनिपुङ्गवम्॥'}}

In [7]:
checkpoint = 'google-t5/t5-small'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [8]:
source_language = 'sn'
target_language = 'en'
prefix = "translate Sanskrit to English"

Here we will write the preprocessing function which will preprocess the inputs, the corrosponding targets. 

We will also use these inputs and targets (they are in list form now) which will be used for the tokenizer to tokenize the variables.

In [9]:
def preprocess_text(input_data):
    inputs = []
    targets = []
    for i in input_data["translation"]:
        inputs.append(prefix + i[source_language])
        targets.append(i[target_language])

    model_inputs = tokenizer(inputs,text_target = targets, max_length = 128, padding = True, truncation = True)
    return model_inputs


Using a mapping function to map the dataset to according to our preprocess function

In [10]:
tokenized_data = translation_dataset.map(preprocess_text,batched=True)

In [11]:
from transformers import DataCollatorForSeq2Seq

In [12]:
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer,model = checkpoint)

In [13]:
import evaluate
metric = evaluate.load('sacrebleu')

Using the latest cached version of the module from C:\Users\JAGRIT BHARATI\.cache\huggingface\modules\evaluate_modules\metrics\evaluate-metric--sacrebleu\28676bf65b4f88b276df566e48e603732d0b4afd237603ebdf92acaacf5be99b (last modified on Sun Jun 30 16:10:58 2024) since it couldn't be found locally at evaluate-metric--sacrebleu, or remotely on the Hugging Face Hub.


In [15]:
import numpy as np

def postprocess_text(preds,labels):
    preds = []
    labels = []
    for pred in preds:
        pred.append(pred.strip())
    for label in labels:
        label.append(label.strip())

    return preds,labels


def compute_metrics(eval_preds):
    preds,labels= eval_preds
    if isinstance(preds,tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds,skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels,skip_special_tokens=True)

    decoded_preds,decoded_labels = postprocess_text(decoded_preds,decoded_labels)

    result =metric.compute(predictions = decoded_preds,references = decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = []
    pad_token_id = tokenizer.pad_token_id
    for pred in preds:
        count_non_padded = np.count_nonzero(pred != pad_token_id)
        prediction_lens.append(count_non_padded)
    
    result["gen_len"] = np.mean(prediction_lens)
    new_result = {}
    for key,value in result.items():
        new_result[key] = round(value,4)
    result = new_result
    return result

In [16]:
from transformers import AutoModelForSeq2SeqLM,Seq2SeqTrainer,Seq2SeqTrainingArguments

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

In [17]:
training_arguments = Seq2SeqTrainingArguments(
    output_dir = "my_translation_model",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay = 0.01,
    save_total_limit=3,
    num_train_epochs=10,
    predict_with_generate=True 
)


In [18]:
trainer = Seq2SeqTrainer(
    model=model,
    args = training_arguments,
    train_dataset= tokenized_data["train"],
    eval_dataset= tokenized_data["test"],
    tokenizer = tokenizer,
    compute_metrics = compute_metrics
)

In [19]:
trainer.train()

  1%|          | 500/46980 [24:40<37:00:07,  2.87s/it]

{'loss': 2.2348, 'grad_norm': 0.5131970643997192, 'learning_rate': 1.9787143465304387e-05, 'epoch': 0.11}


  2%|▏         | 1000/46980 [48:31<37:19:23,  2.92s/it]

{'loss': 1.6914, 'grad_norm': 0.44326817989349365, 'learning_rate': 1.957428693060877e-05, 'epoch': 0.21}


  3%|▎         | 1500/46980 [1:13:18<37:16:43,  2.95s/it]

{'loss': 1.6506, 'grad_norm': 0.4486537277698517, 'learning_rate': 1.9361430395913157e-05, 'epoch': 0.32}


  4%|▍         | 2000/46980 [1:38:04<38:29:56,  3.08s/it]

{'loss': 1.6139, 'grad_norm': 0.4074905812740326, 'learning_rate': 1.914857386121754e-05, 'epoch': 0.43}


  5%|▌         | 2500/46980 [2:03:20<37:52:13,  3.07s/it]

{'loss': 1.5935, 'grad_norm': 0.4526899456977844, 'learning_rate': 1.8935717326521927e-05, 'epoch': 0.53}


  6%|▋         | 3000/46980 [2:28:42<36:53:24,  3.02s/it]

{'loss': 1.5745, 'grad_norm': 0.39835116267204285, 'learning_rate': 1.872286079182631e-05, 'epoch': 0.64}


  7%|▋         | 3500/46980 [2:53:59<36:43:49,  3.04s/it]

{'loss': 1.5564, 'grad_norm': 0.4703787565231323, 'learning_rate': 1.8510004257130697e-05, 'epoch': 0.74}


  9%|▊         | 4000/46980 [3:19:42<36:12:22,  3.03s/it]

{'loss': 1.5401, 'grad_norm': 0.3369430601596832, 'learning_rate': 1.829714772243508e-05, 'epoch': 0.85}


 10%|▉         | 4500/46980 [3:45:25<36:53:03,  3.13s/it]

{'loss': 1.5215, 'grad_norm': 0.4334086775779724, 'learning_rate': 1.8084291187739463e-05, 'epoch': 0.96}


                                                         
 10%|█         | 4698/46980 [4:15:55<33:12:57,  2.83s/it]

{'eval_loss': 1.4294556379318237, 'eval_bleu': 0.3784, 'eval_gen_len': 19.0, 'eval_runtime': 1212.576, 'eval_samples_per_second': 9.667, 'eval_steps_per_second': 0.604, 'epoch': 1.0}


 11%|█         | 5000/46980 [4:31:25<35:15:56,  3.02s/it]   

{'loss': 1.5191, 'grad_norm': 0.48573628067970276, 'learning_rate': 1.787143465304385e-05, 'epoch': 1.06}


 12%|█▏        | 5500/46980 [4:57:09<35:49:11,  3.11s/it]

{'loss': 1.5172, 'grad_norm': 0.4122028648853302, 'learning_rate': 1.7658578118348237e-05, 'epoch': 1.17}


 13%|█▎        | 6000/46980 [5:22:50<35:44:03,  3.14s/it]

{'loss': 1.5079, 'grad_norm': 0.36000004410743713, 'learning_rate': 1.7445721583652618e-05, 'epoch': 1.28}


 14%|█▍        | 6500/46980 [5:48:39<34:20:04,  3.05s/it]

{'loss': 1.5043, 'grad_norm': 0.41203391551971436, 'learning_rate': 1.7232865048957003e-05, 'epoch': 1.38}


 15%|█▍        | 7000/46980 [6:14:47<35:02:01,  3.15s/it]

{'loss': 1.4861, 'grad_norm': 0.4289776384830475, 'learning_rate': 1.702000851426139e-05, 'epoch': 1.49}


 16%|█▌        | 7500/46980 [6:40:39<34:06:32,  3.11s/it]

{'loss': 1.4929, 'grad_norm': 0.35795578360557556, 'learning_rate': 1.6807151979565773e-05, 'epoch': 1.6}


 17%|█▋        | 8000/46980 [7:06:53<33:44:49,  3.12s/it]

{'loss': 1.4718, 'grad_norm': 0.3926686644554138, 'learning_rate': 1.6594295444870158e-05, 'epoch': 1.7}


 18%|█▊        | 8500/46980 [7:33:09<33:16:53,  3.11s/it]

{'loss': 1.4691, 'grad_norm': 0.3619379699230194, 'learning_rate': 1.6381438910174543e-05, 'epoch': 1.81}


 19%|█▉        | 9000/46980 [8:00:07<34:41:42,  3.29s/it]

{'loss': 1.4644, 'grad_norm': 0.41267129778862, 'learning_rate': 1.6168582375478928e-05, 'epoch': 1.92}


                                                         
 20%|██        | 9396/46980 [8:40:52<28:56:36,  2.77s/it]

{'eval_loss': 1.3757445812225342, 'eval_bleu': 0.4902, 'eval_gen_len': 19.0, 'eval_runtime': 1176.6025, 'eval_samples_per_second': 9.963, 'eval_steps_per_second': 0.623, 'epoch': 2.0}


 20%|██        | 9500/46980 [8:46:17<32:24:53,  3.11s/it]   

{'loss': 1.4665, 'grad_norm': 0.39623597264289856, 'learning_rate': 1.5955725840783313e-05, 'epoch': 2.02}


 21%|██▏       | 10000/46980 [9:12:02<31:46:41,  3.09s/it]

{'loss': 1.4539, 'grad_norm': 0.37025198340415955, 'learning_rate': 1.57428693060877e-05, 'epoch': 2.13}


 22%|██▏       | 10500/46980 [9:37:43<30:34:05,  3.02s/it]

{'loss': 1.4587, 'grad_norm': 0.39580705761909485, 'learning_rate': 1.5530012771392083e-05, 'epoch': 2.23}


 23%|██▎       | 11000/46980 [10:03:25<31:22:25,  3.14s/it]

{'loss': 1.4494, 'grad_norm': 0.43723905086517334, 'learning_rate': 1.531715623669647e-05, 'epoch': 2.34}


 24%|██▍       | 11500/46980 [10:29:12<29:59:33,  3.04s/it]

{'loss': 1.4524, 'grad_norm': 0.3964909017086029, 'learning_rate': 1.5104299702000853e-05, 'epoch': 2.45}


 26%|██▌       | 12000/46980 [10:55:14<30:44:42,  3.16s/it]

{'loss': 1.4453, 'grad_norm': 0.3899906277656555, 'learning_rate': 1.4891443167305237e-05, 'epoch': 2.55}


 27%|██▋       | 12500/46980 [11:21:21<30:31:24,  3.19s/it]

{'loss': 1.4575, 'grad_norm': 0.4698668122291565, 'learning_rate': 1.4678586632609622e-05, 'epoch': 2.66}


 28%|██▊       | 13000/46980 [11:47:57<30:02:17,  3.18s/it]

{'loss': 1.4243, 'grad_norm': 0.3812244236469269, 'learning_rate': 1.4465730097914008e-05, 'epoch': 2.77}


 29%|██▊       | 13500/46980 [12:20:26<39:37:06,  4.26s/it]

{'loss': 1.4474, 'grad_norm': 0.3428551256656647, 'learning_rate': 1.4252873563218392e-05, 'epoch': 2.87}


 30%|██▉       | 14000/46980 [12:48:06<29:05:36,  3.18s/it]

{'loss': 1.4336, 'grad_norm': 0.44031330943107605, 'learning_rate': 1.4040017028522777e-05, 'epoch': 2.98}


                                                           
 30%|███       | 14094/46980 [13:13:23<25:59:17,  2.84s/it]

{'eval_loss': 1.3454383611679077, 'eval_bleu': 0.2794, 'eval_gen_len': 19.0, 'eval_runtime': 1211.3067, 'eval_samples_per_second': 9.677, 'eval_steps_per_second': 0.605, 'epoch': 3.0}


 31%|███       | 14500/46980 [13:35:01<28:58:26,  3.21s/it]   

{'loss': 1.4366, 'grad_norm': 0.3725959360599518, 'learning_rate': 1.3827160493827162e-05, 'epoch': 3.09}


 32%|███▏      | 15000/46980 [14:01:48<28:41:53,  3.23s/it]

{'loss': 1.4315, 'grad_norm': 0.34752845764160156, 'learning_rate': 1.3614303959131547e-05, 'epoch': 3.19}


 33%|███▎      | 15500/46980 [14:28:35<27:38:39,  3.16s/it]

{'loss': 1.433, 'grad_norm': 0.4040771424770355, 'learning_rate': 1.3401447424435932e-05, 'epoch': 3.3}


 34%|███▍      | 16000/46980 [14:55:35<28:16:08,  3.28s/it]

{'loss': 1.4216, 'grad_norm': 0.3832267224788666, 'learning_rate': 1.3188590889740317e-05, 'epoch': 3.41}


 35%|███▌      | 16500/46980 [15:22:25<26:56:41,  3.18s/it]

{'loss': 1.4285, 'grad_norm': 0.36870571970939636, 'learning_rate': 1.29757343550447e-05, 'epoch': 3.51}


 36%|███▌      | 17000/46980 [15:49:30<26:41:04,  3.20s/it]

{'loss': 1.4198, 'grad_norm': 0.3543427586555481, 'learning_rate': 1.2762877820349087e-05, 'epoch': 3.62}


 37%|███▋      | 17500/46980 [16:16:33<26:01:13,  3.18s/it]

{'loss': 1.4154, 'grad_norm': 0.36675599217414856, 'learning_rate': 1.2550021285653472e-05, 'epoch': 3.72}


 38%|███▊      | 18000/46980 [16:43:47<25:45:38,  3.20s/it]

{'loss': 1.3991, 'grad_norm': 0.4737683832645416, 'learning_rate': 1.2337164750957855e-05, 'epoch': 3.83}


 39%|███▉      | 18500/46980 [17:10:55<25:38:39,  3.24s/it]

{'loss': 1.4165, 'grad_norm': 0.43203675746917725, 'learning_rate': 1.212430821626224e-05, 'epoch': 3.94}


                                                           
 40%|████      | 18792/46980 [17:46:54<22:50:19,  2.92s/it]

{'eval_loss': 1.3257191181182861, 'eval_bleu': 0.293, 'eval_gen_len': 19.0, 'eval_runtime': 1204.3567, 'eval_samples_per_second': 9.733, 'eval_steps_per_second': 0.609, 'epoch': 4.0}


 40%|████      | 19000/46980 [17:58:11<25:05:49,  3.23s/it]   

{'loss': 1.4043, 'grad_norm': 0.3600574731826782, 'learning_rate': 1.1911451681566627e-05, 'epoch': 4.04}


 42%|████▏     | 19500/46980 [18:25:24<24:50:33,  3.25s/it]

{'loss': 1.4007, 'grad_norm': 0.38166722655296326, 'learning_rate': 1.169859514687101e-05, 'epoch': 4.15}


 43%|████▎     | 20000/46980 [18:52:51<24:25:50,  3.26s/it]

{'loss': 1.4038, 'grad_norm': 0.3897230327129364, 'learning_rate': 1.1485738612175395e-05, 'epoch': 4.26}


 44%|████▎     | 20500/46980 [19:20:24<24:21:41,  3.31s/it]

{'loss': 1.411, 'grad_norm': 0.3741370141506195, 'learning_rate': 1.1272882077479778e-05, 'epoch': 4.36}


 45%|████▍     | 21000/46980 [19:47:42<23:08:47,  3.21s/it]

{'loss': 1.3978, 'grad_norm': 0.40813228487968445, 'learning_rate': 1.1060025542784165e-05, 'epoch': 4.47}


 46%|████▌     | 21500/46980 [20:15:14<23:31:00,  3.32s/it]

{'loss': 1.4027, 'grad_norm': 0.3697333335876465, 'learning_rate': 1.084716900808855e-05, 'epoch': 4.58}


 47%|████▋     | 22000/46980 [20:42:51<22:54:51,  3.30s/it]

{'loss': 1.3994, 'grad_norm': 0.37990081310272217, 'learning_rate': 1.0634312473392933e-05, 'epoch': 4.68}


 48%|████▊     | 22500/46980 [21:10:32<22:43:19,  3.34s/it]

{'loss': 1.406, 'grad_norm': 0.3308504819869995, 'learning_rate': 1.0421455938697318e-05, 'epoch': 4.79}


 49%|████▉     | 23000/46980 [21:38:17<22:16:37,  3.34s/it]

{'loss': 1.3933, 'grad_norm': 0.32328227162361145, 'learning_rate': 1.0208599404001705e-05, 'epoch': 4.9}


                                                           
 50%|█████     | 23490/46980 [22:25:56<19:30:15,  2.99s/it]

{'eval_loss': 1.3122667074203491, 'eval_bleu': 0.5116, 'eval_gen_len': 19.0, 'eval_runtime': 1218.4956, 'eval_samples_per_second': 9.62, 'eval_steps_per_second': 0.602, 'epoch': 5.0}


 50%|█████     | 23500/46980 [22:26:30<119:06:32, 18.26s/it]  

{'loss': 1.402, 'grad_norm': 0.45943793654441833, 'learning_rate': 9.995742869306088e-06, 'epoch': 5.0}


 51%|█████     | 24000/46980 [22:54:27<21:23:32,  3.35s/it] 

{'loss': 1.395, 'grad_norm': 0.43421706557273865, 'learning_rate': 9.782886334610473e-06, 'epoch': 5.11}


 52%|█████▏    | 24500/46980 [23:22:18<20:58:54,  3.36s/it]

{'loss': 1.3981, 'grad_norm': 0.43061792850494385, 'learning_rate': 9.570029799914858e-06, 'epoch': 5.21}


 53%|█████▎    | 25000/46980 [23:50:22<20:44:05,  3.40s/it]

{'loss': 1.384, 'grad_norm': 0.3576321601867676, 'learning_rate': 9.357173265219243e-06, 'epoch': 5.32}


 54%|█████▍    | 25500/46980 [24:18:25<20:07:13,  3.37s/it]

{'loss': 1.3884, 'grad_norm': 0.44941192865371704, 'learning_rate': 9.144316730523628e-06, 'epoch': 5.43}


 55%|█████▌    | 26000/46980 [24:46:28<19:50:13,  3.40s/it]

{'loss': 1.386, 'grad_norm': 0.362759530544281, 'learning_rate': 8.931460195828012e-06, 'epoch': 5.53}


 56%|█████▋    | 26500/46980 [25:16:06<22:44:39,  4.00s/it]

{'loss': 1.3966, 'grad_norm': 0.41881465911865234, 'learning_rate': 8.718603661132398e-06, 'epoch': 5.64}


 57%|█████▋    | 27000/46980 [25:45:42<19:03:49,  3.43s/it]

{'loss': 1.3703, 'grad_norm': 0.42145267128944397, 'learning_rate': 8.505747126436782e-06, 'epoch': 5.75}


 59%|█████▊    | 27500/46980 [26:13:58<17:52:18,  3.30s/it]

{'loss': 1.3848, 'grad_norm': 0.3562375009059906, 'learning_rate': 8.292890591741167e-06, 'epoch': 5.85}


 60%|█████▉    | 28000/46980 [26:42:13<18:27:05,  3.50s/it]

{'loss': 1.4017, 'grad_norm': 0.35949426889419556, 'learning_rate': 8.080034057045552e-06, 'epoch': 5.96}


                                                           
 60%|██████    | 28188/46980 [27:13:13<15:33:28,  2.98s/it]

{'eval_loss': 1.3023473024368286, 'eval_bleu': 0.5288, 'eval_gen_len': 19.0, 'eval_runtime': 1228.7096, 'eval_samples_per_second': 9.54, 'eval_steps_per_second': 0.597, 'epoch': 6.0}


 61%|██████    | 28500/46980 [27:30:47<16:46:09,  3.27s/it]   

{'loss': 1.3913, 'grad_norm': 0.4387092590332031, 'learning_rate': 7.867177522349937e-06, 'epoch': 6.07}


 62%|██████▏   | 29000/46980 [27:58:51<17:09:19,  3.43s/it]

{'loss': 1.3944, 'grad_norm': 0.37112486362457275, 'learning_rate': 7.654320987654322e-06, 'epoch': 6.17}


 63%|██████▎   | 29500/46980 [28:26:59<16:13:54,  3.34s/it]

{'loss': 1.3792, 'grad_norm': 0.3934601843357086, 'learning_rate': 7.441464452958707e-06, 'epoch': 6.28}


 64%|██████▍   | 30000/46980 [28:55:05<15:44:44,  3.34s/it]

{'loss': 1.3855, 'grad_norm': 0.4080654978752136, 'learning_rate': 7.228607918263091e-06, 'epoch': 6.39}


 65%|██████▍   | 30500/46980 [29:23:03<14:59:27,  3.27s/it]

{'loss': 1.374, 'grad_norm': 0.40781620144844055, 'learning_rate': 7.015751383567476e-06, 'epoch': 6.49}


 66%|██████▌   | 31000/46980 [29:51:01<14:38:37,  3.30s/it]

{'loss': 1.3724, 'grad_norm': 0.3509124219417572, 'learning_rate': 6.802894848871861e-06, 'epoch': 6.6}


 67%|██████▋   | 31500/46980 [30:18:51<14:30:23,  3.37s/it]

{'loss': 1.3764, 'grad_norm': 0.3236774206161499, 'learning_rate': 6.590038314176246e-06, 'epoch': 6.7}


 68%|██████▊   | 32000/46980 [30:46:45<13:53:48,  3.34s/it]

{'loss': 1.3802, 'grad_norm': 0.3556080162525177, 'learning_rate': 6.37718177948063e-06, 'epoch': 6.81}


 69%|██████▉   | 32500/46980 [31:14:36<13:28:12,  3.35s/it]

{'loss': 1.3783, 'grad_norm': 0.37756845355033875, 'learning_rate': 6.164325244785016e-06, 'epoch': 6.92}


                                                           
 70%|███████   | 32886/46980 [31:56:18<11:32:06,  2.95s/it]

{'eval_loss': 1.2956063747406006, 'eval_bleu': 0.3658, 'eval_gen_len': 19.0, 'eval_runtime': 1208.8417, 'eval_samples_per_second': 9.697, 'eval_steps_per_second': 0.606, 'epoch': 7.0}


 70%|███████   | 33000/46980 [32:02:38<13:02:44,  3.36s/it]   

{'loss': 1.3672, 'grad_norm': 0.35666099190711975, 'learning_rate': 5.9514687100894e-06, 'epoch': 7.02}


 71%|███████▏  | 33500/46980 [32:30:18<12:27:31,  3.33s/it]

{'loss': 1.3756, 'grad_norm': 0.4547264277935028, 'learning_rate': 5.738612175393785e-06, 'epoch': 7.13}


 72%|███████▏  | 34000/46980 [32:58:07<12:20:00,  3.42s/it]

{'loss': 1.3811, 'grad_norm': 0.39916136860847473, 'learning_rate': 5.525755640698169e-06, 'epoch': 7.24}


 73%|███████▎  | 34500/46980 [33:25:55<11:16:25,  3.25s/it]

{'loss': 1.3746, 'grad_norm': 0.3546089828014374, 'learning_rate': 5.312899106002555e-06, 'epoch': 7.34}


 74%|███████▍  | 35000/46980 [33:53:40<10:57:03,  3.29s/it]

{'loss': 1.3757, 'grad_norm': 0.45252713561058044, 'learning_rate': 5.100042571306939e-06, 'epoch': 7.45}


 76%|███████▌  | 35500/46980 [34:21:29<10:37:51,  3.33s/it]

{'loss': 1.3753, 'grad_norm': 0.3841280937194824, 'learning_rate': 4.887186036611324e-06, 'epoch': 7.56}


 77%|███████▋  | 36000/46980 [34:49:20<10:07:50,  3.32s/it]

{'loss': 1.364, 'grad_norm': 0.3807196021080017, 'learning_rate': 4.674329501915709e-06, 'epoch': 7.66}


 78%|███████▊  | 36500/46980 [35:17:01<9:37:59,  3.31s/it] 

{'loss': 1.3761, 'grad_norm': 0.44841915369033813, 'learning_rate': 4.461472967220094e-06, 'epoch': 7.77}


 79%|███████▉  | 37000/46980 [35:44:45<9:01:45,  3.26s/it] 

{'loss': 1.3735, 'grad_norm': 0.39620673656463623, 'learning_rate': 4.248616432524479e-06, 'epoch': 7.88}


 80%|███████▉  | 37500/46980 [36:12:25<8:26:21,  3.20s/it]

{'loss': 1.3748, 'grad_norm': 0.3708368241786957, 'learning_rate': 4.035759897828863e-06, 'epoch': 7.98}


                                                          
 80%|████████  | 37584/46980 [36:36:42<7:31:27,  2.88s/it]

{'eval_loss': 1.290703296661377, 'eval_bleu': 0.3853, 'eval_gen_len': 19.0, 'eval_runtime': 1180.6794, 'eval_samples_per_second': 9.928, 'eval_steps_per_second': 0.621, 'epoch': 8.0}


 81%|████████  | 38000/46980 [36:59:44<8:12:28,  3.29s/it]   

{'loss': 1.3654, 'grad_norm': 0.3414614498615265, 'learning_rate': 3.822903363133248e-06, 'epoch': 8.09}


 82%|████████▏ | 38500/46980 [37:27:25<7:51:28,  3.34s/it]

{'loss': 1.3758, 'grad_norm': 0.4676431715488434, 'learning_rate': 3.6100468284376333e-06, 'epoch': 8.19}


 83%|████████▎ | 39000/46980 [37:55:20<7:34:21,  3.42s/it]

{'loss': 1.3679, 'grad_norm': 0.36497995257377625, 'learning_rate': 3.3971902937420183e-06, 'epoch': 8.3}


 84%|████████▍ | 39500/46980 [38:23:27<6:52:12,  3.31s/it]

{'loss': 1.3724, 'grad_norm': 0.4211699366569519, 'learning_rate': 3.184333759046403e-06, 'epoch': 8.41}


 85%|████████▌ | 40000/46980 [38:51:26<6:28:00,  3.34s/it]

{'loss': 1.3683, 'grad_norm': 0.3741753101348877, 'learning_rate': 2.971477224350788e-06, 'epoch': 8.51}


 86%|████████▌ | 40500/46980 [39:19:26<6:04:16,  3.37s/it]

{'loss': 1.376, 'grad_norm': 0.3671800494194031, 'learning_rate': 2.7586206896551725e-06, 'epoch': 8.62}


 87%|████████▋ | 41000/46980 [39:47:29<5:24:26,  3.26s/it]

{'loss': 1.368, 'grad_norm': 0.3745378851890564, 'learning_rate': 2.5457641549595575e-06, 'epoch': 8.73}


 88%|████████▊ | 41500/46980 [40:15:29<5:08:31,  3.38s/it]

{'loss': 1.3581, 'grad_norm': 0.36717653274536133, 'learning_rate': 2.332907620263942e-06, 'epoch': 8.83}


 89%|████████▉ | 42000/46980 [40:43:33<4:42:07,  3.40s/it]

{'loss': 1.3735, 'grad_norm': 0.3613697290420532, 'learning_rate': 2.120051085568327e-06, 'epoch': 8.94}


                                                          
 90%|█████████ | 42282/46980 [41:19:37<3:54:35,  3.00s/it]

{'eval_loss': 1.2879234552383423, 'eval_bleu': 0.3767, 'eval_gen_len': 19.0, 'eval_runtime': 1214.3328, 'eval_samples_per_second': 9.653, 'eval_steps_per_second': 0.604, 'epoch': 9.0}


 90%|█████████ | 42500/46980 [41:31:53<4:15:52,  3.43s/it]   

{'loss': 1.3641, 'grad_norm': 0.4484231770038605, 'learning_rate': 1.9071945508727119e-06, 'epoch': 9.05}


 92%|█████████▏| 43000/46980 [41:59:55<3:40:16,  3.32s/it]

{'loss': 1.3763, 'grad_norm': 0.4203164875507355, 'learning_rate': 1.6943380161770967e-06, 'epoch': 9.15}


 93%|█████████▎| 43500/46980 [42:28:00<3:14:32,  3.35s/it]

{'loss': 1.3567, 'grad_norm': 0.42709651589393616, 'learning_rate': 1.4814814814814815e-06, 'epoch': 9.26}


 94%|█████████▎| 44000/46980 [42:56:11<2:50:37,  3.44s/it]

{'loss': 1.3775, 'grad_norm': 0.4045989513397217, 'learning_rate': 1.2686249467858662e-06, 'epoch': 9.37}


 95%|█████████▍| 44500/46980 [43:24:41<2:19:34,  3.38s/it]

{'loss': 1.3813, 'grad_norm': 0.41313090920448303, 'learning_rate': 1.0557684120902512e-06, 'epoch': 9.47}


 96%|█████████▌| 45000/46980 [43:53:01<1:52:59,  3.42s/it]

{'loss': 1.3676, 'grad_norm': 0.36686912178993225, 'learning_rate': 8.429118773946361e-07, 'epoch': 9.58}


 97%|█████████▋| 45500/46980 [44:21:21<1:22:13,  3.33s/it]

{'loss': 1.3666, 'grad_norm': 0.5007425546646118, 'learning_rate': 6.300553426990209e-07, 'epoch': 9.68}


 98%|█████████▊| 46000/46980 [44:51:14<55:39,  3.41s/it]  

{'loss': 1.3587, 'grad_norm': 0.458306223154068, 'learning_rate': 4.171988080034058e-07, 'epoch': 9.79}


 99%|█████████▉| 46500/46980 [45:19:51<26:55,  3.37s/it]

{'loss': 1.3643, 'grad_norm': 0.3743859529495239, 'learning_rate': 2.0434227330779057e-07, 'epoch': 9.9}


                                                        
100%|██████████| 46980/46980 [46:07:31<00:00,  3.53s/it]

{'eval_loss': 1.2871288061141968, 'eval_bleu': 0.3702, 'eval_gen_len': 19.0, 'eval_runtime': 1227.9835, 'eval_samples_per_second': 9.546, 'eval_steps_per_second': 0.597, 'epoch': 10.0}
{'train_runtime': 166051.4706, 'train_samples_per_second': 4.526, 'train_steps_per_second': 0.283, 'train_loss': 1.4298106538229367, 'epoch': 10.0}





TrainOutput(global_step=46980, training_loss=1.4298106538229367, metrics={'train_runtime': 166051.4706, 'train_samples_per_second': 4.526, 'train_steps_per_second': 0.283, 'total_flos': 2.543140120559616e+16, 'train_loss': 1.4298106538229367, 'epoch': 10.0})

In [20]:
model_save_path = "/.llm_translational_model"
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

('/.llm_translational_model\\tokenizer_config.json',
 '/.llm_translational_model\\special_tokens_map.json',
 '/.llm_translational_model\\spiece.model',
 '/.llm_translational_model\\added_tokens.json',
 '/.llm_translational_model\\tokenizer.json')

In [21]:
example_text = "तस्माच्चिरायुरिच्छत नृपेण मूर्खोऽनुचरो न रक्षणियः ।"
example_text_1 = "अव्यापारेषु व्यापारं यो नरः कर्तुमिच्छति ।"
translated_text = "A king wishing long life should never keep foolish servants."
translated_text_1 = "Anyone who tries to poke into matters which are none of his business,"


In [27]:
from transformers import pipeline
translator = pipeline("translation_en_to_sn", model = model_save_path)
translator(example_text)

[{'translation_text': 'O king, I have been able to slay you, O king,'}]

In [23]:
example_tokenizer = AutoTokenizer.from_pretrained(model_save_path)
inputs = tokenizer(example_text_1,return_tensors="pt").input_ids

In [24]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_save_path)
outputs = model.generate(inputs,max_new_tokens = 48, do_sample= True,top_k = 30, top_p = 8.95)

In [28]:
tokenizer.decode(outputs[0],skip_special_tokens=True)

'In that battle the king of women and the kings, that man of great oblation, on each other, has come here. In the battle of the enemy, the king of nitrator and'