# 🧠 Fine-tune T5/BART for Intent Detection + Response Generation
This notebook shows how to fine-tune a pretrained T5 or BART model on a custom dataset that includes user queries (instructions), intents, and responses.

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq
import evaluate
import numpy as np

# Load dataset
dataset = load_dataset('csv', data_files='./nlp_intent_response_dataset.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

# Choose model: 't5-small' or 'facebook/bart-base'
model_checkpoint = 't5-small'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Preprocessing function
max_input_length = 128
max_target_length = 128

def preprocess(example):
    inputs = tokenizer(example['input_text'], truncation=True, padding='max_length', max_length=max_input_length)
    targets = tokenizer(example['target'], truncation=True, padding='max_length', max_length=max_target_length)
    inputs['labels'] = targets['input_ids']
    return inputs

tokenized_data = dataset.map(preprocess, batched=True, remove_columns=dataset['train'].column_names)

Map: 100%|██████████| 24184/24184 [00:06<00:00, 3787.71 examples/s]
Map: 100%|██████████| 2688/2688 [00:00<00:00, 4093.28 examples/s]


In [4]:
# Training configuration
args = Seq2SeqTrainingArguments(
    output_dir='t5_intent_response_model',
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=3,
    predict_with_generate=True,
    logging_dir='./logs',
    logging_steps=50,
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Evaluation metric
rouge = evaluate.load('rouge')

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    return rouge.compute(predictions=decoded_preds, references=decoded_labels)

In [5]:
# Train the model
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_data['train'],
    eval_dataset=tokenized_data['test'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

***** Running training *****
  Num examples = 24184
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 9069
  Number of trainable parameters = 60506624
  0%|          | 0/9069 [00:00<?, ?it/s]You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  1%|          | 50/9069 [00:16<46:33,  3.23it/s] 

{'loss': 5.7627, 'learning_rate': 1.9889734259565556e-05, 'epoch': 0.02}


  1%|          | 100/9069 [00:32<46:39,  3.20it/s]

{'loss': 4.4583, 'learning_rate': 1.977946851913111e-05, 'epoch': 0.03}


  2%|▏         | 150/9069 [00:47<45:48,  3.24it/s]

{'loss': 3.6109, 'learning_rate': 1.966920277869666e-05, 'epoch': 0.05}


  2%|▏         | 200/9069 [01:03<45:28,  3.25it/s]

{'loss': 3.218, 'learning_rate': 1.9558937038262214e-05, 'epoch': 0.07}


  3%|▎         | 250/9069 [01:18<45:24,  3.24it/s]

{'loss': 2.9015, 'learning_rate': 1.9448671297827765e-05, 'epoch': 0.08}


  3%|▎         | 300/9069 [01:34<45:50,  3.19it/s]

{'loss': 2.7743, 'learning_rate': 1.933840555739332e-05, 'epoch': 0.1}


  4%|▍         | 350/9069 [01:49<44:59,  3.23it/s]

{'loss': 2.6111, 'learning_rate': 1.9228139816958873e-05, 'epoch': 0.12}


  4%|▍         | 400/9069 [02:05<44:24,  3.25it/s]

{'loss': 2.4257, 'learning_rate': 1.9117874076524427e-05, 'epoch': 0.13}


  5%|▍         | 450/9069 [02:20<44:15,  3.25it/s]

{'loss': 2.4656, 'learning_rate': 1.9007608336089977e-05, 'epoch': 0.15}


  6%|▌         | 500/9069 [02:36<44:33,  3.21it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-500
Configuration saved in t5_intent_response_model\checkpoint-500\config.json


{'loss': 2.3518, 'learning_rate': 1.889734259565553e-05, 'epoch': 0.17}


Model weights saved in t5_intent_response_model\checkpoint-500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-500\special_tokens_map.json
  6%|▌         | 550/9069 [02:53<43:45,  3.24it/s]  

{'loss': 2.3068, 'learning_rate': 1.8787076855221085e-05, 'epoch': 0.18}


  7%|▋         | 600/9069 [03:08<43:47,  3.22it/s]

{'loss': 2.183, 'learning_rate': 1.8676811114786636e-05, 'epoch': 0.2}


  7%|▋         | 650/9069 [03:24<44:36,  3.15it/s]

{'loss': 2.2316, 'learning_rate': 1.856654537435219e-05, 'epoch': 0.22}


  8%|▊         | 700/9069 [03:40<44:21,  3.14it/s]

{'loss': 2.1608, 'learning_rate': 1.8456279633917744e-05, 'epoch': 0.23}


  8%|▊         | 750/9069 [03:56<42:57,  3.23it/s]

{'loss': 2.1411, 'learning_rate': 1.8346013893483298e-05, 'epoch': 0.25}


  9%|▉         | 800/9069 [04:11<42:47,  3.22it/s]

{'loss': 2.1348, 'learning_rate': 1.823574815304885e-05, 'epoch': 0.26}


  9%|▉         | 850/9069 [04:27<42:35,  3.22it/s]

{'loss': 2.0685, 'learning_rate': 1.8125482412614403e-05, 'epoch': 0.28}


 10%|▉         | 900/9069 [04:42<42:47,  3.18it/s]

{'loss': 2.0953, 'learning_rate': 1.8015216672179957e-05, 'epoch': 0.3}


 10%|█         | 950/9069 [04:58<42:01,  3.22it/s]

{'loss': 1.9753, 'learning_rate': 1.7904950931745507e-05, 'epoch': 0.31}


 11%|█         | 1000/9069 [05:13<41:36,  3.23it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-1000
Configuration saved in t5_intent_response_model\checkpoint-1000\config.json


{'loss': 2.0173, 'learning_rate': 1.779468519131106e-05, 'epoch': 0.33}


Model weights saved in t5_intent_response_model\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-1000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-1000\special_tokens_map.json
 12%|█▏        | 1050/9069 [05:30<44:21,  3.01it/s]  

{'loss': 1.909, 'learning_rate': 1.7684419450876612e-05, 'epoch': 0.35}


 12%|█▏        | 1100/9069 [05:46<41:30,  3.20it/s]

{'loss': 1.9662, 'learning_rate': 1.757415371044217e-05, 'epoch': 0.36}


 13%|█▎        | 1150/9069 [06:02<41:08,  3.21it/s]

{'loss': 1.9615, 'learning_rate': 1.746388797000772e-05, 'epoch': 0.38}


 13%|█▎        | 1200/9069 [06:17<40:55,  3.21it/s]

{'loss': 1.9577, 'learning_rate': 1.7353622229573274e-05, 'epoch': 0.4}


 14%|█▍        | 1250/9069 [06:33<40:20,  3.23it/s]

{'loss': 1.937, 'learning_rate': 1.7243356489138824e-05, 'epoch': 0.41}


 14%|█▍        | 1300/9069 [06:48<40:40,  3.18it/s]

{'loss': 1.974, 'learning_rate': 1.713309074870438e-05, 'epoch': 0.43}


 15%|█▍        | 1350/9069 [07:05<43:02,  2.99it/s]

{'loss': 1.8594, 'learning_rate': 1.7022825008269932e-05, 'epoch': 0.45}


 15%|█▌        | 1400/9069 [07:21<41:07,  3.11it/s]

{'loss': 1.8584, 'learning_rate': 1.6912559267835483e-05, 'epoch': 0.46}


 16%|█▌        | 1450/9069 [07:37<40:52,  3.11it/s]

{'loss': 1.8361, 'learning_rate': 1.6802293527401037e-05, 'epoch': 0.48}


 17%|█▋        | 1500/9069 [07:53<40:09,  3.14it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-1500
Configuration saved in t5_intent_response_model\checkpoint-1500\config.json


{'loss': 1.8442, 'learning_rate': 1.669202778696659e-05, 'epoch': 0.5}


Model weights saved in t5_intent_response_model\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-1500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-1500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-500] due to args.save_total_limit
 17%|█▋        | 1550/9069 [08:11<39:55,  3.14it/s]  

{'loss': 1.8192, 'learning_rate': 1.6581762046532145e-05, 'epoch': 0.51}


 18%|█▊        | 1600/9069 [08:27<39:02,  3.19it/s]

{'loss': 1.806, 'learning_rate': 1.6471496306097696e-05, 'epoch': 0.53}


 18%|█▊        | 1650/9069 [08:43<38:30,  3.21it/s]

{'loss': 1.8596, 'learning_rate': 1.636123056566325e-05, 'epoch': 0.55}


 19%|█▊        | 1700/9069 [08:58<38:38,  3.18it/s]

{'loss': 1.8081, 'learning_rate': 1.6250964825228804e-05, 'epoch': 0.56}


 19%|█▉        | 1750/9069 [09:14<37:59,  3.21it/s]

{'loss': 1.8271, 'learning_rate': 1.6140699084794354e-05, 'epoch': 0.58}


 20%|█▉        | 1800/9069 [09:30<38:04,  3.18it/s]

{'loss': 1.7928, 'learning_rate': 1.6030433344359908e-05, 'epoch': 0.6}


 20%|██        | 1850/9069 [09:45<37:25,  3.21it/s]

{'loss': 1.7854, 'learning_rate': 1.5920167603925462e-05, 'epoch': 0.61}


 21%|██        | 1900/9069 [10:01<37:13,  3.21it/s]

{'loss': 1.7487, 'learning_rate': 1.5809901863491016e-05, 'epoch': 0.63}


 22%|██▏       | 1950/9069 [10:17<37:05,  3.20it/s]

{'loss': 1.7399, 'learning_rate': 1.5699636123056567e-05, 'epoch': 0.65}


 22%|██▏       | 2000/9069 [10:32<36:42,  3.21it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-2000
Configuration saved in t5_intent_response_model\checkpoint-2000\config.json


{'loss': 1.7371, 'learning_rate': 1.558937038262212e-05, 'epoch': 0.66}


Model weights saved in t5_intent_response_model\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-2000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-2000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-1000] due to args.save_total_limit
 23%|██▎       | 2050/9069 [10:49<36:20,  3.22it/s]  

{'loss': 1.7306, 'learning_rate': 1.5479104642187675e-05, 'epoch': 0.68}


 23%|██▎       | 2100/9069 [11:05<35:57,  3.23it/s]

{'loss': 1.6855, 'learning_rate': 1.5368838901753225e-05, 'epoch': 0.69}


 24%|██▎       | 2150/9069 [11:21<36:23,  3.17it/s]

{'loss': 1.7219, 'learning_rate': 1.525857316131878e-05, 'epoch': 0.71}


 24%|██▍       | 2200/9069 [11:36<36:01,  3.18it/s]

{'loss': 1.7193, 'learning_rate': 1.5148307420884332e-05, 'epoch': 0.73}


 25%|██▍       | 2250/9069 [11:52<35:31,  3.20it/s]

{'loss': 1.7132, 'learning_rate': 1.5038041680449886e-05, 'epoch': 0.74}


 25%|██▌       | 2300/9069 [12:08<35:08,  3.21it/s]

{'loss': 1.708, 'learning_rate': 1.4927775940015438e-05, 'epoch': 0.76}


 26%|██▌       | 2350/9069 [12:24<35:10,  3.18it/s]

{'loss': 1.7642, 'learning_rate': 1.481751019958099e-05, 'epoch': 0.78}


 26%|██▋       | 2400/9069 [12:39<34:27,  3.23it/s]

{'loss': 1.6681, 'learning_rate': 1.4707244459146544e-05, 'epoch': 0.79}


 27%|██▋       | 2450/9069 [12:55<34:32,  3.19it/s]

{'loss': 1.7038, 'learning_rate': 1.4596978718712098e-05, 'epoch': 0.81}


 28%|██▊       | 2500/9069 [13:11<34:05,  3.21it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-2500
Configuration saved in t5_intent_response_model\checkpoint-2500\config.json


{'loss': 1.6693, 'learning_rate': 1.448671297827765e-05, 'epoch': 0.83}


Model weights saved in t5_intent_response_model\checkpoint-2500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-2500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-2500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-1500] due to args.save_total_limit
 28%|██▊       | 2550/9069 [13:28<34:52,  3.12it/s]  

{'loss': 1.697, 'learning_rate': 1.4376447237843203e-05, 'epoch': 0.84}


 29%|██▊       | 2600/9069 [13:43<33:40,  3.20it/s]

{'loss': 1.6635, 'learning_rate': 1.4266181497408755e-05, 'epoch': 0.86}


 29%|██▉       | 2650/9069 [13:59<33:53,  3.16it/s]

{'loss': 1.619, 'learning_rate': 1.415591575697431e-05, 'epoch': 0.88}


 30%|██▉       | 2700/9069 [14:15<34:53,  3.04it/s]

{'loss': 1.6298, 'learning_rate': 1.4045650016539863e-05, 'epoch': 0.89}


 30%|███       | 2750/9069 [14:31<33:17,  3.16it/s]

{'loss': 1.636, 'learning_rate': 1.3935384276105416e-05, 'epoch': 0.91}


 31%|███       | 2800/9069 [14:47<32:31,  3.21it/s]

{'loss': 1.5977, 'learning_rate': 1.3825118535670968e-05, 'epoch': 0.93}


 31%|███▏      | 2850/9069 [15:02<32:10,  3.22it/s]

{'loss': 1.6449, 'learning_rate': 1.3714852795236522e-05, 'epoch': 0.94}


 32%|███▏      | 2900/9069 [15:18<32:32,  3.16it/s]

{'loss': 1.6947, 'learning_rate': 1.3604587054802074e-05, 'epoch': 0.96}


 33%|███▎      | 2950/9069 [15:34<32:39,  3.12it/s]

{'loss': 1.5826, 'learning_rate': 1.3494321314367626e-05, 'epoch': 0.98}


 33%|███▎      | 3000/9069 [15:50<31:29,  3.21it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-3000
Configuration saved in t5_intent_response_model\checkpoint-3000\config.json


{'loss': 1.6291, 'learning_rate': 1.3384055573933179e-05, 'epoch': 0.99}


Model weights saved in t5_intent_response_model\checkpoint-3000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-3000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-3000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-2000] due to args.save_total_limit
 33%|███▎      | 3023/9069 [15:58<32:39,  3.09it/s]  ***** Running Evaluation *****
  Num examples = 2688
  Batch size = 8
                                                   
 33%|███▎      | 3023/9069 [18:41<32:39,  3.09it/s]

{'eval_loss': 1.347261905670166, 'eval_rouge1': 0.1927209768109747, 'eval_rouge2': 0.09915281548336663, 'eval_rougeL': 0.16519462018856795, 'eval_rougeLsum': 0.16511651889046125, 'eval_runtime': 162.6309, 'eval_samples_per_second': 16.528, 'eval_steps_per_second': 2.066, 'epoch': 1.0}


 34%|███▎      | 3050/9069 [18:49<31:53,  3.14it/s]   

{'loss': 1.6133, 'learning_rate': 1.3273789833498734e-05, 'epoch': 1.01}


 34%|███▍      | 3100/9069 [19:05<31:07,  3.20it/s]

{'loss': 1.6052, 'learning_rate': 1.3163524093064287e-05, 'epoch': 1.03}


 35%|███▍      | 3150/9069 [19:21<30:51,  3.20it/s]

{'loss': 1.6582, 'learning_rate': 1.3053258352629839e-05, 'epoch': 1.04}


 35%|███▌      | 3200/9069 [19:36<30:41,  3.19it/s]

{'loss': 1.616, 'learning_rate': 1.2942992612195393e-05, 'epoch': 1.06}


 36%|███▌      | 3250/9069 [19:52<30:50,  3.14it/s]

{'loss': 1.5695, 'learning_rate': 1.2832726871760945e-05, 'epoch': 1.08}


 36%|███▋      | 3300/9069 [20:09<31:01,  3.10it/s]

{'loss': 1.5729, 'learning_rate': 1.2722461131326498e-05, 'epoch': 1.09}


 37%|███▋      | 3350/9069 [20:25<29:56,  3.18it/s]

{'loss': 1.5879, 'learning_rate': 1.261219539089205e-05, 'epoch': 1.11}


 37%|███▋      | 3400/9069 [20:41<29:31,  3.20it/s]

{'loss': 1.5739, 'learning_rate': 1.2501929650457606e-05, 'epoch': 1.12}


 38%|███▊      | 3450/9069 [20:57<29:23,  3.19it/s]

{'loss': 1.556, 'learning_rate': 1.2391663910023158e-05, 'epoch': 1.14}


 39%|███▊      | 3500/9069 [21:12<29:47,  3.12it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-3500
Configuration saved in t5_intent_response_model\checkpoint-3500\config.json


{'loss': 1.5814, 'learning_rate': 1.228139816958871e-05, 'epoch': 1.16}


Model weights saved in t5_intent_response_model\checkpoint-3500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-3500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-3500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-2500] due to args.save_total_limit
 39%|███▉      | 3550/9069 [21:29<29:11,  3.15it/s]  

{'loss': 1.5592, 'learning_rate': 1.2171132429154263e-05, 'epoch': 1.17}


 40%|███▉      | 3600/9069 [21:45<28:47,  3.17it/s]

{'loss': 1.5447, 'learning_rate': 1.2060866688719817e-05, 'epoch': 1.19}


 40%|████      | 3650/9069 [22:01<28:37,  3.15it/s]

{'loss': 1.5381, 'learning_rate': 1.1950600948285369e-05, 'epoch': 1.21}


 41%|████      | 3700/9069 [22:17<29:30,  3.03it/s]

{'loss': 1.5571, 'learning_rate': 1.1840335207850921e-05, 'epoch': 1.22}


 41%|████▏     | 3750/9069 [22:32<27:29,  3.22it/s]

{'loss': 1.5522, 'learning_rate': 1.1730069467416473e-05, 'epoch': 1.24}


 42%|████▏     | 3800/9069 [22:48<27:05,  3.24it/s]

{'loss': 1.5332, 'learning_rate': 1.1619803726982029e-05, 'epoch': 1.26}


 42%|████▏     | 3850/9069 [23:04<27:12,  3.20it/s]

{'loss': 1.5077, 'learning_rate': 1.1509537986547581e-05, 'epoch': 1.27}


 43%|████▎     | 3900/9069 [23:20<27:50,  3.09it/s]

{'loss': 1.496, 'learning_rate': 1.1399272246113134e-05, 'epoch': 1.29}


 44%|████▎     | 3950/9069 [23:35<27:29,  3.10it/s]

{'loss': 1.5542, 'learning_rate': 1.1289006505678686e-05, 'epoch': 1.31}


 44%|████▍     | 4000/9069 [23:51<27:20,  3.09it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-4000
Configuration saved in t5_intent_response_model\checkpoint-4000\config.json


{'loss': 1.496, 'learning_rate': 1.117874076524424e-05, 'epoch': 1.32}


Model weights saved in t5_intent_response_model\checkpoint-4000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-4000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-4000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-3000] due to args.save_total_limit
 45%|████▍     | 4050/9069 [24:09<26:25,  3.17it/s]  

{'loss': 1.5336, 'learning_rate': 1.1068475024809792e-05, 'epoch': 1.34}


 45%|████▌     | 4100/9069 [24:25<26:25,  3.13it/s]

{'loss': 1.5357, 'learning_rate': 1.0958209284375345e-05, 'epoch': 1.36}


 46%|████▌     | 4150/9069 [24:41<25:47,  3.18it/s]

{'loss': 1.5096, 'learning_rate': 1.0847943543940897e-05, 'epoch': 1.37}


 46%|████▋     | 4200/9069 [24:56<25:38,  3.17it/s]

{'loss': 1.5271, 'learning_rate': 1.0737677803506453e-05, 'epoch': 1.39}


 47%|████▋     | 4250/9069 [25:12<24:55,  3.22it/s]

{'loss': 1.5494, 'learning_rate': 1.0627412063072005e-05, 'epoch': 1.41}


 47%|████▋     | 4300/9069 [25:28<24:57,  3.18it/s]

{'loss': 1.5091, 'learning_rate': 1.0517146322637557e-05, 'epoch': 1.42}


 48%|████▊     | 4350/9069 [25:44<24:34,  3.20it/s]

{'loss': 1.5003, 'learning_rate': 1.040688058220311e-05, 'epoch': 1.44}


 49%|████▊     | 4400/9069 [25:59<24:16,  3.20it/s]

{'loss': 1.5034, 'learning_rate': 1.0296614841768664e-05, 'epoch': 1.46}


 49%|████▉     | 4450/9069 [26:15<24:00,  3.21it/s]

{'loss': 1.5123, 'learning_rate': 1.0186349101334216e-05, 'epoch': 1.47}


 50%|████▉     | 4500/9069 [26:31<23:54,  3.19it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-4500
Configuration saved in t5_intent_response_model\checkpoint-4500\config.json


{'loss': 1.5109, 'learning_rate': 1.0076083360899768e-05, 'epoch': 1.49}


Model weights saved in t5_intent_response_model\checkpoint-4500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-4500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-4500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-3500] due to args.save_total_limit
 50%|█████     | 4550/9069 [26:48<23:32,  3.20it/s]

{'loss': 1.5143, 'learning_rate': 9.965817620465322e-06, 'epoch': 1.51}


 51%|█████     | 4600/9069 [27:03<23:26,  3.18it/s]

{'loss': 1.5234, 'learning_rate': 9.855551880030874e-06, 'epoch': 1.52}


 51%|█████▏    | 4650/9069 [27:19<23:26,  3.14it/s]

{'loss': 1.4812, 'learning_rate': 9.745286139596428e-06, 'epoch': 1.54}


 52%|█████▏    | 4700/9069 [27:35<22:38,  3.22it/s]

{'loss': 1.4967, 'learning_rate': 9.63502039916198e-06, 'epoch': 1.55}


 52%|█████▏    | 4750/9069 [27:50<22:36,  3.18it/s]

{'loss': 1.4508, 'learning_rate': 9.524754658727535e-06, 'epoch': 1.57}


 53%|█████▎    | 4800/9069 [28:06<22:47,  3.12it/s]

{'loss': 1.4538, 'learning_rate': 9.414488918293087e-06, 'epoch': 1.59}


 53%|█████▎    | 4850/9069 [28:22<22:02,  3.19it/s]

{'loss': 1.4902, 'learning_rate': 9.30422317785864e-06, 'epoch': 1.6}


 54%|█████▍    | 4900/9069 [28:38<21:42,  3.20it/s]

{'loss': 1.4901, 'learning_rate': 9.193957437424193e-06, 'epoch': 1.62}


 55%|█████▍    | 4950/9069 [28:54<21:52,  3.14it/s]

{'loss': 1.449, 'learning_rate': 9.083691696989746e-06, 'epoch': 1.64}


 55%|█████▌    | 5000/9069 [29:09<21:16,  3.19it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-5000
Configuration saved in t5_intent_response_model\checkpoint-5000\config.json


{'loss': 1.4633, 'learning_rate': 8.9734259565553e-06, 'epoch': 1.65}


Model weights saved in t5_intent_response_model\checkpoint-5000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-5000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-5000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-4000] due to args.save_total_limit
 56%|█████▌    | 5050/9069 [29:26<20:47,  3.22it/s]

{'loss': 1.4822, 'learning_rate': 8.863160216120852e-06, 'epoch': 1.67}


 56%|█████▌    | 5100/9069 [29:42<20:47,  3.18it/s]

{'loss': 1.4936, 'learning_rate': 8.752894475686406e-06, 'epoch': 1.69}


 57%|█████▋    | 5150/9069 [29:57<20:14,  3.23it/s]

{'loss': 1.4714, 'learning_rate': 8.642628735251958e-06, 'epoch': 1.7}


 57%|█████▋    | 5200/9069 [30:13<19:58,  3.23it/s]

{'loss': 1.4724, 'learning_rate': 8.53236299481751e-06, 'epoch': 1.72}


 58%|█████▊    | 5250/9069 [30:30<20:53,  3.05it/s]

{'loss': 1.4824, 'learning_rate': 8.422097254383065e-06, 'epoch': 1.74}


 58%|█████▊    | 5300/9069 [30:46<19:43,  3.19it/s]

{'loss': 1.4631, 'learning_rate': 8.311831513948617e-06, 'epoch': 1.75}


 59%|█████▉    | 5350/9069 [31:02<19:32,  3.17it/s]

{'loss': 1.4588, 'learning_rate': 8.201565773514169e-06, 'epoch': 1.77}


 60%|█████▉    | 5400/9069 [31:18<19:09,  3.19it/s]

{'loss': 1.4445, 'learning_rate': 8.091300033079723e-06, 'epoch': 1.79}


 60%|██████    | 5450/9069 [31:33<18:46,  3.21it/s]

{'loss': 1.4887, 'learning_rate': 7.981034292645275e-06, 'epoch': 1.8}


 61%|██████    | 5500/9069 [31:49<18:41,  3.18it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-5500
Configuration saved in t5_intent_response_model\checkpoint-5500\config.json


{'loss': 1.444, 'learning_rate': 7.87076855221083e-06, 'epoch': 1.82}


Model weights saved in t5_intent_response_model\checkpoint-5500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-5500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-5500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-4500] due to args.save_total_limit
 61%|██████    | 5550/9069 [32:06<18:30,  3.17it/s]

{'loss': 1.497, 'learning_rate': 7.760502811776382e-06, 'epoch': 1.84}


 62%|██████▏   | 5600/9069 [32:21<18:00,  3.21it/s]

{'loss': 1.4748, 'learning_rate': 7.650237071341936e-06, 'epoch': 1.85}


 62%|██████▏   | 5650/9069 [32:37<17:33,  3.25it/s]

{'loss': 1.4795, 'learning_rate': 7.539971330907487e-06, 'epoch': 1.87}


 63%|██████▎   | 5700/9069 [32:53<17:28,  3.21it/s]

{'loss': 1.4661, 'learning_rate': 7.429705590473041e-06, 'epoch': 1.89}


 63%|██████▎   | 5750/9069 [33:08<17:20,  3.19it/s]

{'loss': 1.4313, 'learning_rate': 7.3194398500385935e-06, 'epoch': 1.9}


 64%|██████▍   | 5800/9069 [33:24<17:11,  3.17it/s]

{'loss': 1.4502, 'learning_rate': 7.209174109604147e-06, 'epoch': 1.92}


 65%|██████▍   | 5850/9069 [33:40<16:44,  3.20it/s]

{'loss': 1.4101, 'learning_rate': 7.098908369169699e-06, 'epoch': 1.94}


 65%|██████▌   | 5900/9069 [33:55<16:22,  3.23it/s]

{'loss': 1.4434, 'learning_rate': 6.988642628735253e-06, 'epoch': 1.95}


 66%|██████▌   | 5950/9069 [34:11<16:04,  3.23it/s]

{'loss': 1.4927, 'learning_rate': 6.878376888300805e-06, 'epoch': 1.97}


 66%|██████▌   | 6000/9069 [34:27<16:04,  3.18it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-6000
Configuration saved in t5_intent_response_model\checkpoint-6000\config.json


{'loss': 1.4373, 'learning_rate': 6.768111147866358e-06, 'epoch': 1.98}


Model weights saved in t5_intent_response_model\checkpoint-6000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-6000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-6000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-5000] due to args.save_total_limit
 67%|██████▋   | 6046/9069 [34:42<15:40,  3.21it/s]***** Running Evaluation *****
  Num examples = 2688
  Batch size = 8
                                                   
 67%|██████▋   | 6046/9069 [37:23<15:40,  3.21it/s]

{'eval_loss': 1.2020255327224731, 'eval_rouge1': 0.194328838947907, 'eval_rouge2': 0.10644592654316545, 'eval_rougeL': 0.1697752114440692, 'eval_rougeLsum': 0.16969796305379858, 'eval_runtime': 161.0433, 'eval_samples_per_second': 16.691, 'eval_steps_per_second': 2.086, 'epoch': 2.0}


 67%|██████▋   | 6050/9069 [37:25<14:10:41, 16.91s/it]

{'loss': 1.4629, 'learning_rate': 6.657845407431911e-06, 'epoch': 2.0}


 67%|██████▋   | 6100/9069 [37:40<15:14,  3.25it/s]   

{'loss': 1.4033, 'learning_rate': 6.547579666997465e-06, 'epoch': 2.02}


 68%|██████▊   | 6150/9069 [37:56<15:11,  3.20it/s]

{'loss': 1.4386, 'learning_rate': 6.437313926563017e-06, 'epoch': 2.03}


 68%|██████▊   | 6200/9069 [38:11<14:42,  3.25it/s]

{'loss': 1.4022, 'learning_rate': 6.32704818612857e-06, 'epoch': 2.05}


 69%|██████▉   | 6250/9069 [38:27<14:54,  3.15it/s]

{'loss': 1.4791, 'learning_rate': 6.216782445694122e-06, 'epoch': 2.07}


 69%|██████▉   | 6300/9069 [38:43<14:30,  3.18it/s]

{'loss': 1.4651, 'learning_rate': 6.106516705259676e-06, 'epoch': 2.08}


 70%|███████   | 6350/9069 [38:58<14:05,  3.22it/s]

{'loss': 1.4108, 'learning_rate': 5.9962509648252296e-06, 'epoch': 2.1}


 71%|███████   | 6400/9069 [39:14<13:42,  3.25it/s]

{'loss': 1.4055, 'learning_rate': 5.885985224390782e-06, 'epoch': 2.12}


 71%|███████   | 6450/9069 [39:29<13:31,  3.23it/s]

{'loss': 1.3898, 'learning_rate': 5.775719483956336e-06, 'epoch': 2.13}


 72%|███████▏  | 6500/9069 [39:45<14:26,  2.97it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-6500
Configuration saved in t5_intent_response_model\checkpoint-6500\config.json


{'loss': 1.3828, 'learning_rate': 5.665453743521888e-06, 'epoch': 2.15}


Model weights saved in t5_intent_response_model\checkpoint-6500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-6500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-6500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-5500] due to args.save_total_limit
 72%|███████▏  | 6550/9069 [40:03<13:06,  3.20it/s]

{'loss': 1.4271, 'learning_rate': 5.555188003087441e-06, 'epoch': 2.17}


 73%|███████▎  | 6600/9069 [40:19<12:48,  3.21it/s]

{'loss': 1.4018, 'learning_rate': 5.4449222626529945e-06, 'epoch': 2.18}


 73%|███████▎  | 6650/9069 [40:34<12:28,  3.23it/s]

{'loss': 1.4377, 'learning_rate': 5.334656522218548e-06, 'epoch': 2.2}


 74%|███████▍  | 6700/9069 [40:50<12:37,  3.13it/s]

{'loss': 1.4314, 'learning_rate': 5.2243907817841e-06, 'epoch': 2.22}


 74%|███████▍  | 6750/9069 [41:05<11:56,  3.24it/s]

{'loss': 1.4624, 'learning_rate': 5.114125041349654e-06, 'epoch': 2.23}


 75%|███████▍  | 6800/9069 [41:21<11:46,  3.21it/s]

{'loss': 1.4119, 'learning_rate': 5.003859300915206e-06, 'epoch': 2.25}


 76%|███████▌  | 6850/9069 [41:37<11:29,  3.22it/s]

{'loss': 1.435, 'learning_rate': 4.8935935604807585e-06, 'epoch': 2.27}


 76%|███████▌  | 6900/9069 [41:52<11:11,  3.23it/s]

{'loss': 1.424, 'learning_rate': 4.783327820046312e-06, 'epoch': 2.28}


 77%|███████▋  | 6950/9069 [42:08<11:21,  3.11it/s]

{'loss': 1.4215, 'learning_rate': 4.673062079611865e-06, 'epoch': 2.3}


 77%|███████▋  | 7000/9069 [42:24<10:41,  3.22it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-7000
Configuration saved in t5_intent_response_model\checkpoint-7000\config.json


{'loss': 1.3852, 'learning_rate': 4.562796339177418e-06, 'epoch': 2.32}


Model weights saved in t5_intent_response_model\checkpoint-7000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-7000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-7000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-6000] due to args.save_total_limit
 78%|███████▊  | 7050/9069 [42:41<10:25,  3.23it/s]

{'loss': 1.4622, 'learning_rate': 4.45253059874297e-06, 'epoch': 2.33}


 78%|███████▊  | 7100/9069 [42:56<10:09,  3.23it/s]

{'loss': 1.3982, 'learning_rate': 4.342264858308523e-06, 'epoch': 2.35}


 79%|███████▉  | 7150/9069 [43:12<09:58,  3.21it/s]

{'loss': 1.4712, 'learning_rate': 4.231999117874077e-06, 'epoch': 2.37}


 79%|███████▉  | 7200/9069 [43:27<09:41,  3.21it/s]

{'loss': 1.4459, 'learning_rate': 4.12173337743963e-06, 'epoch': 2.38}


 80%|███████▉  | 7250/9069 [43:43<09:34,  3.16it/s]

{'loss': 1.4026, 'learning_rate': 4.011467637005183e-06, 'epoch': 2.4}


 80%|████████  | 7300/9069 [43:59<09:06,  3.24it/s]

{'loss': 1.4044, 'learning_rate': 3.901201896570736e-06, 'epoch': 2.41}


 81%|████████  | 7350/9069 [44:16<09:00,  3.18it/s]

{'loss': 1.4341, 'learning_rate': 3.790936156136289e-06, 'epoch': 2.43}


 82%|████████▏ | 7400/9069 [44:32<08:51,  3.14it/s]

{'loss': 1.4196, 'learning_rate': 3.680670415701842e-06, 'epoch': 2.45}


 82%|████████▏ | 7450/9069 [44:48<08:41,  3.11it/s]

{'loss': 1.3948, 'learning_rate': 3.570404675267395e-06, 'epoch': 2.46}


 83%|████████▎ | 7500/9069 [45:04<08:13,  3.18it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-7500
Configuration saved in t5_intent_response_model\checkpoint-7500\config.json


{'loss': 1.4456, 'learning_rate': 3.4601389348329478e-06, 'epoch': 2.48}


Model weights saved in t5_intent_response_model\checkpoint-7500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-7500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-7500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-6500] due to args.save_total_limit
 83%|████████▎ | 7550/9069 [45:22<08:03,  3.14it/s]

{'loss': 1.4254, 'learning_rate': 3.349873194398501e-06, 'epoch': 2.5}


 84%|████████▍ | 7600/9069 [45:39<08:14,  2.97it/s]

{'loss': 1.4254, 'learning_rate': 3.2396074539640536e-06, 'epoch': 2.51}


 84%|████████▍ | 7650/9069 [45:56<07:29,  3.16it/s]

{'loss': 1.4002, 'learning_rate': 3.1293417135296068e-06, 'epoch': 2.53}


 85%|████████▍ | 7700/9069 [46:12<07:27,  3.06it/s]

{'loss': 1.4639, 'learning_rate': 3.0190759730951595e-06, 'epoch': 2.55}


 85%|████████▌ | 7750/9069 [46:28<07:02,  3.12it/s]

{'loss': 1.3832, 'learning_rate': 2.9088102326607127e-06, 'epoch': 2.56}


 86%|████████▌ | 7800/9069 [46:44<06:41,  3.16it/s]

{'loss': 1.3778, 'learning_rate': 2.7985444922262654e-06, 'epoch': 2.58}


 87%|████████▋ | 7850/9069 [47:00<06:27,  3.15it/s]

{'loss': 1.4039, 'learning_rate': 2.6882787517918185e-06, 'epoch': 2.6}


 87%|████████▋ | 7900/9069 [47:17<06:20,  3.07it/s]

{'loss': 1.4122, 'learning_rate': 2.5780130113573717e-06, 'epoch': 2.61}


 88%|████████▊ | 7950/9069 [47:33<05:59,  3.11it/s]

{'loss': 1.363, 'learning_rate': 2.4677472709229244e-06, 'epoch': 2.63}


 88%|████████▊ | 8000/9069 [47:49<05:37,  3.17it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-8000
Configuration saved in t5_intent_response_model\checkpoint-8000\config.json


{'loss': 1.4085, 'learning_rate': 2.3574815304884776e-06, 'epoch': 2.65}


Model weights saved in t5_intent_response_model\checkpoint-8000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-8000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-8000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-7000] due to args.save_total_limit
 89%|████████▉ | 8050/9069 [48:06<05:43,  2.97it/s]

{'loss': 1.3946, 'learning_rate': 2.2472157900540303e-06, 'epoch': 2.66}


 89%|████████▉ | 8100/9069 [48:23<05:05,  3.17it/s]

{'loss': 1.4019, 'learning_rate': 2.1369500496195834e-06, 'epoch': 2.68}


 90%|████████▉ | 8150/9069 [48:39<05:05,  3.01it/s]

{'loss': 1.4046, 'learning_rate': 2.026684309185136e-06, 'epoch': 2.7}


 90%|█████████ | 8200/9069 [48:55<04:37,  3.13it/s]

{'loss': 1.4231, 'learning_rate': 1.9164185687506893e-06, 'epoch': 2.71}


 91%|█████████ | 8250/9069 [49:11<04:22,  3.12it/s]

{'loss': 1.4088, 'learning_rate': 1.8061528283162422e-06, 'epoch': 2.73}


 92%|█████████▏| 8300/9069 [49:27<04:00,  3.20it/s]

{'loss': 1.3997, 'learning_rate': 1.6958870878817954e-06, 'epoch': 2.75}


 92%|█████████▏| 8350/9069 [49:44<04:17,  2.80it/s]

{'loss': 1.4085, 'learning_rate': 1.5856213474473483e-06, 'epoch': 2.76}


 93%|█████████▎| 8400/9069 [50:02<03:38,  3.06it/s]

{'loss': 1.3583, 'learning_rate': 1.4753556070129013e-06, 'epoch': 2.78}


 93%|█████████▎| 8450/9069 [50:19<03:16,  3.15it/s]

{'loss': 1.4329, 'learning_rate': 1.3650898665784542e-06, 'epoch': 2.8}


 94%|█████████▎| 8500/9069 [50:35<03:11,  2.96it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-8500
Configuration saved in t5_intent_response_model\checkpoint-8500\config.json


{'loss': 1.4369, 'learning_rate': 1.2548241261440071e-06, 'epoch': 2.81}


Model weights saved in t5_intent_response_model\checkpoint-8500\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-8500\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-8500\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-7500] due to args.save_total_limit
 94%|█████████▍| 8550/9069 [50:54<02:58,  2.90it/s]

{'loss': 1.4242, 'learning_rate': 1.1445583857095603e-06, 'epoch': 2.83}


 95%|█████████▍| 8600/9069 [51:12<02:44,  2.85it/s]

{'loss': 1.3864, 'learning_rate': 1.0342926452751132e-06, 'epoch': 2.84}


 95%|█████████▌| 8650/9069 [51:30<03:46,  1.85it/s]

{'loss': 1.4155, 'learning_rate': 9.240269048406661e-07, 'epoch': 2.86}


 96%|█████████▌| 8700/9069 [51:47<01:56,  3.15it/s]

{'loss': 1.3905, 'learning_rate': 8.137611644062191e-07, 'epoch': 2.88}


 96%|█████████▋| 8750/9069 [52:04<01:41,  3.14it/s]

{'loss': 1.3811, 'learning_rate': 7.03495423971772e-07, 'epoch': 2.89}


 97%|█████████▋| 8800/9069 [52:20<01:28,  3.03it/s]

{'loss': 1.4165, 'learning_rate': 5.932296835373251e-07, 'epoch': 2.91}


 98%|█████████▊| 8850/9069 [52:36<01:11,  3.06it/s]

{'loss': 1.4244, 'learning_rate': 4.82963943102878e-07, 'epoch': 2.93}


 98%|█████████▊| 8900/9069 [52:52<00:53,  3.18it/s]

{'loss': 1.4337, 'learning_rate': 3.7269820266843095e-07, 'epoch': 2.94}


 99%|█████████▊| 8950/9069 [53:08<00:38,  3.12it/s]

{'loss': 1.3695, 'learning_rate': 2.6243246223398394e-07, 'epoch': 2.96}


 99%|█████████▉| 9000/9069 [53:24<00:21,  3.17it/s]Saving model checkpoint to t5_intent_response_model\checkpoint-9000
Configuration saved in t5_intent_response_model\checkpoint-9000\config.json


{'loss': 1.3982, 'learning_rate': 1.5216672179953688e-07, 'epoch': 2.98}


Model weights saved in t5_intent_response_model\checkpoint-9000\pytorch_model.bin
tokenizer config file saved in t5_intent_response_model\checkpoint-9000\tokenizer_config.json
Special tokens file saved in t5_intent_response_model\checkpoint-9000\special_tokens_map.json
Deleting older checkpoint [t5_intent_response_model\checkpoint-8000] due to args.save_total_limit
100%|█████████▉| 9050/9069 [53:42<00:05,  3.19it/s]

{'loss': 1.4275, 'learning_rate': 4.190098136508987e-08, 'epoch': 2.99}


100%|██████████| 9069/9069 [53:48<00:00,  2.93it/s]***** Running Evaluation *****
  Num examples = 2688
  Batch size = 8
                                                   
100%|██████████| 9069/9069 [57:06<00:00,  2.93it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 9069/9069 [57:06<00:00,  2.65it/s]

{'eval_loss': 1.1699498891830444, 'eval_rouge1': 0.19446302791821615, 'eval_rouge2': 0.10728213799300332, 'eval_rougeL': 0.17024564120189278, 'eval_rougeLsum': 0.17014773440694453, 'eval_runtime': 198.122, 'eval_samples_per_second': 13.567, 'eval_steps_per_second': 1.696, 'epoch': 3.0}
{'train_runtime': 3426.7818, 'train_samples_per_second': 21.172, 'train_steps_per_second': 2.647, 'train_loss': 1.6643453659644818, 'epoch': 3.0}





TrainOutput(global_step=9069, training_loss=1.6643453659644818, metrics={'train_runtime': 3426.7818, 'train_samples_per_second': 21.172, 'train_steps_per_second': 2.647, 'train_loss': 1.6643453659644818, 'epoch': 3.0})