In [None]:
import torch
import torch.nn as nn
from transformers import T5Tokenizer, T5Config, T5Model, T5ForConditionalGeneration, Trainer, TrainingArguments
from datasets import load_dataset

def load_and_preprocess_data(tokenizer, max_input_length=512, max_target_length=150):
    """Load CNN/DailyMail ("3.0.0") and tokenize it for summarization training.

    Args:
        tokenizer: Callable tokenizer; it is invoked on a list of strings with
            ``truncation``, ``padding`` and ``max_length`` keyword arguments and
            must return a mapping with ``input_ids`` and ``attention_mask``.
            Its ``pad_token_id`` attribute is used to find label padding.
        max_input_length: Length the ``article`` texts are truncated/padded to.
        max_target_length: Length the ``highlights`` texts are truncated/padded to.

    Returns:
        A ``(train_data, val_data)`` tuple: the ``train`` and ``validation``
        splits after mapping, each with ``input_ids``, ``attention_mask`` and
        ``labels`` columns added.
    """
    dataset = load_dataset("cnn_dailymail", "3.0.0")
    train_data = dataset['train']
    val_data = dataset['validation']

    pad_token_id = tokenizer.pad_token_id

    def tokenize_data(example):
        """Tokenize one batch (``map`` is called with ``batched=True``)."""
        inputs = tokenizer(
            example['article'],
            truncation=True,
            padding='max_length',
            max_length=max_input_length,
        )
        targets = tokenizer(
            example['highlights'],
            truncation=True,
            padding='max_length',
            max_length=max_target_length,
        )
        example['input_ids'] = inputs['input_ids']
        example['attention_mask'] = inputs['attention_mask']
        # Targets are padded to a fixed length; replace the pad positions with
        # -100 (the index the cross-entropy loss ignores) so the model is not
        # trained to predict padding tokens.
        example['labels'] = [
            [-100 if token_id == pad_token_id else token_id for token_id in label_ids]
            for label_ids in targets['input_ids']
        ]
        return example

    train_data = train_data.map(tokenize_data, batched=True)
    val_data = val_data.map(tokenize_data, batched=True)

    return train_data, val_data

In [None]:
class EncoderDecoderAttentionModel(torch.nn.Module):
    """Thin ``nn.Module`` wrapper around a pretrained T5 conditional-generation model.

    Attributes set in ``__init__``:
        t5_model: ``T5ForConditionalGeneration`` loaded from ``model_name``.
        tokenizer: ``T5Tokenizer`` loaded from the same ``model_name``.
    """

    def __init__(self, model_name='t5-small'):
        """Load the pretrained model and tokenizer identified by ``model_name``."""
        super().__init__()
        self.t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Forward the arguments to the wrapped T5 model and return its output unchanged."""
        outputs = self.t5_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        return outputs

    def save_pretrained(self, save_directory, **kwargs):
        """Save the wrapped model and the tokenizer into ``save_directory``.

        A plain ``nn.Module`` has no ``save_pretrained``; this method delegates
        to the wrapped objects so code that treats the wrapper like a Hugging
        Face model (and calls ``model.save_pretrained(path)``) keeps working.

        Args:
            save_directory: Target directory passed to both ``save_pretrained`` calls.
            **kwargs: Forwarded to the wrapped model's ``save_pretrained`` only.
        """
        self.t5_model.save_pretrained(save_directory, **kwargs)
        self.tokenizer.save_pretrained(save_directory)

In [None]:
def fine_tune_model(train_data, val_data, tokenizer, model):
    """Fine-tune ``model`` with the Hugging Face ``Trainer`` and save it.

    Args:
        train_data: Dataset passed to ``Trainer`` as ``train_dataset``.
        val_data: Dataset passed to ``Trainer`` as ``eval_dataset``.
        tokenizer: Tokenizer passed to ``Trainer``.
        model: Model to train. May be a model that provides ``save_pretrained``
            itself, or a wrapper module that keeps such a model in a
            ``t5_model`` attribute.

    Returns:
        The same ``model`` object that was passed in, after training.

    Side effects:
        Writes checkpoints under ``./results``, logs under ``./logs`` and the
        final model under ``./fine_tuned_t5``.
    """
    training_args = TrainingArguments(
        output_dir='./results',
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        save_steps=500,
        save_total_limit=2,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=val_data,
        tokenizer=tokenizer,
    )

    trainer.train()

    save_dir = './fine_tuned_t5'
    wrapped = getattr(model, 't5_model', None)
    if hasattr(model, 'save_pretrained'):
        model.save_pretrained(save_dir)
    elif hasattr(wrapped, 'save_pretrained'):
        # A plain nn.Module wrapper has no save_pretrained of its own; calling
        # it directly would raise AttributeError only after training finished.
        # Save the wrapped model so the directory can be reloaded later.
        wrapped.save_pretrained(save_dir)
    else:
        # Last resort: let the Trainer persist whatever it can for this model.
        trainer.save_model(save_dir)

    return model

In [None]:
def main():
    """Run the full pipeline: load tokenizer and model, preprocess data, fine-tune.

    Author's note: the model was trained on a GPU in Google Colab and then
    uploaded, together with its tokenizer, to Hugging Face; the Streamlit app
    uses those uploaded artifacts. The training and data-preprocessing code is
    still included here to show how the pieces are called from ``main``.
    """
    t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
    summarizer = EncoderDecoderAttentionModel()

    # Tokenize the train/validation splits before handing them to the trainer.
    train_split, val_split = load_and_preprocess_data(t5_tokenizer)

    summarizer = fine_tune_model(train_split, val_split, t5_tokenizer, summarizer)


In [None]:
# Entry point: runs the whole pipeline defined in main() (data loading,
# preprocessing and fine-tuning) when this is executed as the main module.
# NOTE(review): inside a notebook kernel __name__ is presumably "__main__" too,
# so executing this cell starts the full training run — confirm this is intended.
if __name__ == "__main__":
    main()