# Imports

In [None]:
# %pip install -q evaluate
# %pip install -q opendatasets
# %pip install -q --upgrade accelerate
# %pip install -q --upgrade transformers
# %pip install -q peft
# %pip install -q --upgrade bitsandbytes
# %pip install -q accelerate

In [1]:
import pandas as pd 
import torch
import torch.nn as nn
import torchtext
from torch.utils.data import Dataset, random_split
from typing import List, Dict, Union
from typing import Any, TypeVar
import pandas as pd
import os
import copy
import gc
import evaluate
import opendatasets as od
from huggingface_hub import login
from typing import Optional, Tuple, Union

from datasets import load_dataset, Features, Value
import accelerate

from peft import LoftQConfig, LoraConfig, get_peft_model, PeftModel

import transformers
from transformers.modeling_outputs import QuestionAnsweringModelOutput
from transformers import BertLMHeadModel, AutoConfig, BitsAndBytesConfig,Conv1D
from transformers import AutoTokenizer, Seq2SeqTrainingArguments 
from transformers import Seq2SeqTrainer, AutoModelForCausalLM, IntervalStrategy, AutoModelForQuestionAnswering

from sklearn.model_selection import train_test_split

2024-04-17 16:17:26.974111: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-17 16:17:26.974223: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-17 16:17:27.104202: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


set a seed and confirm CUDA support

In [2]:
torch.manual_seed(2137)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic = True

print("PyTorch Version: ", torch.__version__)
print("torchtext Version: ", torchtext.__version__)
print(f"Using {'GPU' if str(DEVICE) == 'cuda' else 'CPU'}.")

PyTorch Version:  2.1.2
torchtext Version:  0.16.2
Using GPU.


# Dataset Download

## Downloading MedDialog Dataset

NOTE: you will need a kaggle API key for the following to work

In [3]:
import json

# Path to your JSON file
json_file_path = "kaggle.json"

# Open the file and read the content
try:
  with open(json_file_path, "r") as f:
    json_data = json.load(f)
except FileNotFoundError:
  print(f"Error: JSON file not found at {json_file_path}")
  exit(1)

# Access username and key from the JSON data
try:
  username = json_data["username"]
  key = json_data["key"]
except KeyError:
  print("Error: 'username' or 'key' key not found in JSON data")
  exit(1)

Error: JSON file not found at kaggle.json


NameError: name 'json_data' is not defined

In [None]:
os.environ['KAGGLE_USERNAME'] = username
os.environ['KAGGLE_KEY'] = key

# Assign the Kaggle data set URL into variable
dataset = 'https://www.kaggle.com/datasets/dsxavier/diagnoise-me'
# Using opendatasets let's download the data sets
od.download(dataset, "dataset")

## Downloading USMLE Dataset

In [4]:
USMLE_dataset = load_dataset("GBaker/MedQA-USMLE-4-options", split="test")

In [5]:
print(USMLE_dataset[0])
print(len(USMLE_dataset))

{'question': 'A junior orthopaedic surgery resident is completing a carpal tunnel repair with the department chairman as the attending physician. During the case, the resident inadvertently cuts a flexor tendon. The tendon is repaired without complication. The attending tells the resident that the patient will do fine, and there is no need to report this minor complication that will not harm the patient, as he does not want to make the patient worry unnecessarily. He tells the resident to leave this complication out of the operative report. Which of the following is the correct next action for the resident to take?', 'answer': 'Tell the attending that he cannot fail to disclose this mistake', 'options': {'A': 'Disclose the error to the patient and put it in the operative report', 'B': 'Tell the attending that he cannot fail to disclose this mistake', 'C': 'Report the physician to the ethics committee', 'D': 'Refuse to dictate the operative report'}, 'meta_info': 'step1', 'answer_idx': 

# Load Datasets

## Loading MedDialog Dataset

In [59]:
DATA_PATH = "dataset\\diagnoise-me\\diagnose_en_dataset.feather"
DATA_PATH = "/kaggle/input/diagnoise-me/diagnose_en_dataset.feather"
SEQ_LEN: int = 1024
data = pd.read_feather(DATA_PATH)
SAMPLE_SIZE: int =  int(data.shape[0] * 0.01) #get 1% of the data
data = data[:SAMPLE_SIZE]
print(data.keys())
print(len(data))




Index(['id', 'Description', 'Doctor', 'Patient'], dtype='object')
2574


In [60]:
# Split data into train and eval sets with 70% for training
train_data, eval_data = train_test_split(data, test_size=0.1, random_state=42)

train_data = train_data.reset_index(drop=True)
eval_data = eval_data.reset_index(drop=True)

# Print the shapes of the train and eval sets
print("Train data shape:", train_data.shape)
print("Eval data shape:", eval_data.shape)

Train data shape: (2316, 4)
Eval data shape: (258, 4)


## Loading USMLE Dataset

In [8]:
USMLE_dataset = pd.DataFrame({'Doctor': USMLE_dataset["answer"], 'Patient': USMLE_dataset["question"], 'Options':USMLE_dataset["options"]})
# Print the shapes of the set
print("USMLELiveEQA data shape:", USMLE_dataset.shape)

USMLELiveEQA data shape: (1273, 3)


## Create an output directory

In [9]:
os.makedirs('./results', exist_ok = True)
OUTPUT_DIR: str = './results'

# Model

In [51]:
# tokens for the datset
MODEL_NAME: str = 'UnfilteredAI/Mia-1B'
BOS_TOKEN: str = '<|startoftext|>'
EOS_TOKEN: str = '<|endoftext|>'
PAD_TOKEN: str = '<|pad|>'

In [52]:
# Load tokenizer 
MAX_TOKEN_LENGTH = 1024

# for evaluation
ltokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, bos_token = BOS_TOKEN, eos_token=EOS_TOKEN, pad_token=PAD_TOKEN)
ltokenizer.padding_side = 'left'
ltokenizer.truncation_side = 'left'

# for training
rtokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, bos_token = BOS_TOKEN, eos_token=EOS_TOKEN, pad_token=PAD_TOKEN)
rtokenizer.padding_side = 'right'
rtokenizer.truncation_side = 'right'

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [53]:
base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map={"": 0})
base_model.resize_token_embeddings(len(rtokenizer))

pytorch_model.bin:   0%|          | 0.00/433M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BertLMHeadModel were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(28999, 768)

In [54]:
print(base_model)

BertLMHeadModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28999, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
   

In [56]:
lora_config = LoraConfig(
    lora_alpha=16, # lora alpha for scaling
    r=16, # rank
    lora_dropout=0.05, #dropout
    use_rslora=True, #  sets the adapter scaling factor to lora_alpha/math.sqrt(r)
    bias="none", # dont train biases
    #target_modules=["query", "value"],
    #layers_to_transform=[20]
)
model = get_peft_model(base_model, lora_config)
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

In [57]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
    return {"trainable": trainable_params, "all": all_param, "trainable%": 100 * trainable_params / all_param}

print_trainable_parameters(model)

trainable params: 589824 || all params: 108961931 || trainable%: 0.5413119927178971


{'trainable': 589824, 'all': 108961931, 'trainable%': 0.5413119927178971}

# Preparing Data for Training

## Custom Dataset

In [61]:
class DoctorPatientDataset(Dataset):
    
    def __init__(self, data, split):
        
        self.input_x: List = data["Patient"]
        self.target: List = data["Doctor"]
        self.split = split

        try:
            self.options: List = data["Options"]
        except:
            pass
            
    def __len__(self):
        return len(self.input_x)
    
    def __getitem__(self, idx):
        try:
            data = {
                'input': self.input_x[idx],
                'target': self.target[idx],
                'options': self.options[idx],
                'split': self.split
            }
        except:
            data = {
                'input': self.input_x[idx],
                'target': self.target[idx],
                'split': self.split
            }
        return data

In [62]:
train_dataset = DoctorPatientDataset(data = train_data, split = "train")
eval_dataset_1 = DoctorPatientDataset(data = eval_data, split = "eval")
eval_dataset_2 = DoctorPatientDataset(data = USMLE_dataset, split = "eval")

## Custom Data Collator

In [67]:
def format_text(message, tokenizer):
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=False
    )
    return text

def custom_data_collator(features, return_tensors="pt"):
    batch = {}

    questions = [feature["input"] for feature in features]
    answers = [feature["target"] for feature in features]
    split = features[0]["split"]

    # training
    if split == 'train':
        tokenizer = rtokenizer
        bos_token = rtokenizer.bos_token
        eos_token = rtokenizer.eos_token
        prompts = [f"a medical student is preparing for her final examination. Her patient has said '{q}'. Explain to the student the most likely cause/course of action." for q in questions]
        #text = [f"{bos_token}Question:{q}.Answer:{t}{eos_token}" for q, t in zip(questions, answers)]
        messages = [[
            {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": a}
        ] for prompt, a in zip(prompts, answers)]

    # evaluation
    else:
        try:
            options = [feature["options"] for feature in features]
            multi_choice = True
        except:
            multi_choice = False


        # tokenizer for evaluation
        tokenizer = ltokenizer
        bos_token = ltokenizer.bos_token

        # Format text to be encoded
        if(multi_choice == False):
            # if we are not using the multiple choice dataset
            # text = [f"{bos_token}Question:{q}.Answer:" for q in questions]
            prompts = [f"a medical student is preparing for her final examination. Her patient has said '{q}'. Explain to the student the most likely cause/course of action." for q in questions]
            messages = [[
                {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
                {"role": "user", "content": prompt},
                {"role": "assistant", "content":""}
            ] for prompt in prompts]
        else:
            # if we are using the multiple choice dataset
            # prompts = [f"provided the following text about medical symptoms: '{q}' Please state the most likely cause/course of action from the options below: A: {o['A']} B: {o['B']} C: {o['C']} D: {o['D']} Please select your answer with the format shown in the following example:'The correct option is C'" for q, o in zip(questions, options)]
            # text = [f"{bos_token}Question:{p}.Answer:" for p in prompts]
            prompts = [f"a medical student is preparing for her final examination. Her patient has said '{q}'. Please clearly state a cause/course of action from the provided options:  A: {o['A']} B: {o['B']} C: {o['C']} D: {o['D']} and explain your answer" for q, o in zip(questions, options)]
            messages = [[
                {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
                {"role": "user", "content": prompt},
                {"role": "assistant", "content":""}
            ] for prompt in prompts]


    # Tokenize the text
    text = list(map(lambda x: format_text(x, tokenizer), messages))
    
    #encoding = tokenizer(text, truncation=True, padding='max_length', max_length=MAX_TOKEN_LENGTH, return_tensors=return_tensors, add_special_tokens=False)
    encoding = tokenizer(text, truncation=True, padding='max_length', max_length=512, return_tensors=return_tensors, add_special_tokens=False)

    # Prepare final batch dictionary
    batch["input_ids"] = encoding["input_ids"]
    batch["attention_mask"] = encoding["attention_mask"]

    if return_tensors in ["pt", "tf"]:
        batch["labels"] = copy.deepcopy(encoding["input_ids"])
    return batch

# Training

In [68]:
training_args = Seq2SeqTrainingArguments(
    output_dir = OUTPUT_DIR, 
    num_train_epochs = 1, 
    evaluation_strategy="steps",
    eval_steps = 10,
    logging_steps = 10,
    save_total_limit = 1,
    per_device_train_batch_size=8, 
    per_device_eval_batch_size=1, 
    warmup_steps=0, 
    weight_decay=0.01, 
    logging_dir='./logs',
    save_steps = 0,
    load_best_model_at_end=True,
    remove_unused_columns=False,
    report_to=['tensorboard']
    )

In [69]:
trainer = Seq2SeqTrainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset,
    eval_dataset=eval_dataset_1, 
    data_collator=custom_data_collator
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [70]:
# trainer = None
# model = None
# base_model = None
# train_dataset = None
torch.cuda.empty_cache()
gc.collect()

2022

In [71]:
trainer.train()

Step,Training Loss,Validation Loss
10,10.6215,No log
20,9.8195,No log
30,8.9617,No log
40,8.0584,No log
50,7.4759,No log
60,7.0908,No log
70,6.666,No log
80,6.5569,No log
90,6.1642,No log
100,5.879,No log


TrainOutput(global_step=290, training_loss=6.243679664874899, metrics={'train_runtime': 367.3037, 'train_samples_per_second': 6.305, 'train_steps_per_second': 0.79, 'total_flos': 613974261510144.0, 'train_loss': 6.243679664874899, 'epoch': 1.0})

In [None]:
trainer.model.save_pretrained(f"{OUTPUT_DIR}/model_save")

# Load the Model

In [16]:
base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
base_model.resize_token_embeddings(len(rtokenizer))
model = PeftModel.from_pretrained(base_model, f"{OUTPUT_DIR}/model_save")
trainer = Seq2SeqTrainer(
    model=model, 
    args=training_args, 
    train_dataset=train_dataset,
    eval_dataset=eval_dataset_1, 
    data_collator=custom_data_collator
)
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [25]:
base_model = None
torch.cuda.empty_cache()
gc.collect()

1310

# Evaluation

In [None]:
eval_result_1 = trainer.predict(eval_dataset_1, max_new_tokens=MAX_TOKEN_LENGTH)
eval_result_2 = trainer.predict(eval_dataset_2, max_new_tokens=MAX_TOKEN_LENGTH)
logits_1 = eval_result_1.predictions
logits_1[logits_1 == -100] = ltokenizer.eos_token_id
logits_2 = eval_result_2.predictions
logits_2[logits_2 == -100] = ltokenizer.eos_token_id

In [None]:
# get the raw evaluation output
raw_text_result_1 = ltokenizer.batch_decode(logits_1, skip_special_tokens=True)
raw_text_result_2 = ltokenizer.batch_decode(logits_2, skip_special_tokens=True)

# get the questions and ground truths from both evaluation datasets
questions_1 = []
ground_truth_1 = []
try:
    for item in eval_dataset_1:
        questions_1.append(item["input"])
        ground_truth_1.append(item["target"])
except:
    pass

questions_2 = []
ground_truth_2 = []
try:
    for item in eval_dataset_2:
        questions_2.append(item["input"])
        ground_truth_2.append(item["target"])
except:
    pass

# create lists for the text outputs
text_result_1 = list()
text_result_2 = list()

# get the answers for the MedDialog dataset
for item in raw_text_result_1:
    index = item.find("Answer:")
    output = item[index+7:]
    index = output.find(ltokenizer.eos_token)
    if(index > -1):
        output = output[:index]
    text_result_1.append(output)


# get the answers for the USMLE dataset
for item in raw_text_result_2:
    index = item.find("Answer:")
    output = item[index+7:]
    index = output.find(ltokenizer.eos_token)
    if(index > -1):
        output = output[:index]
    text_result_2.append(output)



# print the first 2 results from each dataset evaluation
print("============================MedDialog Evaluation============================")
for question, answer in list(zip(questions_1, text_result_1))[:2]:
    print(f"""
    Question: {question}
    Answer: {answer}
    """)

print("============================USMLE Evaluation============================")
for question, answer in list(zip(questions_2, text_result_2))[:2]:
    print(f"""
    Question: {question}
    Answer: {answer}
    """)

# Results

## Load the Required Evaluation Metrics

In [None]:
# perplexity - measures certainty of the model.
# METEOR - extension of BLEU (measure similarity between the output and the ground truth) but accounts for word semantics.
# ROUGE - considers n-gram overlap (recall) but also precision.
# SQuAD v2 - a metric for measuring a models correctness in answering the multiple choice questions
# Accuracy - use this for the multiple choice dataset

perplexity_scorer = evaluate.load('perplexity')
meteor_scorer = evaluate.load('meteor')
rouge_scorer = evaluate.load('rouge')
squad_scorer = evaluate.load('squad_v2')
accuracy_scorer = evaluate.load('accuracy')


In [None]:
# compute the bleu and rouge scores for the MedDialog evaluation
bleu_score_1 = bleu_scorer.compute(predictions=text_result_1, references=ground_truth_1)
rouge_score_1 = rouge_scorer.compute(predictions=text_result_1, references=ground_truth_1)

# compute the bleu and rouge scores for the USMLE evaluation
bleu_score_2 = bleu_scorer.compute(predictions=text_result_1, references=ground_truth_2)
rouge_score_2 = rouge_scorer.compute(predictions=text_result_1, references=ground_truth_2)

In [None]:
# print scores for MedDialog evaluation
print("score on MedDialog Dataset")
print('BLEU1:', bleu_score_1['precisions'][0]*100)
print(f"""
ROUGE-1: {rouge_score_1['rouge1']*100}
ROUGE-2: {rouge_score_1['rouge2']*100}
ROUGE-L: {rouge_score_1['rougeL']*100}
""")

In [None]:
# print scores for USMLE evaluation
print("score on USMLE Dataset")
print('BLEU1:', bleu_score_2['precisions'][0]*100)
print(f"""
ROUGE-1: {rouge_score_2['rouge1']*100}
ROUGE-2: {rouge_score_2['rouge2']*100}
ROUGE-L: {rouge_score_2['rougeL']*100}
""")

In [None]:
import shutil
shutil.rmtree("/kaggle/working/logs")

In [None]:
print(model.config)

In [18]:
eval_result = trainer.predict(eval_dataset_1, max_new_tokens=20)

In [22]:
print(eval_result.predictions[0])

[9.339613  6.566526  6.977488  9.339613  6.8662615 6.7352104 6.104882
 6.8662615 9.339613  6.2845745 8.641244  6.459679  6.8199744 8.928269
 9.270691  9.898782  9.339613  6.91693   8.992699  9.339613  9.339613
 9.339613  9.474567  9.339613  6.243994  6.7588525]


In [24]:
test = ltokenizer.decode(eval_result.predictions[0], skip_special_tokens=True)

TypeError: argument 'ids': 'float' object cannot be interpreted as an integer

In [None]:
text = ltokenizer.batch_decode(eval_result, skip_special_tokens=True)

In [72]:
print(eval_dataset_1[0]['input'])

Hi doctor, I am a 46 year old male. My weight is 74 kg and I am having an excellent physical shape. I think I have some sleep issue. I did not get proper sleep for any days and hence, I feel tired and irritable at times. I am very active professionally and leading very engaging life. I am not suffering from any depression or mental issues. I get a little bit of anxiety at times, but nothing serious that I can feel. What could be the reason for this? What are the remedies? Please suggest me.


In [77]:
prompt = f"a medical student is preparing for her final examination. Her patient has come to her asking: '{eval_dataset_1[0]['input']}'. Explain to the student the most likely cause/course of action."
messages = [
    {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
    {"role": "user", "content": prompt}
]
text = ltokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
print(text)
model_inputs = ltokenizer(text, return_tensors="pt").to(DEVICE)

<|im_start|>system
You are a medical professional providing consultation and medical diagnostics.<|im_end|>
<|im_start|>user
a medical student is preparing for her final examination. Her patient has come to her asking: 'Hi doctor, I am a 46 year old male. My weight is 74 kg and I am having an excellent physical shape. I think I have some sleep issue. I did not get proper sleep for any days and hence, I feel tired and irritable at times. I am very active professionally and leading very engaging life. I am not suffering from any depression or mental issues. I get a little bit of anxiety at times, but nothing serious that I can feel. What could be the reason for this? What are the remedies? Please suggest me.'. Explain to the student the most likely cause/course of action.<|im_end|>
<|im_start|>assistant



In [75]:
generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=256
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = ltokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [76]:
print(response)

at pulsing warmid legally legally legally isolatedlate fresh legally isolatedoso most that Willuriuate am to will will most to to to will will will will will will will will will will will will will will will will will will will will will will willlideiu am inlate most most to to will most most most to to to will most most most to to will most to to will most to to will most to to will most most most to to will most to to will most to to will most to to will most to to will most to to will most catlorlorlor reduid " will itmenpi most most to to to to will they will most most to to to to will will they will will will reduidu to to to to


In [81]:
from transformers import AutoModel, AutoTokenizer, AutoConfig

tokenizer= AutoTokenizer.from_pretrained('UFNLP/gatortron-base')
config=AutoConfig.from_pretrained('UFNLP/gatortron-base')
mymodel=AutoModel.from_pretrained('UFNLP/gatortron-base')

encoded_input=tokenizer("Bone scan:  Negative for distant metastasis.", return_tensors="pt")
encoded_output = mymodel(**encoded_input)
print (encoded_output)
encoded_output = [
    output_ids[len(encoded_input):] for encoded_input, output_ids in zip(model_inputs.input_ids, generated_ids)
]
decoded = tokenizer.batch_decode(encoded_output)
print(decoded)

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.5924,  0.1630,  0.6396,  ...,  0.7088, -0.3500, -0.0225],
         [-0.7186,  0.7437,  0.4885,  ...,  0.4310, -0.2094,  0.2431],
         [-0.4013,  0.2324,  0.3076,  ...,  0.1691, -0.4022,  0.0109],
         ...,
         [ 0.0802,  0.5385, -0.0017,  ..., -0.2361, -0.2471,  0.0284],
         [-0.0093,  0.0155, -0.0356,  ..., -0.0494, -0.1186, -0.0447],
         [-0.5201, -0.0494,  0.9739,  ...,  0.5900, -0.1954,  0.0582]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-0.3146, -0.2337, -0.2550,  ...,  0.2328,  0.0557,  0.1983]],
       grad_fn=<TanhBackward0>), hidden_states=None, past_key_values=None, attentions=None, cross_attentions=None)


TypeError: argument 'ids': Can't extract `str` to `Vec`