In [None]:
from transformers import EncoderDecoderModel, BertTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# The tokenizer and both halves of the BERT2BERT model come from the same checkpoint.
bert_checkpoint = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)

# Warm-start encoder and decoder from the pre-trained weights, then move the model to the GPU.
model = EncoderDecoderModel.from_encoder_decoder_pretrained(bert_checkpoint, bert_checkpoint)
model = model.to("cuda")

# Train / test CSVs; later cells read the rows through the "train" key of each result.
train_csv = "/Users/eddie/Downloads/VLMTrain/MViTv2_Train_Recognition_Full.csv"
test_csv = "/Users/eddie/Downloads/VLMTrain/MViTv2_Test_Recognition_Full.csv"
dataset = load_dataset("csv", data_files=train_csv)
test_dataset = load_dataset("csv", data_files=test_csv)


In [None]:
# Special-token ids for the encoder-decoder model, taken from the BERT tokenizer:
# [CLS] is used as the decoder start token and [PAD] as the padding token.
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.pad_token_id = tokenizer.pad_token_id
# Also record [SEP] as the end-of-sequence token so the saved checkpoint carries
# the same three ids that the inference cell sets after reloading the model.
model.config.eos_token_id = tokenizer.sep_token_id

In [None]:
def preprocess_data(examples):
    """Tokenize a batch of rows into encoder inputs and decoder labels.

    Args:
        examples: Batch mapping with a "context" sequence and an "answer"
            sequence (one entry per row), as passed by ``dataset.map`` with
            ``batched=True``.

    Returns:
        The tokenizer output for the prompts (padded/truncated to 512 tokens)
        with an added "labels" entry holding the answer token ids
        (padded/truncated to 25 tokens, padding replaced by -100).
    """
    question = "What is the next action?"
    # Iterate over the contexts directly: wrapping them in zip() produced
    # 1-tuples, which put the tuple repr ("('...',)") into every prompt.
    inputs = [
        f"Question: {question} Context: {context}"
        for context in examples["context"]
    ]
    targets = examples["answer"]
    model_inputs = tokenizer(inputs, padding="max_length", truncation=True, max_length=512)
    labels = tokenizer(targets, padding="max_length", truncation=True, max_length=25)
    # Replace padding ids with -100 so the cross-entropy loss skips padded
    # positions instead of training the decoder to emit [PAD].
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token_id == pad_id else token_id for token_id in sequence]
        for sequence in labels["input_ids"]
    ]
    return model_inputs

# Tokenize the training and test sets batch-wise with the same preprocessing function.
dataset, test_dataset = [
    raw_split.map(preprocess_data, batched=True)
    for raw_split in (dataset, test_dataset)
]
print(dataset)


In [None]:
from transformers import TrainingArguments, Trainer

# Hyper-parameters gathered in one mapping so they are easy to scan and tweak.
# With save_strategy="no" nothing is checkpointed during the run; the model is
# only persisted by the explicit save cell further down.
training_config = dict(
    output_dir="./bert_qa",
    logging_dir="./logs",
    evaluation_strategy="epoch",
    save_strategy="no",
    num_train_epochs=30,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
)
training_args = TrainingArguments(**training_config)

# Both CSV-backed datasets expose their rows under the "train" key.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=test_dataset["train"],
)

trainer.train()


In [None]:
# Persist the fine-tuned model and its tokenizer side by side so they can be
# reloaded together. `from_pt` is a loading option, not a saving one, so it is
# not passed to save_pretrained.
# NOTE(review): the inference cell loads from "./Recognition/bert_full" while
# this cell writes to "./bert_full" — confirm the two paths refer to the same
# directory for the working directory this notebook is run from.
model.save_pretrained("./bert_full")
tokenizer.save_pretrained("./bert_full")

In [None]:
# Reload the fine-tuned checkpoint together with its tokenizer for inference.
model_name = "./Recognition/bert_full"  # Change to your actual model path
tokenizer = BertTokenizer.from_pretrained(model_name)
model = EncoderDecoderModel.from_pretrained(model_name)
model = model.to("cuda")

# Re-apply the special-token ids and the generation length cap on the loaded config.
inference_config = model.config
inference_config.max_length = 150  # Adjust as needed
inference_config.eos_token_id = tokenizer.sep_token_id
inference_config.pad_token_id = tokenizer.pad_token_id
inference_config.decoder_start_token_id = tokenizer.cls_token_id

def generate_answer(context=""):
    """Generate an answer string for one context with the loaded model.

    Args:
        context: Text fed to the encoder.

    Returns:
        The decoded first generated sequence, with special tokens removed.

    NOTE(review): ``preprocess_data`` trains on prompts of the form
    "Question: ... Context: ...", whereas this function feeds the raw context;
    confirm that callers pass text in the format the checkpoint was trained on.
    """
    # Truncate to the same 512-token limit used during training so that long
    # contexts cannot exceed the encoder's maximum input length, and place the
    # tensors on whatever device the model currently lives on.
    encoded = tokenizer(
        context,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)
    # Pass the attention mask explicitly instead of leaving generate() to infer it.
    output_ids = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# Example: generate_answer("palpate landmark, take swab, prep site, take syringe")

In [None]:
# Load the evaluation CSV and keep only its "train" split (where the rows are read from).
test_csv_path = "/Users/eddie/Downloads/VLMTrain/MViTv2/MViTv2_Test_Recognition.csv"
test_dataset = load_dataset("csv", data_files=test_csv_path)["train"]

# Reference answers and model inputs used by the evaluation cells below.
labels = test_dataset["answer"]
context = test_dataset["context"]

# Show the dataset summary, then the references, then the contexts.
for shown in (test_dataset, labels, context):
    print(shown)

In [None]:
# Generate one answer per evaluation context, echoing each prediction as it is produced.
generated_answers = []
for ctx in context:
    prediction = generate_answer(ctx)
    generated_answers.append(prediction)
    print(prediction)

# Full prediction list followed by the reference answers for side-by-side inspection.
print(generated_answers)
print(labels)

In [None]:
def _normalize_answer(text):
    """Lower-case a string and collapse its whitespace; return non-strings unchanged."""
    if not isinstance(text, str):
        return text
    return " ".join(text.lower().split())


# Exact-match accuracy of the generated answers against the reference labels.
# Both sides are normalized first: the uncased BERT tokenizer lower-cases text,
# so a raw comparison could never match a label containing upper-case letters
# or stray whitespace.
total = len(generated_answers)
correct = sum(
    _normalize_answer(predicted) == _normalize_answer(expected)
    for predicted, expected in zip(generated_answers, labels)
)
print(correct, total)
# Guard the division so an empty prediction list reports 0.0 instead of raising.
print(correct / total if total else 0.0)