In [7]:
from transformers import AutoTokenizer, Gemma3ForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

import torch

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Path handed to AutoTokenizer.from_pretrained further down in this notebook.
GEMMA_PATH = "./Gemma-3-12B-QMSum-QA-v3"
# Path handed to PeftModel.from_pretrained as the LoRA adapter to load.
LORA_PATH = "./output/checkpoint-5470"

In [9]:
# Load the base model in bfloat16. device_map="auto" lets the loader decide
# where each module is placed.
base_model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3-12b-it",
    torch_dtype=torch.bfloat16,
    attn_implementation='eager',
    device_map="auto",
)

# Attach the LoRA adapter stored at LORA_PATH to the base model.
model = PeftModel.from_pretrained(base_model, LORA_PATH)

# Merge the adapter weights into the base weights for inference
# (removes adapter overhead).
model = model.merge_and_unload()
# Deliberately no explicit `model.to("cuda")`: placement was already decided by
# device_map="auto" at load time. Moving a dispatched model afterwards is a
# no-op in the single-GPU case and triggers a warning or error when modules
# were split across devices or offloaded.

# The tokenizer is read from GEMMA_PATH rather than from the hub model id.
tokenizer = AutoTokenizer.from_pretrained(GEMMA_PATH)

Loading checkpoint shards: 100%|██████████| 5/5 [01:23<00:00, 16.62s/it]


In [10]:
from datasets import load_from_disk

# Load the dataset saved under processed_data/test. The next cell reads its
# 'prompt' and 'completion' columns.
test_dataset = load_from_disk("processed_data/test")

In [11]:
# Show the first prompt, a divider line, and the first completion, one per line.
print(
    test_dataset['prompt'][0],
    "=================",
    test_dataset['completion'][0],
    sep="\n",
)

You are an assistant that answers questions about meeting transcripts.

Meeting Transcript:
Industrial Designer: Okay well um . So our design looks something like this . This being the wheel that you use to uh change channels or volume or whatever . This is a button , serves as the power button if you hold it down , and if you just tap on it I think it brings up the menu . And uh the base of the remote control , which has a squishy spongy rubbery feel , is interchangeable . So you can change the colour , according to your {disfmarker} to suit your living room or whatever . And it comes {disfmarker}
User Interface: You could change the vegetable , or fruit .
Industrial Designer: yeah , I can change the vegetable .
Project Manager: Oh is that broccoli ? {vocalsound}
Industrial Designer: This one's broccoli {vocalsound} .
User Interface: Yeah .
Industrial Designer: So this snaps off and you can put on whichever one you want . This is not to scale 'cause it would have the battery inside it

In [12]:
def extract_answer(text):
    """Return the text following "Answer:" on the first line that starts with it.

    Each line is stripped of surrounding whitespace before the prefix check,
    and the returned remainder is stripped as well. Returns None when no line
    starts with "Answer:".
    """
    marker = "Answer:"
    # Lines are split on '\n' only, then stripped lazily one at a time.
    stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
    return next(
        (
            candidate[len(marker):].strip()
            for candidate in stripped_lines
            if candidate.startswith(marker)
        ),
        None,
    )

In [None]:
# From Groundtruth
# def get_answer(index):
#     inputs = tokenizer(
#         [test_dataset["prompt"][index] + "\n\nAnswer: " + tokenizer.eos_token],
#         return_tensors="pt"
#     ).to("cuda")

#     outputs = model.generate(
#         input_ids=inputs.input_ids,
#         attention_mask=inputs.attention_mask,
#         max_new_tokens=64,
#         eos_token_id=tokenizer.eos_token_id,
#         use_cache=True,
#     )

#     response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
#     # print(response[0])
#     return extract_answer(response[0])

# From RAG
def get_answer(prompt, max_new_tokens=64):
    """Generate a completion for one prompt and return the extracted answer.

    Args:
        prompt: Prompt text. "\n\nAnswer: " followed by the tokenizer's EOS
            token is appended to it before tokenization.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 64, the value that was previously hard-coded.

    Returns:
        Whatever extract_answer returns for the first decoded sequence: the
        text after the first line starting with "Answer:", or None if no such
        line exists.
    """
    # NOTE(review): the EOS token is placed at the end of the prompt, i.e.
    # before generation starts. Presumably this mirrors the training format;
    # confirm it is intentional.
    inputs = tokenizer(
        [prompt + "\n\nAnswer: " + tokenizer.eos_token],
        return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of assuming "cuda"

    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
    )

    # Special tokens are dropped while decoding. extract_answer then looks
    # for the "Answer:" line (presumably the decoded text still contains the
    # prompt, including the cue appended above; verify).
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return extract_answer(response[0])

In [14]:
# print(get_answer(0))

In [15]:
import pandas as pd

# Read the pipe-separated prompt file and display it as the cell output.
test_data = pd.read_csv("./prompt.csv", sep="|")
test_data

Unnamed: 0,query,answer,retrieved_transcripts,prompt
0,Summarize the discussion about the efficacy of...,Barry Hughes first stated that children had fe...,"[""Barry Hughes: I think the law should reflect...",You are an assistant that answers questions ab...
1,What did Barry Hughes think about the legal fr...,Barry thought that the legal framework would m...,"[""Barry Hughes: I think the law should reflect...",You are an assistant that answers questions ab...
2,What was the two-stage test during prosecution...,The first stage was to check whether there was...,"[""Lynne Neagle AM: Just before you move on, ca...",You are an assistant that answers questions ab...
3,Summarize the discussion about the divergence ...,Barry Hughes indicated that the Bill would imp...,"[""Barry Hughes: If this Bill goes through, the...",You are an assistant that answers questions ab...
4,What did Barry Hughes think about the implicat...,Barry Hughes concluded two kinds of implicatio...,"['Dawn Bowden AM: Okay. So, would you see the ...",You are an assistant that answers questions ab...
...,...,...,...,...
239,What did the team think about Marketing's sugg...,Industrial Designer thought it would be lost e...,"[""Industrial Designer: Now what I was saying b...",You are an assistant that answers questions ab...
240,What did the team think of Marketing's idea of...,Project Manager believed that it would not app...,"['Marketing: {vocalsound} Okay , um , now the ...",You are an assistant that answers questions ab...
241,Summarize the design and availability of actua...,The chip TA11835 and PCB board will be used in...,"['Marketing: {vocalsound} Okay , um , now the ...",You are an assistant that answers questions ab...
242,What are the designs of normal remotes and how...,"Mostly black and white colours, the normal one...","[""Industrial Designer: but the the thing with ...",You are an assistant that answers questions ab...


In [16]:
# Show the prompt stored in the first row of the prompt table.
print(test_data['prompt'].iloc[0])

You are an assistant that answers questions about meeting transcripts.

Meeting Transcript:
Barry Hughes: I think the law should reflect contemporary society and the law is evolutionary in nature, and should be evolutionary in nature. And just as if we go back 50 years there are types of behaviour that we might all have contemplated back then as being socially acceptable, many of those types of behaviour are not now, and the law has changed to reflect that. We don't need to go back a very long way to find, for example, that there was no such offence as raping one's married wife, and the law has changed to reflect the fact that that is simply not acceptable. And from what I've seen of the research, and I think back to the 2007 review, to which you've just referred, there is evidence within that review already that opinions are changing. And I've seen research more current than that that suggests that young people find it less acceptable to use reasonable punishment in chastising a child

In [None]:
# Collected [ground-truth answer, predicted answer] pairs.
llm_ans = []

# From Groundtruth
# for i in range(len(test_dataset)):
#     llm_ans.append([test_dataset[i]['completion'],get_answer(i)])

# From RAG
# Walk the 'answer' and 'prompt' columns in lockstep; each prompt is sent
# through get_answer and paired with its reference answer.
llm_ans.extend(
    [reference_answer, get_answer(rag_prompt)]
    for reference_answer, rag_prompt in zip(test_data['answer'], test_data['prompt'])
)

# save to csv
# import pandas as pd
result_columns = ['ground_truth_answer', 'predicted_answer']
df = pd.DataFrame(llm_ans, columns=result_columns)
df.to_csv('gemma_3_12B_QMSumQA_RAG.csv', index=False)