In [1]:
import os
import re

import pandas as pd
import torch
from bert_score import score
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Quick look at the raw term/definition dataset; the frame is only displayed
# here, nothing is assigned.
pd.read_csv(filepath_or_buffer='../../tech_terms.csv')

Unnamed: 0,Term,Context Sentence,Prompt,Ground Truth
0,Containerization,Modern applications often rely on containeriza...,"Define the term ""Containerization"" in one sent...",Containerization is a software deployment meth...
1,Inheritance,This software module uses inheritance to share...,"Define the term ""Inheritance"" in one sentence ...",Inheritance is an object-oriented programming ...
2,Latency,Reducing latency is critical in real-time comm...,"Define the term ""Latency"" in one sentence base...",Latency refers to the delay between a user's a...
3,Orchestration,The team implemented orchestration tools to ma...,"Define the term ""Orchestration"" in one sentenc...",Orchestration refers to the automated coordina...
4,Pipeline,Data scientists configured a pipeline to autom...,"Define the term ""Pipeline"" in one sentence bas...",A pipeline is a sequence of data processing st...
...,...,...,...,...
95,Linter,The linter flagged several issues before the c...,"Define the term ""Linter"" in one sentence based...",A tool that analyzes source code to detect sty...
96,Static Analysis,Static analysis caught several bugs that hadn'...,"Define the term ""Static Analysis"" in one sente...",The process of examining code for errors or is...
97,Tokenization,Tokenization was the first step in preparing t...,"Define the term ""Tokenization"" in one sentence...",The process of splitting text into smaller uni...
98,Version Control,Version control made it easier to track change...,"Define the term ""Version Control"" in one sente...",A system that records changes to files or code...


In [None]:
# Authenticate with the Hugging Face Hub before the model is downloaded.
# Do not paste a token into this cell: it would be saved with the notebook and
# leak through version control or sharing. Export it as HF_TOKEN instead.
# When the variable is unset, token=None makes login() prompt interactively.
login(token=os.environ.get("HF_TOKEN"))

In [4]:
# Load the evaluation set.
df = pd.read_csv("../../tech_terms.csv")

# Fail fast if any column the later cells rely on is absent from the file.
required_cols = ["Term", "Context Sentence", "Prompt", "Ground Truth"]
missing_cols = list(filter(lambda name: name not in df.columns, required_cols))
if len(missing_cols) > 0:
    raise ValueError(f"Missing columns: {missing_cols}")

# Rows lacking either the prompt or the reference definition cannot be
# generated for or scored, so discard them and renumber the index from 0.
df = df.dropna(subset=["Prompt", "Ground Truth"])
df = df.reset_index(drop=True)

In [5]:
# Checkpoint to evaluate.
model_id = "google/gemma-2b"

# Tokenizer and weights come from the same checkpoint. The weights are
# requested in float16 and device placement is delegated via device_map="auto".
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Text-generation pipeline with sampling disabled (do_sample=False) and at most
# 100 newly generated tokens per prompt.
generator = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    do_sample=False,
    max_new_tokens=100,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.88it/s]
Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [6]:
# Generate one response per prompt and store them in a new column of df.
outputs = []
for prompt in df["Prompt"]:
    try:
        # Ask the pipeline for the continuation only (return_full_text=False)
        # rather than removing the prompt with str.replace: replace() would
        # also delete any later occurrence of the prompt text inside the
        # answer, corrupting generations that quote the prompt back.
        result = generator(prompt, return_full_text=False)[0]["generated_text"]
        response = result.strip()
    except Exception as e:
        # Best-effort: record the failure in-band so a single bad prompt does
        # not abort the run and `outputs` stays aligned with df's rows.
        # NOTE(review): these "[ERROR: ...]" markers are later cleaned and
        # scored like real answers; filter them out if that is not intended.
        response = f"[ERROR: {str(e)}]"
    outputs.append(response)

df["Gemma_2B_Output"] = outputs

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

In [7]:
# Display the frame, which now carries the Gemma_2B_Output column.
df

Unnamed: 0,Term,Context Sentence,Prompt,Ground Truth,Gemma_2B_Output
0,Containerization,Modern applications often rely on containeriza...,"Define the term ""Containerization"" in one sent...",Containerization is a software deployment meth...,Answer:\n\nStep 1/2\nContainerization is a tec...
1,Inheritance,This software module uses inheritance to share...,"Define the term ""Inheritance"" in one sentence ...",Inheritance is an object-oriented programming ...,"Answer:\n\nStep 1/2\nThe term ""Inheritance"" re..."
2,Latency,Reducing latency is critical in real-time comm...,"Define the term ""Latency"" in one sentence base...",Latency refers to the delay between a user's a...,Answer:\n\nLatency is the time it takes for a ...
3,Orchestration,The team implemented orchestration tools to ma...,"Define the term ""Orchestration"" in one sentenc...",Orchestration refers to the automated coordina...,Answer:\n\nOrchestration is the process of con...
4,Pipeline,Data scientists configured a pipeline to autom...,"Define the term ""Pipeline"" in one sentence bas...",A pipeline is a sequence of data processing st...,"Answer:\n\nStep 1/2\nThe term ""Pipeline"" in th..."
...,...,...,...,...,...
95,Linter,The linter flagged several issues before the c...,"Define the term ""Linter"" in one sentence based...",A tool that analyzes source code to detect sty...,"Answer:\n\nStep 1/2\nContext sentence: ""Linter..."
96,Static Analysis,Static analysis caught several bugs that hadn'...,"Define the term ""Static Analysis"" in one sente...",The process of examining code for errors or is...,Answer:\n\nStep 1/2\nStatic Analysis is a soft...
97,Tokenization,Tokenization was the first step in preparing t...,"Define the term ""Tokenization"" in one sentence...",The process of splitting text into smaller uni...,Answer:\n\nTokenization is the process of brea...
98,Version Control,Version control made it easier to track change...,"Define the term ""Version Control"" in one sente...",A system that records changes to files or code...,Answer:\n\nVersion Control is a software tool ...


In [8]:
def clean_output(text):
    """Strip the boilerplate the model wraps around its answer.

    Removes a leading "Answer:" label (case-insensitive), every
    "Step <n>/<m>" marker (case-insensitive, anywhere in the text), and
    replaces each run of newlines with a single space.

    Args:
        text: Raw generated string.

    Returns:
        The cleaned string with surrounding whitespace removed.
    """
    # Trim first: the "Answer:" pattern is anchored at the start of the string,
    # so leading blank lines or spaces would otherwise stop it from matching.
    text = text.strip()
    text = re.sub(r"(?i)^answer:\s*", "", text)  # leading 'Answer:' label
    text = re.sub(r"(?i)step\s*\d+/\d+\s*", "", text)  # 'Step 1/2', 'step 2/2', ...
    text = re.sub(r"\n+", " ", text)  # flatten to a single line
    return text.strip()

# Clean every generation element-wise, overwriting the raw column in place.
df["Gemma_2B_Output"] = df["Gemma_2B_Output"].map(clean_output)

In [None]:
def deduplicate_sentences(text):
    """Drop repeated sentences from ``text``, keeping first occurrences in order.

    Sentences are the pieces obtained by splitting on ``'. '``. The final piece
    has no following space and therefore keeps its terminal period, so it is
    also compared without that period; otherwise "A. B. A." would never be
    recognised as repeating "A".

    Args:
        text: String of sentences separated by ``'. '``.

    Returns:
        The text with exact duplicate sentences removed. If the input ended
        with a period, the result does too.
    """
    pieces = text.split('. ')
    final = len(pieces) - 1
    seen = set()
    kept = []
    for pos, piece in enumerate(pieces):
        duplicate = piece in seen
        if not duplicate and pos == final and piece.endswith('.'):
            # The last sentence still has its closing period attached.
            duplicate = piece[:-1] in seen
        if duplicate:
            continue
        seen.add(piece)
        kept.append(piece)

    result = '. '.join(kept)
    # Dropping a duplicated final sentence also drops the closing period.
    if text.endswith('.') and not result.endswith('.'):
        result += '.'
    return result

# Remove repeated sentences from every generation, overwriting the column.
df["Gemma_2B_Output"] = df["Gemma_2B_Output"].map(deduplicate_sentences)

In [10]:
# Display the frame again to inspect the post-processed Gemma_2B_Output column.
df

Unnamed: 0,Term,Context Sentence,Prompt,Ground Truth,Gemma_2B_Output
0,Containerization,Modern applications often rely on containeriza...,"Define the term ""Containerization"" in one sent...",Containerization is a software deployment meth...,Containerization is a technique used to packag...
1,Inheritance,This software module uses inheritance to share...,"Define the term ""Inheritance"" in one sentence ...",Inheritance is an object-oriented programming ...,"The term ""Inheritance"" refers to the transfer ..."
2,Latency,Reducing latency is critical in real-time comm...,"Define the term ""Latency"" in one sentence base...",Latency refers to the delay between a user's a...,Latency is the time it takes for a computer to...
3,Orchestration,The team implemented orchestration tools to ma...,"Define the term ""Orchestration"" in one sentenc...",Orchestration refers to the automated coordina...,Orchestration is the process of controlling a ...
4,Pipeline,Data scientists configured a pipeline to autom...,"Define the term ""Pipeline"" in one sentence bas...",A pipeline is a sequence of data processing st...,"The term ""Pipeline"" in the context sentence ""T..."
...,...,...,...,...,...
95,Linter,The linter flagged several issues before the c...,"Define the term ""Linter"" in one sentence based...",A tool that analyzes source code to detect sty...,"Context sentence: ""Linter is a tool used to cl..."
96,Static Analysis,Static analysis caught several bugs that hadn'...,"Define the term ""Static Analysis"" in one sente...",The process of examining code for errors or is...,Static Analysis is a software testing techniqu...
97,Tokenization,Tokenization was the first step in preparing t...,"Define the term ""Tokenization"" in one sentence...",The process of splitting text into smaller uni...,Tokenization is the process of breaking down a...
98,Version Control,Version control made it easier to track change...,"Define the term ""Version Control"" in one sente...",A system that records changes to files or code...,Version Control is a software tool that allows...


In [11]:
# Score each generated definition against its reference with BERTScore.
candidates = df["Gemma_2B_Output"].tolist()
references = df["Ground Truth"].tolist()
P, R, F1 = score(candidates, references, lang="en", verbose=True)

# Attach the per-row precision, recall and F1 values to the frame.
df["BERTScore_Precision"] = P
df["BERTScore_Recall"] = R
df["BERTScore_F1"] = F1

# Headline number: mean F1 over all rows.
print(f"Average BERTScore F1: {F1.mean():.4f}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  return forward_call(*args, **kwargs)
100%|██████████| 4/4 [00:07<00:00,  1.91s/it]


computing greedy matching.


100%|██████████| 2/2 [00:00<00:00, 85.59it/s]


done in 7.68 seconds, 13.02 sentences/sec
Average BERTScore F1: 0.8620


In [12]:
# Display the frame with the three BERTScore columns attached.
df

Unnamed: 0,Term,Context Sentence,Prompt,Ground Truth,Gemma_2B_Output,BERTScore_Precision,BERTScore_Recall,BERTScore_F1
0,Containerization,Modern applications often rely on containeriza...,"Define the term ""Containerization"" in one sent...",Containerization is a software deployment meth...,Containerization is a technique used to packag...,0.890981,0.935369,0.912636
1,Inheritance,This software module uses inheritance to share...,"Define the term ""Inheritance"" in one sentence ...",Inheritance is an object-oriented programming ...,"The term ""Inheritance"" refers to the transfer ...",0.859705,0.862280,0.860990
2,Latency,Reducing latency is critical in real-time comm...,"Define the term ""Latency"" in one sentence base...",Latency refers to the delay between a user's a...,Latency is the time it takes for a computer to...,0.871621,0.878898,0.875245
3,Orchestration,The team implemented orchestration tools to ma...,"Define the term ""Orchestration"" in one sentenc...",Orchestration refers to the automated coordina...,Orchestration is the process of controlling a ...,0.874204,0.892731,0.883370
4,Pipeline,Data scientists configured a pipeline to autom...,"Define the term ""Pipeline"" in one sentence bas...",A pipeline is a sequence of data processing st...,"The term ""Pipeline"" in the context sentence ""T...",0.830276,0.856762,0.843311
...,...,...,...,...,...,...,...,...
95,Linter,The linter flagged several issues before the c...,"Define the term ""Linter"" in one sentence based...",A tool that analyzes source code to detect sty...,"Context sentence: ""Linter is a tool used to cl...",0.856980,0.889557,0.872964
96,Static Analysis,Static analysis caught several bugs that hadn'...,"Define the term ""Static Analysis"" in one sente...",The process of examining code for errors or is...,Static Analysis is a software testing techniqu...,0.861479,0.912678,0.886340
97,Tokenization,Tokenization was the first step in preparing t...,"Define the term ""Tokenization"" in one sentence...",The process of splitting text into smaller uni...,Tokenization is the process of breaking down a...,0.838749,0.878198,0.858020
98,Version Control,Version control made it easier to track change...,"Define the term ""Version Control"" in one sente...",A system that records changes to files or code...,Version Control is a software tool that allows...,0.884055,0.894632,0.889312


In [None]:
# Persist prompts, generations and scores; index=False keeps the row index
# out of the written file.
df.to_csv(path_or_buf="gemma2b_bert_score_results.csv", index=False)
print("Saved: gemma2b_bert_score_results.csv")