In [None]:
import gc
import json
import os
import time

import accelerate
import pandas as pd
import torch
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

In [None]:
# Read the vulnerability samples from "vul_sample.csv" and keep only the
# label, source-code and length columns.
df = pd.read_csv("../data/vul_sample.csv")
parsed_data = df.loc[:, ["CWE ID", "func_before", "len"]]

In [None]:
# Hugging Face model identifier; used to load both the model and the tokenizer,
# and its last path component names the results file.
model_id = "stabilityai/stable-code-3b"

In [None]:
# Quantization settings for bitsandbytes (this is quantization, not GPU offload).
# NOTE(review): `config` only takes effect when passed as `quantization_config`
# to `from_pretrained`; that argument is currently commented out in the
# model-loading cell, so this object is unused by default.
config = BitsAndBytesConfig(
    load_in_8bit=True,  # 8-bit quantization; a 4-bit variant would use load_in_4bit instead
)

In [None]:
# Load the causal language model for `model_id` in FP32 onto the GPU
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,  # FP32; switch to torch.float16 to reduce memory use
    # quantization_config=config,  # Enable if there is not enough VRAM
    device_map="cuda",  # Place the model on the CUDA device (only the model is loaded here)
)

In [None]:
# Load the tokenizer that matches `model_id`; it is handed to the generation pipeline
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Text-generation pipeline around the loaded model/tokenizer:
# sampled decoding at a low temperature, capped at 64 newly generated tokens.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    temperature=0.3,
    do_sample=True,
)

# Wrap the transformers pipeline so LangChain can use it as an LLM
bloom = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Prompt template: a system instruction that fixes the response format, the user
# request containing the {vulnerable_code} placeholder, and a trailing "Response:"
# cue for the model to continue from.
template = """System: You are a security researcher, expert in detecting security vulnerabilities. Provide response only in following format: vulnerability: <YES or NO> | vulnerability type: <CWE ID> | explanation: <explanation for prediction>. Use N/A in other fields if there are no vulnerabilities. Do not include anything else in response.
User: Evaluate the security of the following code snippet for potential vulnerabilities:
{vulnerable_code}

Response:"""
# `vulnerable_code` is the only input variable the template expects
prompt = PromptTemplate(template=template, input_variables=["vulnerable_code"])

In [None]:
# Build an LLMChain that combines the prompt template with the wrapped pipeline (`bloom`)
llm_chain = LLMChain(prompt=prompt, llm=bloom)

In [None]:
# Resolve the output location *before* the slow inference loop and make sure the
# target directory exists, so a missing folder or an unexpected model id fails
# immediately instead of after every sample has been processed.
# Taking the last path component also works for ids without an "org/" prefix.
model_name = model_id.split("/")[-1]
output_file = f"../data/collected_generated_text/results_{model_name}.json"
os.makedirs(os.path.dirname(output_file), exist_ok=True)

results = []

# Running iterations over all samples from parsed_data
for index, row in tqdm(parsed_data.iterrows(), total=len(parsed_data), desc="Processing", unit=" row"):
    # Sending a question to a model; gradients are not needed for inference
    with torch.no_grad():
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() can
        start_time = time.perf_counter()
        generated_text = llm_chain.invoke(row["func_before"])['text'].strip()
        end_time = time.perf_counter()

    # Release cached GPU memory between samples (outside the timed region)
    torch.cuda.empty_cache()

    lead_time = end_time - start_time

    print(f"Expected CWE ID: {row['CWE ID']}. The model answers:\n{generated_text}")
    print(f"\nExecution time: {lead_time:.2f} seconds")

    # NOTE: the first key keeps its trailing colon so the output schema stays
    # compatible with previously written result files and their consumers.
    result = {
        "Expected CWE ID:": row["CWE ID"],
        "generated_text": generated_text,
        "lead_time": lead_time,
    }
    results.append(result)

# Saving results to a JSON file
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)