In [12]:
# import subprocess
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pprint
import re

# from huggingface_hub import notebook_login
# notebook_login()

In [33]:
FEW_SHOT_PROMPT = """
Example 1:
Commit Message: "Use version in SSL_METHOD not SSL structure.
When deciding whether to use TLS 1.2 PRF and record hash algorithms use the version number in the corresponding SSL_METHOD structure instead of the SSL structure. The SSL structure version is sometimes inaccurate. Note: OpenSSL 1.0.2 and later effectively do this already. (CVE-2013-6449)"
Function Code:
" long ssl_get_algorithm2(SSL *s)
{{
    long alg2 = s->s3->tmp.new_cipher->algorithm2;
    if (TLS1_get_version(s) >= TLS1_2_VERSION &&
        alg2 == (SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF))
        return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256;
    return alg2;
}}"
Chain-of-Thought: The commit message clearly references CVE-2013-6449 and the function logic relies on an inaccurate version number from the SSL structure. This indicates a vulnerability.
Answer: Yes

Example 2:
Commit Message: "None"
Function Code:
"gnutls_session_get_data (gnutls_session_t session,
                         void *session_data, size_t * session_data_size)
{{
  gnutls_datum_t psession;
  int ret;
  if (session->internals.resumable == RESUME_FALSE)
    return GNUTLS_E_INVALID_SESSION;
  psession.data = session_data;
  ret = _gnutls_session_pack (session, &psession);
  if (ret < 0)
    {{
       gnutls_assert ();
       return ret;
     }}
  *session_data_size = psession.size;
  if (psession.size > *session_data_size)
     {{
       ret = GNUTLS_E_SHORT_MEMORY_BUFFER;
       goto error;
     }}
  if (session_data != NULL)
     memcpy (session_data, psession.data, psession.size);
  ret = 0;
error:
  _gnutls_free_datum (&psession);
  return ret;
}}"
Chain-of-Thought: The function calls memcpy without sufficient boundary checks and the context mentions CWE-119 and CVE-2011-4128. This is indicative of a buffer overflow vulnerability.
Answer: Yes

Example 3:
Commit Message: "None"
Function Code:
"gnutls_session_get_data (gnutls_session_t session,
                         void *session_data, size_t * session_data_size)
{{
  gnutls_datum_t psession;
  int ret;
  if (session->internals.resumable == RESUME_FALSE)
    return GNUTLS_E_INVALID_SESSION;
  psession.data = session_data;
  ret = _gnutls_session_pack (session, &psession);
  if (ret < 0)
    {{
      gnutls_assert ();
      return ret;
    }}
  if (psession.size > *session_data_size)
     {{
       ret = GNUTLS_E_SHORT_MEMORY_BUFFER;
       goto error;
     }}
  if (session_data != NULL)
    memcpy (session_data, psession.data, psession.size);
  ret = 0;
error:
  _gnutls_free_datum (&psession);
  return ret;
}}"
Chain-of-Thought: This function is nearly identical to Example 2. It exhibits a risk of buffer overflow due to inadequate checking before memcpy. The associated vulnerability is confirmed by CWE-119.
Answer: Yes

Now, I want you to act as a software vulnerability detector. Based on the commit message and the code snippet provided, determine if the commit introduces a vulnerability. Answer with "Yes" for vulnerable and "No" for not vulnerable. Provide a brief explanation of your reasoning.

Commit Message: "{commit_message}"
Function Code:
{func}
Chain-of-Thought:
"""

# New sample data (for instance, from data point idx 0)
NEW_SAMPLE_COMMIT_MESSAGE = (
    "Use version in SSL_METHOD not SSL structure.\n\n"
    "When deciding whether to use TLS 1.2 PRF and record hash algorithms\n"
    "use the version number in the corresponding SSL_METHOD structure\n"
    "instead of the SSL structure. The SSL structure version is sometimes inaccurate. "
    "Note: OpenSSL 1.0.2 and later effectively do this already.\n(CVE-2013-6449)"
)

NEW_SAMPLE_FUNC = (
    " long ssl_get_algorithm2(SSL *s)\n"
    "        {{\n"
    "        long alg2 = s->s3->tmp.new_cipher->algorithm2;\n"
    "       if (TLS1_get_version(s) >= TLS1_2_VERSION &&\n"
    "            alg2 == (SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF))\n"
    "                return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256;\n"
    "        return alg2;\n"
    "        }}\n"
)

# Construct the final prompt by inserting the new sample data
final_prompt = FEW_SHOT_PROMPT.format(
    commit_message=NEW_SAMPLE_COMMIT_MESSAGE,
    func=NEW_SAMPLE_FUNC
)

# MODEL_NAME = "llama3.2"

In [None]:
HUGGINGFACE_TOKEN = ""
cache_dir = "/speed-scratch/ra_mdash/tmp/huggingface"
model_name = "google/codegemma-2b" #"bigcode/starcoder2-3b"

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir, use_auth_token=HUGGINGFACE_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir, use_auth_token=HUGGINGFACE_TOKEN)

In [40]:
# Create a text-generation pipeline.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)

# Set parameters for generation.
generation_args = {
    "max_new_tokens": 100,
    "temperature": 0.1,
    "do_sample": True,
}


# Run inference with the prompt.
output = generator(final_prompt, **generation_args)
match = re.search(r'Answer:\s*(Yes|No)', output[0]['generated_text'], re.IGNORECASE)
if match:
   match = match.group(1)

cot = re.search(r'Chain-of-thought:\s*(.*)', output[0]['generated_text'], re.IGNORECASE)
if cot:
    cot = cot.group(1).split("\n")[0]  # Extract the first line after "Chain-of-thought:"

print("Inference Output:")
print(output[0]['generated_text'])
print("Parsed output")
print(match)
print(cot)

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Inference Output:

Example 1:
Commit Message: "Use version in SSL_METHOD not SSL structure.
When deciding whether to use TLS 1.2 PRF and record hash algorithms use the version number in the corresponding SSL_METHOD structure instead of the SSL structure. The SSL structure version is sometimes inaccurate. Note: OpenSSL 1.0.2 and later effectively do this already. (CVE-2013-6449)"
Function Code:
" long ssl_get_algorithm2(SSL *s)
{
    long alg2 = s->s3->tmp.new_cipher->algorithm2;
    if (TLS1_get_version(s) >= TLS1_2_VERSION &&
        alg2 == (SSL_HANDSHAKE_MAC_DEFAULT|TLS1_PRF))
        return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256;
    return alg2;
}"
Chain-of-Thought: The commit message clearly references CVE-2013-6449 and the function logic relies on an inaccurate version number from the SSL structure. This indicates a vulnerability.
Answer: Yes

Example 2:
Commit Message: "None"
Function Code:
"gnutls_session_get_data (gnutls_session_t session,
                         void *s

In [5]:
import json
import time
import pandas as pd

In [None]:
On the other hand, I have a list of "projects" in another file. I want to collect 5 data samples of each project from this train.jsonl file where the target=0 and 5 5 data samples where the target=1. Then I will save them as a mapping where key is the project, and values are data samples nested by target. 

Please write the python code for these.

In [None]:
input_file = "PrimeVul_v0.1/primevul_valid.jsonl"
count = 0
max_test = 5000
results = []

with open(input_file, "r") as f:
    for line in f:
        if count < max_test:
            if line.strip():  # Skip empty lines
                try:
                    data = json.loads(line)
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON: {e}")
                    continue
    
                # Extract the fields
                project = data.get("project", ""),
                # commit_message = data.get("commit_message", "")
                # func = data.get("func", "")
                # target = data.get("target", "")
                # cwe = data.get("cwe", [])
                # cve = data.get("cve", "")
                # cve_desc = data.get("cve_desc", "")
    
                # Call get_result with the extracted fields
                result = 1 #get_result(commit_message, func, target, cwe, cve, cve_desc)
    
                # Build an output dictionary with the required fields and result
                output_data = {
                    "project": project,
                    # "commit_message": commit_message,
                    # "func": func,
                    # "target": target,
                    # "cwe": cwe,
                    # "cve": cve,
                    # "cve_desc": cve_desc,
                    # "result": result
                }
                results.append(output_data)

                count = count + 1

                # Pause periodically (e.g., one second) before processing the next record.
                time.sleep(1)

            

In [1]:
import json

unique_projects = set()

input_file = "PrimeVul_v0.1/primevul_valid.jsonl"
count = 0
max_test = 5000

with open(input_file, "r") as f:
    for line in f:
        if count < max_test:
            data = json.loads(line)
            project = data.get("project")
            if project:
                unique_projects.add(project)

# Convert to sorted list if you want it in order
unique_projects = sorted(unique_projects)

# Optionally, write to a file
with open("unique_projects.txt", "w") as out_file:
    for project in unique_projects:
        out_file.write(project + "\n")

# If you just want to print the result
print("Found", len(unique_projects), "unique projects.")
print(unique_projects)


Found 217 unique projects.
['389-ds-base', 'Chrome', 'FFmpeg', 'FreeRDP', 'FreeRTOS-Kernel', 'GIMP', 'ImageMagick', 'ImageMagick6', 'LibRaw', 'LuaJIT', 'OpenDoas', 'OpenSC', 'Pillow', 'SoftHSMv2', 'abcm2ps', 'acl', 'acrn-hypervisor', 'ardour', 'ast', 'asylo', 'at91bootstrap', 'atomicparsley', 'bash', 'bdwgc', 'bfgminer', 'bootstrap-dht', 'bsdiff4', 'bwm-ng', 'bzip2', 'c-ares', 'c-blosc2', 'ceph', 'cgit', 'clamav-devel', 'clutter', 'contiki-ng', 'core', 'cpio', 'cpp-peglib', 'crawl', 'curl', 'cyrus-imapd', 'doom-vanille', 'dpdk', 'edk2', 'electron', 'envoy', 'exif', 'exim', 'exiv2', 'faad2', 'flac', 'flatpak', 'fluent-bit', 'fontforge', 'gd-libgd', 'gegl', 'ghostpdl', 'gilcc', 'git', 'glibc', 'gnulib', 'gnumeric', 'gnuplot', 'gnutls', 'gpac', 'graphviz', 'grep', 'gssproxy', 'gst-plugins-good', 'haproxy', 'hermes', 'hhvm', 'hivex', 'htmldoc', 'htslib', 'httpd', 'icu', 'inspircd', 'ipmitool', 'iproute2', 'isolated-vm', 'jasper', 'jerryscript', 'jhead', 'jsish', 'kdeconnect-kde', 'keepkey-

In [None]:
results

In [None]:
df = pd.DataFrame(results)
print(df.shape)
df.head()

In [3]:
import json

# Load the list of projects
# with open("projects.txt", "r") as f:
#     projects = set(line.strip() for line in f if line.strip())

projects = unique_projects #df["projects"]

# Initialize the dictionary to hold samples
project_samples = {
    project: {0: [], 1: []} for project in projects
}

# Read the JSONL file and collect samples
with open("PrimeVul_v0.1/primevul_train.jsonl", "r") as f:
    for line in f:
        data = json.loads(line)
        project = data.get("project")
        target = data.get("target")

        # Check if the project is one we're interested in
        if project in projects and len(project_samples[project][target]) < 5:
            project_samples[project][target].append(data)

        # Stop early if we’ve collected all needed samples
        if all(len(project_samples[p][0]) == 5 and len(project_samples[p][1]) == 5 for p in projects):
            break

# Save the results
with open("project_sample_mapping.json", "w") as out_file:
    json.dump(project_samples, out_file, indent=2)


In [26]:
model_id = "bigcode/starcoder2-3b"
model_name = model_id.split("/")[1]
model_name

'starcoder2-3b'

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

cache_dir = "/speed-scratch/ra_mdash/tmp/huggingface"

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Base", trust_remote_code=True, 
                                             load_in_8bit=True, 
                                             # torch_dtype=torch.bfloat16, 
                                             cache_dir=cache_dir)
input_text = "#write a quick sort algorithm"
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.


#write a quick sort algorithm
#quick sort is a divide and conquer algorithm
#it works by selecting a pivot element and then partitioning the array into two subarrays, one with elements less than the pivot and one with elements greater than the pivot
#the pivot element is then placed in its correct position in the sorted array
#the process is then repeated on the two subarrays until the entire array is sorted
#the time complexity of quick sort is O(nlogn) in the average case and O(n^2) in the worst case
#the space complexity of quick sort is O(logn


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

cache_dir = "/speed-scratch/ra_mdash/tmp/huggingface"

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", trust_remote_code=True, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", trust_remote_code=True, 
                                             load_in_8bit=True, 
                                             # torch_dtype=torch.bfloat16, 
                                             cache_dir=cache_dir)
messages=[
    { 'role': 'user', 'content': "write a quick sort algorithm in python."}
]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
# tokenizer.eos_token_id is the id of <｜end▁of▁sentence｜>  token
outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=50, top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))


### Run scripts on SLURM

In [1]:
%load_ext slurm_magic
import warnings
warnings.filterwarnings("ignore")

### Zero-shot

#### Run starcoder2-3b

In [3]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --mem=32G               ## Assign 1G memory per node 
#SBATCH --gres=gpu:2

python run-primevul.py 

('Submitted batch job 516590\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run Llama-3.2-3B-Instruct

In [66]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --mem=32G               ## Assign 1G memory per node 
#SBATCH --gres=gpu:2

python run-primevul.py --model_id meta-llama/Llama-3.2-3B-Instruct

('Submitted batch job 516574\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run Qwen2.5-Coder-3B

In [5]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --mem=32G               ## Assign 1G memory per node 
#SBATCH --gres=gpu:2

python run-primevul.py --model_id Qwen/Qwen2.5-Coder-3B

('Submitted batch job 516673\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run DeepSeek-Coder-V2-Lite-Instruct

In [11]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

python run-primevul.py --model_id deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct #--data_path /speed-scratch/ra_mdash/PrimeVul_v0.1/sample_data.jsonl

('Submitted batch job 516856\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [9]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

python run-primevul-deepseek.py --model_id deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct --data_path /speed-scratch/ra_mdash/PrimeVul_v0.1/sample_data.jsonl

('Submitted batch job 516838\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run codegemma-7b

In [3]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul_cg7        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

python run-primevul-codegemma.py --model_id google/codegemma-7b-it #--data_path /speed-scratch/ra_mdash/PrimeVul_v0.1/sample_data.jsonl

('Submitted batch job 518969\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

### Few-shot

#### Submit jobs

#### Run starcoder2-3b

In [13]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=few_shot        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

python run-primevul-few-shot.py #--model_id google/codegemma-2b

('Submitted batch job 519373\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run Llama3.2

In [12]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=few_shot        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

python run-primevul-few-shot.py --model_id meta-llama/Llama-3.2-3B-Instruct

('Submitted batch job 519289\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Run Qwen2.5-Coder-3B

In [14]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=prime_vul        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --mem=160G               ## Assign 1G memory per node 
#SBATCH --gres=gpu:2

python run-primevul-few-shot.py --model_id Qwen/Qwen2.5-Coder-3B

('Submitted batch job 519375\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Generate Few-shot Data

In [8]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=few_shot        ## Give the job a name 
#SBATCH --mail-type=ALL        ## Receive all email type notifications 
#SBATCH --chdir=./             ## Use currect directory as working directory 
#SBATCH --nodes=1 
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=1
#SBATCH --mem=160G                ## Assign 1G memory per node 
#SBATCH --gpus=2

TORCH_USE_CUDA_DSA=1 python generate_few_shot_data.py #--model_id google/codegemma-7b-it #--data_path /speed-scratch/ra_mdash/PrimeVul_v0.1/sample_data.jsonl

('Submitted batch job 518975\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

#### Create few-shot examples

In [11]:
import json
import random

# ---------------------
# 1. Configuration
# ---------------------
FEW_SHOT_COUNT = 4  # Total number of few-shot examples to retrieve (e.g., 2-shot, 3-shot, 4-shot, etc.)
REASONING_FILE = "reasoning_output.json"
PRIMEVUL_FILE = "PrimeVul_v0.1/primevul_valid.jsonl"
OUTPUT_FILE = f"PrimeVul_v0.1/primevul_with_{FEW_SHOT_COUNT}_shot.json"


with open(REASONING_FILE, "r") as f:
    reasoning_data = json.load(f)

def get_reasoning_list(project_name, label):
    """Return the list for a given project and label (as string) or an empty list if not found."""
    if project_name in reasoning_data:
        return reasoning_data[project_name].get(str(label), [])
    return []

# ---------------------
# 3. Few-Shot Retrieval Logic (with caching)
# ---------------------
# We create a cache so that for each project we compute the few-shot examples only once.
few_shot_cache = {}

def get_few_shot_samples(project_name: str, n: int):
    """
    Returns n examples alternating between label 0 and 1 from the given project.
    If the project doesn't have enough examples for a label, fallback to 'FFmpeg'.
    Also returns a flag indicating whether a fallback was used in any of the shots.
    This function does not modify the original reasoning_data lists.
    """
    # Use cache if we've already computed few-shot for this project
    if project_name in few_shot_cache:
        return few_shot_cache[project_name]
    
    few_shot = []
    fallback_used = False
    # Build alternating label cycle: for n=4, e.g., [0, 1, 0, 1]
    label_cycle = [0, 1] * n
    label_cycle = label_cycle[:n]
    
    for label in label_cycle:
        # Try to get from the target project
        proj_list = get_reasoning_list(project_name, label)
        if len(proj_list) > 0:
            sample = proj_list[0]  # pick the first sample without deleting it
            few_shot.append(sample)
        else:
            # Fallback to FFmpeg
            ffmpeg_list = get_reasoning_list("FFmpeg", label)
            if len(ffmpeg_list) > 0:
                sample = ffmpeg_list[0]
                few_shot.append(sample)
                fallback_used = True
            else:
                # If no sample available in FFmpeg either, simply break out.
                print(f"Warning: No sample available for project '{project_name}' (label {label}) and fallback FFmpeg.")
                break

    # Cache the result for future use (store both few_shot and fallback flag)
    few_shot_cache[project_name] = (few_shot, fallback_used)
    return few_shot, fallback_used

# ---------------------
# 4. Process primevul_valid.jsonl and add few-shot examples line by line
# ---------------------
results = []

with open(PRIMEVUL_FILE, "r") as f_in:
    for count, line in enumerate(f_in):
        # For demonstration, you may limit processing (e.g., count < 5)
        # if count >= 5:
        #     break
        
        line_data = json.loads(line)
        project_name = line_data.get("project", "N/A")

        # Retrieve few-shot examples for this project (using cache so they're identical for all samples of the project)
        few_shot_samples, fallback = get_few_shot_samples(project_name, FEW_SHOT_COUNT)
        few_shot_text = ""
        
        # Format each few-shot example
        for few_shot_sample in few_shot_samples:
            target_text = "Yes" if few_shot_sample.get("target") == 1 else "No"
            few_shot_text += f"""
Example:
commit_message: {few_shot_sample.get("commit_message")}
func: {few_shot_sample.get("func")}
Answer: {target_text}
Reasoning: {few_shot_sample.get("reasoning")}

"""
        # Add the few-shot examples (and fallback flag) to the original sample
        line_data["few_shot_samples"] = few_shot_text.strip()  # Remove extra whitespace
        line_data["fallback"] = fallback
        results.append(line_data)

# ---------------------
# 5. Save the Output
# ---------------------
with open(OUTPUT_FILE, "w") as f_out:
    json.dump(results, f_out, indent=2)

print(f"✅ Done! Saved results with few-shot examples to '{OUTPUT_FILE}'")


✅ Done! Saved results with few-shot examples to 'PrimeVul_v0.1/primevul_with_4_shot.json'


In [13]:
len(results)

23948

#### Thinking-LLM-as-a-Judge

In [9]:
%%sbatch
#!/bin/bash
#SBATCH --job-name=deepspeed_llama_80gb  
#SBATCH --partition=pt                   
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8             
#SBATCH --mem=160G                    
#SBATCH --gres=gpu:nvidia_a100_7g.80gb:1

# module purge

# # Load necessary modules - **ADD THE CUDA MODULE HERE**
# # Replace 'cuda/11.8' with the actual name on your cluster
# module load cuda/11.8/default
# module load anaconda # Or miniconda, python, etc. depending on your setup
# source activate /speed-scratch/ra_mdash/tmp/jupyter-venv # Activate your environment

python thinking-llm-as-judge.py

('Submitted batch job 531797\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [8]:
%%sbatch
#!/bin/bash -l
#SBATCH --job-name=deepspeed_llama_80gb  
#SBATCH --partition=pt                   
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8             
#SBATCH --mem=160G                    
#SBATCH --gres=gpu:nvidia_a100_7g.80gb:1


#--constraint=gpu80         

# --- Environment Setup ---
echo "----------------------------------------------------"
echo "SLURM Job ID: $SLURM_JOB_ID"
echo "Run on host: $(hostname)"
echo "Working directory: $(pwd)"
echo "Start time: $(date)"
echo "----------------------------------------------------"

echo "Setting up environment..."
module purge > /dev/null 2>&1 || echo "Module purge failed (might be okay)."

echo "Loading modules..."
module load anaconda/default # Load Anaconda first
module load cuda/11.8/default

# --- Initialize Conda for this script's shell ---
echo "Sourcing conda.sh to enable conda activate..."
# Construct the path to conda.sh relative to the Anaconda module base path
# Adjust if your base anaconda install path is different
CONDA_BASE_PATH=$(conda info --base) # Get base path dynamically
CONDA_SH_PATH="${CONDA_BASE_PATH}/etc/profile.d/conda.sh"

if [ -f "$CONDA_SH_PATH" ]; then
    source "$CONDA_SH_PATH"
    echo "Successfully sourced $CONDA_SH_PATH"
else
    echo "ERROR: Cannot find conda.sh at $CONDA_SH_PATH. Conda activation might fail."
    # Fallback path guess (less reliable):
    # FALLBACK_CONDA_SH="/encs/pkg/anaconda3-2023.03/root/etc/profile.d/conda.sh"
    # if [ -f "$FALLBACK_CONDA_SH" ]; then
    #     source "$FALLBACK_CONDA_SH"
    #     echo "Successfully sourced fallback $FALLBACK_CONDA_SH"
    # else
    #    echo "ERROR: Fallback conda.sh not found either."
    #    exit 1
    # fi
    exit 1
fi

# Activate your conda environment using the modern command
echo "Activating conda environment..."
# Define the environment path for clarity
MY_CONDA_ENV="/speed-scratch/ra_mdash/tmp/deep-env"
source "$(conda info --base)/etc/profile.d/conda.sh" # Source conda functions
conda activate "$MY_CONDA_ENV"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to activate conda environment with 'conda activate'."
    exit 1
fi
# Define the Python executable path
PYTHON_EXEC="${MY_CONDA_ENV}/bin/python"
if [ ! -f "$PYTHON_EXEC" ]; then
    echo "ERROR: Python executable not found at $PYTHON_EXEC"
    exit 1
fi
echo "Using Python executable: $PYTHON_EXEC"
echo "Activated environment variable: $CONDA_DEFAULT_ENV"

# --- Manually Correct CUDA Environment Variables ---
# ... (export CUDA_HOME, PATH, LD_LIBRARY_PATH - these are still needed for compilation/runtime linking) ...
echo "Manually setting CUDA environment variables..."
export CUDA_HOME=/encs/pkg/cuda-11.8/root
if [ -z "$CUDA_HOME" ] || [ ! -d "$CUDA_HOME" ]; then
    echo "ERROR: CUDA_HOME path is incorrect or directory does not exist: $CUDA_HOME"
    exit 1
fi
# NOTE: We still prepend CUDA to PATH for nvcc etc., but call python explicitly
export PATH="${CUDA_HOME}/bin:${PATH}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
echo "CUDA_HOME set to: $CUDA_HOME"


# --- Verification (Using Explicit Python Path) ---
echo "--- Environment Verification ---"
echo "Loaded Modules:"
module list
echo ""
echo "Explicit Python path: $PYTHON_EXEC"
echo "Explicit Python version: $($PYTHON_EXEC --version)" # Call python explicitly
echo ""
echo "CUDA_HOME: $CUDA_HOME"
echo "NVCC path: $(which nvcc)" # which should still work because we added CUDA_HOME/bin to PATH
echo "NVCC version:"
nvcc --version || echo "WARNING: nvcc command failed!"
echo ""
echo "PyTorch Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import torch; print(f'  Torch version: {torch.__version__}'); print(f'  Torch CUDA available: {torch.cuda.is_available()}'); print(f'  Torch CUDA version: {torch.version.cuda}'); print(f'  Torch device count: {torch.cuda.device_count()}')"
echo ""
echo "DeepSpeed Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import deepspeed; print(f'  DeepSpeed imported successfully. Version: {deepspeed.__version__}')" || echo "WARNING: Failed to import deepspeed!"
echo "--------------------------"


# --- Set TRITON_CACHE_DIR ---
# ... (TRITON_CACHE_DIR remains the same) ...
export TRITON_CACHE_DIR="/tmp/${USER}_triton_cache_${SLURM_JOB_ID}"
mkdir -p "$TRITON_CACHE_DIR"
echo "TRITON_CACHE_DIR set to: $TRITON_CACHE_DIR"
echo "----------------------------------------------------"


# --- Run Python Script (Using Explicit Python Path) ---
# echo "Running Python script: /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge-deepspeed.py"
# Execute the main script using the full path to the environment's python
"$PYTHON_EXEC" /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge-deepspeed.py
# if [ $? -ne 0 ]; then
#     echo "ERROR: Python script exited with non-zero status."
# fi

# --- Job Completion ---
# ... (Job completion logs remain the same) ...
# echo "----------------------------------------------------"
# echo "End time: $(date)"
# echo "Job finished."
# echo "----------------------------------------------------"

('Submitted batch job 531794\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [11]:
%%sbatch
#!/bin/bash -l
#SBATCH --job-name=deepspeed_llama_80gb  
#SBATCH --partition=pt                   
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8             
#SBATCH --mem=160G                    
#SBATCH --gres=gpu:nvidia_a100_7g.80gb:1


#--constraint=gpu80         

# --- Environment Setup ---
echo "----------------------------------------------------"
echo "SLURM Job ID: $SLURM_JOB_ID"
echo "Run on host: $(hostname)"
echo "Working directory: $(pwd)"
echo "Start time: $(date)"
echo "----------------------------------------------------"

echo "Setting up environment..."
module purge > /dev/null 2>&1 || echo "Module purge failed (might be okay)."

echo "Loading modules..."
module load anaconda/default # Load Anaconda first
module load cuda/11.8/default

# --- Initialize Conda for this script's shell ---
echo "Sourcing conda.sh to enable conda activate..."
# Construct the path to conda.sh relative to the Anaconda module base path
# Adjust if your base anaconda install path is different
CONDA_BASE_PATH=$(conda info --base) # Get base path dynamically
CONDA_SH_PATH="${CONDA_BASE_PATH}/etc/profile.d/conda.sh"

if [ -f "$CONDA_SH_PATH" ]; then
    source "$CONDA_SH_PATH"
    echo "Successfully sourced $CONDA_SH_PATH"
else
    echo "ERROR: Cannot find conda.sh at $CONDA_SH_PATH. Conda activation might fail."
    # Fallback path guess (less reliable):
    # FALLBACK_CONDA_SH="/encs/pkg/anaconda3-2023.03/root/etc/profile.d/conda.sh"
    # if [ -f "$FALLBACK_CONDA_SH" ]; then
    #     source "$FALLBACK_CONDA_SH"
    #     echo "Successfully sourced fallback $FALLBACK_CONDA_SH"
    # else
    #    echo "ERROR: Fallback conda.sh not found either."
    #    exit 1
    # fi
    exit 1
fi

# Activate your conda environment using the modern command
echo "Activating conda environment..."
# Define the environment path for clarity
MY_CONDA_ENV="/speed-scratch/ra_mdash/tmp/deep-env"
source "$(conda info --base)/etc/profile.d/conda.sh" # Source conda functions
conda activate "$MY_CONDA_ENV"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to activate conda environment with 'conda activate'."
    exit 1
fi
# Define the Python executable path
PYTHON_EXEC="${MY_CONDA_ENV}/bin/python"
if [ ! -f "$PYTHON_EXEC" ]; then
    echo "ERROR: Python executable not found at $PYTHON_EXEC"
    exit 1
fi
echo "Using Python executable: $PYTHON_EXEC"
echo "Activated environment variable: $CONDA_DEFAULT_ENV"

# --- Manually Correct CUDA Environment Variables ---
# ... (export CUDA_HOME, PATH, LD_LIBRARY_PATH - these are still needed for compilation/runtime linking) ...
echo "Manually setting CUDA environment variables..."
export CUDA_HOME=/encs/pkg/cuda-11.8/root
if [ -z "$CUDA_HOME" ] || [ ! -d "$CUDA_HOME" ]; then
    echo "ERROR: CUDA_HOME path is incorrect or directory does not exist: $CUDA_HOME"
    exit 1
fi
# NOTE: We still prepend CUDA to PATH for nvcc etc., but call python explicitly
export PATH="${CUDA_HOME}/bin:${PATH}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
echo "CUDA_HOME set to: $CUDA_HOME"


# --- Verification (Using Explicit Python Path) ---
echo "--- Environment Verification ---"
echo "Loaded Modules:"
module list
echo ""
echo "Explicit Python path: $PYTHON_EXEC"
echo "Explicit Python version: $($PYTHON_EXEC --version)" # Call python explicitly
echo ""
echo "CUDA_HOME: $CUDA_HOME"
echo "NVCC path: $(which nvcc)" # which should still work because we added CUDA_HOME/bin to PATH
echo "NVCC version:"
nvcc --version || echo "WARNING: nvcc command failed!"
echo ""
echo "PyTorch Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import torch; print(f'  Torch version: {torch.__version__}'); print(f'  Torch CUDA available: {torch.cuda.is_available()}'); print(f'  Torch CUDA version: {torch.version.cuda}'); print(f'  Torch device count: {torch.cuda.device_count()}')"
echo ""
echo "DeepSpeed Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import deepspeed; print(f'  DeepSpeed imported successfully. Version: {deepspeed.__version__}')" || echo "WARNING: Failed to import deepspeed!"
echo "--------------------------"


# --- Set TRITON_CACHE_DIR ---
# ... (TRITON_CACHE_DIR remains the same) ...
export TRITON_CACHE_DIR="/tmp/${USER}_triton_cache_${SLURM_JOB_ID}"
mkdir -p "$TRITON_CACHE_DIR"
echo "TRITON_CACHE_DIR set to: $TRITON_CACHE_DIR"
echo "----------------------------------------------------"


# --- Run Python Script (Using Explicit Python Path) ---
# echo "Running Python script: /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge-deepspeed.py"
# Execute the main script using the full path to the environment's python
"$PYTHON_EXEC" /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge.py
# if [ $? -ne 0 ]; then
#     echo "ERROR: Python script exited with non-zero status."
# fi

# --- Job Completion ---
# ... (Job completion logs remain the same) ...
# echo "----------------------------------------------------"
# echo "End time: $(date)"
# echo "Job finished."
# echo "----------------------------------------------------"

('Submitted batch job 531799\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [2]:
%%sbatch
#!/bin/bash -l
#SBATCH --job-name=deepspeed_llama_80gb  
#SBATCH --partition=pt                   
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8             
#SBATCH --mem=160G                    
#SBATCH --gres=gpu:nvidia_a100_7g.80gb:1


#--constraint=gpu80         

# --- Environment Setup ---
echo "----------------------------------------------------"
echo "SLURM Job ID: $SLURM_JOB_ID"
echo "Run on host: $(hostname)"
echo "Working directory: $(pwd)"
echo "Start time: $(date)"
echo "----------------------------------------------------"

echo "Setting up environment..."
module purge > /dev/null 2>&1 || echo "Module purge failed (might be okay)."

echo "Loading modules..."
module load anaconda/default # Load Anaconda first
module load cuda/11.8/default

# --- Initialize Conda for this script's shell ---
echo "Sourcing conda.sh to enable conda activate..."
# Construct the path to conda.sh relative to the Anaconda module base path
# Adjust if your base anaconda install path is different
CONDA_BASE_PATH=$(conda info --base) # Get base path dynamically
CONDA_SH_PATH="${CONDA_BASE_PATH}/etc/profile.d/conda.sh"

if [ -f "$CONDA_SH_PATH" ]; then
    source "$CONDA_SH_PATH"
    echo "Successfully sourced $CONDA_SH_PATH"
else
    echo "ERROR: Cannot find conda.sh at $CONDA_SH_PATH. Conda activation might fail."
    exit 1
fi

# Activate your conda environment using the modern command
echo "Activating conda environment..."
# Define the environment path for clarity
MY_CONDA_ENV="/speed-scratch/ra_mdash/tmp/deep-env"
source "$(conda info --base)/etc/profile.d/conda.sh" # Source conda functions
conda activate "$MY_CONDA_ENV"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to activate conda environment with 'conda activate'."
    exit 1
fi
# Define the Python executable path
PYTHON_EXEC="${MY_CONDA_ENV}/bin/python"
if [ ! -f "$PYTHON_EXEC" ]; then
    echo "ERROR: Python executable not found at $PYTHON_EXEC"
    exit 1
fi
echo "Using Python executable: $PYTHON_EXEC"
echo "Activated environment variable: $CONDA_DEFAULT_ENV"

# --- Manually Correct CUDA Environment Variables ---
# ... (export CUDA_HOME, PATH, LD_LIBRARY_PATH - these are still needed for compilation/runtime linking) ...
echo "Manually setting CUDA environment variables..."
export CUDA_HOME=/encs/pkg/cuda-11.8/root
if [ -z "$CUDA_HOME" ] || [ ! -d "$CUDA_HOME" ]; then
    echo "ERROR: CUDA_HOME path is incorrect or directory does not exist: $CUDA_HOME"
    exit 1
fi
# NOTE: We still prepend CUDA to PATH for nvcc etc., but call python explicitly
export PATH="${CUDA_HOME}/bin:${PATH}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
echo "CUDA_HOME set to: $CUDA_HOME"


# --- Verification (Using Explicit Python Path) ---
echo "--- Environment Verification ---"
echo "Loaded Modules:"
module list
echo ""
echo "Explicit Python path: $PYTHON_EXEC"
echo "Explicit Python version: $($PYTHON_EXEC --version)" # Call python explicitly
echo ""
echo "CUDA_HOME: $CUDA_HOME"
echo "NVCC path: $(which nvcc)" # which should still work because we added CUDA_HOME/bin to PATH
echo "NVCC version:"
nvcc --version || echo "WARNING: nvcc command failed!"
echo ""
echo "PyTorch Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import torch; print(f'  Torch version: {torch.__version__}'); print(f'  Torch CUDA available: {torch.cuda.is_available()}'); print(f'  Torch CUDA version: {torch.version.cuda}'); print(f'  Torch device count: {torch.cuda.device_count()}')"
echo ""
echo "DeepSpeed Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import deepspeed; print(f'  DeepSpeed imported successfully. Version: {deepspeed.__version__}')" || echo "WARNING: Failed to import deepspeed!"
echo "--------------------------"


# --- Set TRITON_CACHE_DIR ---
# ... (TRITON_CACHE_DIR remains the same) ...
export TRITON_CACHE_DIR="/tmp/${USER}_triton_cache_${SLURM_JOB_ID}"
mkdir -p "$TRITON_CACHE_DIR"
echo "TRITON_CACHE_DIR set to: $TRITON_CACHE_DIR"
echo "----------------------------------------------------"


# --- Run Python Script (Using Explicit Python Path) ---
# echo "Running Python script: /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge-deepspeed.py"
# Execute the main script using the full path to the environment's python
"$PYTHON_EXEC" /nfs/speed-scratch/ra_mdash/run-primevul-thinking-llm-as-judge.py
# if [ $? -ne 0 ]; then
#     echo "ERROR: Python script exited with non-zero status."
# fi

# --- Job Completion ---
# ... (Job completion logs remain the same) ...
# echo "----------------------------------------------------"
# echo "End time: $(date)"
# echo "Job finished."
# echo "----------------------------------------------------"

('Submitted batch job 531973\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [2]:
%%sbatch
#!/bin/bash -l
#SBATCH --job-name=deepspeed_llama_80gb  
#SBATCH --partition=pt                   
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8             
#SBATCH --mem=160G                    
#SBATCH --gres=gpu:nvidia_a100_7g.80gb:1


#--constraint=gpu80         

# --- Environment Setup ---
echo "----------------------------------------------------"
echo "SLURM Job ID: $SLURM_JOB_ID"
echo "Run on host: $(hostname)"
echo "Working directory: $(pwd)"
echo "Start time: $(date)"
echo "----------------------------------------------------"

echo "Setting up environment..."
module purge > /dev/null 2>&1 || echo "Module purge failed (might be okay)."

echo "Loading modules..."
module load anaconda/default # Load Anaconda first
module load cuda/11.8/default

# --- Initialize Conda for this script's shell ---
echo "Sourcing conda.sh to enable conda activate..."
# Construct the path to conda.sh relative to the Anaconda module base path
# Adjust if your base anaconda install path is different
CONDA_BASE_PATH=$(conda info --base) # Get base path dynamically
CONDA_SH_PATH="${CONDA_BASE_PATH}/etc/profile.d/conda.sh"

if [ -f "$CONDA_SH_PATH" ]; then
    source "$CONDA_SH_PATH"
    echo "Successfully sourced $CONDA_SH_PATH"
else
    echo "ERROR: Cannot find conda.sh at $CONDA_SH_PATH. Conda activation might fail."
    exit 1
fi

# Activate your conda environment using the modern command
echo "Activating conda environment..."
# Define the environment path for clarity
MY_CONDA_ENV="/speed-scratch/ra_mdash/tmp/deep-env"
source "$(conda info --base)/etc/profile.d/conda.sh" # Source conda functions
conda activate "$MY_CONDA_ENV"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to activate conda environment with 'conda activate'."
    exit 1
fi
# Define the Python executable path
PYTHON_EXEC="${MY_CONDA_ENV}/bin/python"
if [ ! -f "$PYTHON_EXEC" ]; then
    echo "ERROR: Python executable not found at $PYTHON_EXEC"
    exit 1
fi
echo "Using Python executable: $PYTHON_EXEC"
echo "Activated environment variable: $CONDA_DEFAULT_ENV"

# --- Manually Correct CUDA Environment Variables ---
# ... (export CUDA_HOME, PATH, LD_LIBRARY_PATH - these are still needed for compilation/runtime linking) ...
echo "Manually setting CUDA environment variables..."
export CUDA_HOME=/encs/pkg/cuda-11.8/root
if [ -z "$CUDA_HOME" ] || [ ! -d "$CUDA_HOME" ]; then
    echo "ERROR: CUDA_HOME path is incorrect or directory does not exist: $CUDA_HOME"
    exit 1
fi
# NOTE: We still prepend CUDA to PATH for nvcc etc., but call python explicitly
export PATH="${CUDA_HOME}/bin:${PATH}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"
echo "CUDA_HOME set to: $CUDA_HOME"


# --- Verification (Using Explicit Python Path) ---
echo "--- Environment Verification ---"
echo "Loaded Modules:"
module list
echo ""
echo "Explicit Python path: $PYTHON_EXEC"
echo "Explicit Python version: $($PYTHON_EXEC --version)" # Call python explicitly
echo ""
echo "CUDA_HOME: $CUDA_HOME"
echo "NVCC path: $(which nvcc)" # which should still work because we added CUDA_HOME/bin to PATH
echo "NVCC version:"
nvcc --version || echo "WARNING: nvcc command failed!"
echo ""
echo "PyTorch Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import torch; print(f'  Torch version: {torch.__version__}'); print(f'  Torch CUDA available: {torch.cuda.is_available()}'); print(f'  Torch CUDA version: {torch.version.cuda}'); print(f'  Torch device count: {torch.cuda.device_count()}')"
echo ""
echo "DeepSpeed Check:"
# Call python explicitly for the check
"$PYTHON_EXEC" -c "import deepspeed; print(f'  DeepSpeed imported successfully. Version: {deepspeed.__version__}')" || echo "WARNING: Failed to import deepspeed!"
echo "--------------------------"


# --- Set TRITON_CACHE_DIR ---
# ... (TRITON_CACHE_DIR remains the same) ...
export TRITON_CACHE_DIR="/tmp/${USER}_triton_cache_${SLURM_JOB_ID}"
mkdir -p "$TRITON_CACHE_DIR"
echo "TRITON_CACHE_DIR set to: $TRITON_CACHE_DIR"
echo "----------------------------------------------------"


# --- Run Python Script (Using Explicit Python Path) ---
# echo "Running Python script: /nfs/speed-scratch/ra_mdash/thinking-llm-as-judge-deepspeed.py"
# Execute the main script using the full path to the environment's python
"$PYTHON_EXEC" /nfs/speed-scratch/ra_mdash/run-primevul-thinking-llm-as-judge.py \
--output_file /speed-scratch/ra_mdash/results/prime_vul/Self-taught-evaluator-llama3.1-70B_thinking_judge_fewshot.jsonl \
--response_1_path /speed-scratch/ra_mdash/results/prime_vul/Llama-3.2-3B-Instruct_few_shot_CORRECTED.jsonl \
--response_2_path /speed-scratch/ra_mdash/results/prime_vul/Qwen2.5-Coder-3B_few_shot_CORRECTED.jsonl
# if [ $? -ne 0 ]; then
#     echo "ERROR: Python script exited with non-zero status."
# fi

# --- Job Completion ---
# ... (Job completion logs remain the same) ...
# echo "----------------------------------------------------"
# echo "End time: $(date)"
# echo "Job finished."
# echo "----------------------------------------------------"

('Submitted batch job 532239\n',
 'sbatch:  ====> Nodes limited to 1 GPU per JOB: speed-01, speed-05, speed-17\nsbatch:  ====> If this job is rejected, use -ppt instead of -ppg\n')

In [None]:
import json

unique_projects = set()

input_file = "PrimeVul_v0.1/primevul_valid.jsonl"
count = 0
max_test = 5000

with open(input_file, "r") as f:
    for line in f:
        if count < max_test:
            data = json.loads(line)
            project = data.get("project")
            if project:
                unique_projects.add(project)

# Convert to sorted list if you want it in order
unique_projects = sorted(unique_projects)

# Optionally, write to a file
with open("unique_projects.txt", "w") as out_file:
    for project in unique_projects:
        out_file.write(project + "\n")

# If you just want to print the result
print("Found", len(unique_projects), "unique projects.")
print(unique_projects)


In [6]:
import base64
import os
from google import genai
from google.genai import types

os.environ["GEMINI_API_KEY"] = "AIzaSyAHARGrWFDbv6-hR8Dv3MMdBs1vTM9unm0"

text = """
You are an AI vulnerability analyst.

Project:
389-ds-base

Commit Message:
Issue 4711 - SIGSEV with sync_repl (#4738)

Bug description:
	sync_repl sends back entries identified with a unique
	identifier that is 'nsuniqueid'. If 'nsuniqueid' is
	missing, then it may crash

Fix description:
	Check a nsuniqueid is available else returns OP_ERR

relates: https://github.com/389ds/389-ds-base/issues/4711

Reviewed by: Pierre Rogier, James Chapman, William Brown (Thanks!)

Platforms tested:  F33

Code Snippet:
sync_create_sync_done_control(LDAPControl **ctrlp, int refresh, char *cookie)
{
    int rc;
    BerElement *ber;
    struct berval *bvp;

    if (ctrlp == NULL || (ber = der_alloc()) == NULL) {
        return (LDAP_OPERATIONS_ERROR);
    }

    *ctrlp = NULL;

    if (cookie) {
        if ((rc = ber_printf(ber, "{s", cookie)) != -1) {
            if (refresh) {
                rc = ber_printf(ber, "b}", refresh);
            } else {
                rc = ber_printf(ber, "}");
            }
        }
    } else {
        if (refresh) {
            rc = ber_printf(ber, "{b}", refresh);
        } else {
            rc = ber_printf(ber, "{}");
        }
    }
    if (rc != -1) {
        rc = ber_flatten(ber, &bvp);
    }
    ber_free(ber, 1);

    if (rc == -1) {
        return (LDAP_OPERATIONS_ERROR);
    }

    *ctrlp = (LDAPControl *)slapi_ch_malloc(sizeof(LDAPControl));
    (*ctrlp)->ldctl_iscritical = 0;
    (*ctrlp)->ldctl_oid = slapi_ch_strdup(LDAP_CONTROL_SYNC_DONE);
    (*ctrlp)->ldctl_value = *bvp; /* struct copy */

    bvp->bv_val = NULL;
    ber_bvfree(bvp);

    return (LDAP_SUCCESS);
}

This code is NOT vulnerable. Based on the provided information, explain in 2 sentences why this code is safe or not flagged as vulnerable.

Answer:
"""

def generate():
    client = genai.Client(
        api_key=os.environ.get("GEMINI_API_KEY"),
    )

    model = "gemma-3-27b-it"
    # contents = [
    #     types.Content(
    #         role="user",
    #         parts=[
    #             types.Part.from_text(text=text),
    #         ],
    #     ),
    # ]
    # generate_content_config = types.GenerateContentConfig(
    #     response_mime_type="text/plain",
    # )

    # for chunk in client.models.generate_content_stream(
    #     model=model,
    #     contents=contents,
    #     config=generate_content_config,
    # ):
    #     print(chunk.text, end="")
    response = client.models.generate_content(model=model, contents=text)
    print(response.text)

if __name__ == "__main__":
    generate()


This code snippet is not directly related to the described bug regarding missing `nsuniqueid` in `sync_repl`. The provided code focuses on constructing an LDAP control for synchronization completion and doesn't handle or process entries with `nsuniqueid`, therefore it doesn't introduce or fix the vulnerability described in the commit message.






In [9]:
import os
import json
import time
from google import genai
from google.genai import types

# Set Gemini API key
os.environ["GEMINI_API_KEY"] = "AIzaSyAHARGrWFDbv6-hR8Dv3MMdBs1vTM9unm0"

# Initialize Gemini client
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
model = "gemma-3-27b-it"
generate_content_config = types.GenerateContentConfig(response_mime_type="text/plain")

# Load the data mapping
with open("project_sample_mapping.json", "r") as f:
    project_data = json.load(f)

# Storage for generated reasonings
reasoning_results = {}

# Prompt template
def build_prompt(example):
    base_prompt = f"""You are an AI vulnerability analyst.

Project:
{example.get("project", "N/A")}

Commit Message:
{example.get("commit_message", "N/A")}

Code Snippet:
{example.get("func", "N/A")}

"""
    if example["target"] == 1:
        base_prompt += f"""This code is vulnerable. Based on the information provided, explain in 2 sentences why this code is vulnerable. You may also consider this CVE description:

CVE Description:
{example.get("cve_desc", "N/A")}

Answer:"""
    else:
        base_prompt += """This code is NOT vulnerable. Based on the provided information, explain in 2 sentences why this code is safe or not flagged as vulnerable.

Answer:"""
    return base_prompt
    

# Function to call Gemini and get reasoning
def get_reasoning(prompt):
    response = client.models.generate_content(model=model, contents=prompt)
    print(response.text)
    return response.text
        
count = 0
# Generate reasoning for each example
for project, targets in project_data.items(): 
    if count < 2:
        reasoning_results[project] = { "0": [], "1": [] }
        for target_str, examples in targets.items():
            for example in examples:
                prompt = build_prompt(example)
                print(prompt)
                # print(f"\nGenerating reasoning for project: {project}, target: {target_str}, idx: {example['idx']}")
                reasoning = get_reasoning(prompt)
                reasoning_results[project][target_str].append({
                    "idx": example["idx"],
                    "reasoning": reasoning,
                    "func": example["func"],
                    "commit_message": example["commit_message"],
                    "cve_desc": example.get("cve_desc", ""),
                    "target": example["target"]
                })
                time.sleep(5)  # Sleep to be kind to API rate limits
        count = count + 1

# Save reasoning results
with open("reasoning_output.json", "w") as out_file:
    json.dump(reasoning_results, out_file, indent=2)

print("✅ All reasonings generated and saved to 'reasoning_output.json'")


You are an AI vulnerability analyst.

Project:
389-ds-base

Commit Message:
Issue 4711 - SIGSEV with sync_repl (#4738)

Bug description:
	sync_repl sends back entries identified with a unique
	identifier that is 'nsuniqueid'. If 'nsuniqueid' is
	missing, then it may crash

Fix description:
	Check a nsuniqueid is available else returns OP_ERR

relates: https://github.com/389ds/389-ds-base/issues/4711

Reviewed by: Pierre Rogier, James Chapman, William Brown (Thanks!)

Platforms tested:  F33

Code Snippet:
sync_create_sync_done_control(LDAPControl **ctrlp, int refresh, char *cookie)
{
    int rc;
    BerElement *ber;
    struct berval *bvp;

    if (ctrlp == NULL || (ber = der_alloc()) == NULL) {
        return (LDAP_OPERATIONS_ERROR);
    }

    *ctrlp = NULL;

    if (cookie) {
        if ((rc = ber_printf(ber, "{s", cookie)) != -1) {
            if (refresh) {
                rc = ber_printf(ber, "b}", refresh);
            } else {
                rc = ber_printf(ber, "}");
         

ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_input_token_count', 'quotaId': 'GenerateContentInputTokensPerModelPerMinute-FreeTier', 'quotaDimensions': {'model': 'gemma-3-27b', 'location': 'global'}, 'quotaValue': '15000'}]}, {'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '2s'}]}}

In [20]:
import time
import random
from google.api_core.exceptions import ResourceExhausted


# Set Gemini API key
os.environ["GEMINI_API_KEY"] = "AIzaSyAHARGrWFDbv6-hR8Dv3MMdBs1vTM9unm0"

# Initialize Gemini client
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))
model = "gemma-3-27b-it"
generate_content_config = types.GenerateContentConfig(response_mime_type="text/plain")

# Load the data mapping
with open("project_sample_mapping.json", "r") as f:
    project_data = json.load(f)

# Storage for generated reasonings
reasoning_results = {}

# Prompt template
def build_prompt(example):
    base_prompt = f"""You are an AI vulnerability analyst.

Project:
{example.get("project", "N/A")}

Commit Message:
{example.get("commit_message", "N/A")}

Code Snippet:
{example.get("func", "N/A")}

"""
    if example["target"] == 1:
        base_prompt += f"""This code is vulnerable. Based on the information provided, explain in 2 sentences why this code is vulnerable. You may also consider this CVE description:

CVE Description:
{example.get("cve_desc", "N/A")}

Answer:"""
    else:
        base_prompt += """This code is NOT vulnerable. Based on the provided information, explain in 2 sentences why this code is safe or not flagged as vulnerable.

Answer:"""
    return base_prompt


def exponential_backoff_retry(api_call, max_retries=5, initial_delay=2, backoff_factor=2):
    """
    Retries an API call with exponential backoff in case of 429 errors.

    :param api_call: Function representing the API call.
    :param max_retries: Maximum number of retries.
    :param initial_delay: Initial delay in seconds.
    :param backoff_factor: Factor by which the delay increases.
    :return: Result of the API call if successful.
    :raises: Exception if all retries fail.
    """
    delay = initial_delay
    for attempt in range(max_retries):
        try:
            return api_call()
        except ResourceExhausted as e:
            if attempt == max_retries - 1:
                raise
            wait_time = delay + random.uniform(0, 1)  # Adding jitter
            print(f"Rate limit exceeded. Retrying in {wait_time:.2f} seconds...")
            time.sleep(wait_time)
            delay *= backoff_factor

def get_reasoning(prompt):
    def api_call():
        # contents = [types.Content(role="user", parts=[types.Part.from_text(prompt)])]
        response = client.models.generate_content(model=model, contents=prompt)
        # response = client.models.generate_content(model=model, contents=contents)
        # print(response.text)
        return response.text

    return exponential_backoff_retry(api_call)


count = 0
for project, targets in project_data.items():
    if count < 2000:
        reasoning_results[project] = {"0": [], "1": []}
        for target_str, examples in targets.items():
            for example in examples:
                prompt = build_prompt(example)
                try:
                    reasoning = get_reasoning(prompt)
                    reasoning_results[project][target_str].append({
                        "idx": example["idx"],
                        "reasoning": reasoning,
                        "func": example["func"],
                        "commit_message": example["commit_message"],
                        "cve_desc": example.get("cve_desc", ""),
                        "target": example["target"]
                    })
                except Exception as e:
                    print(f"An error occurred: {e}")
                time.sleep(1)  # Optional: Additional delay to be kind to API rate limits
        count = count + 1
        if count % 10 == 0:
            print("Count", count)
            

# Save reasoning results
with open("reasoning_output.json", "w") as out_file:
    json.dump(reasoning_results, out_file, indent=2)

print("✅ All reasonings generated and saved to 'reasoning_output.json'")



An error occurred: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_input_token_count', 'quotaId': 'GenerateContentInputTokensPerModelPerMinute-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemma-3-27b'}, 'quotaValue': '15000'}]}, {'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '54s'}]}}
An error occurred: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please

In [23]:
from google import genai

client = genai.Client(api_key="AIzaSyAHARGrWFDbv6-hR8Dv3MMdBs1vTM9unm0")

response = client.models.generate_content(
    model="gemini-2.0-flash", contents=prompt
)
print(response.text)

The code is vulnerable to a memory leak because the allocated memory pointed to by `hdr0` is not freed when `p_reclen` is NULL, indicating zero entries, leading to a resource exhaustion denial of service. Although a `free(hdr0)` was added to handle error conditions such as `io->fd.seeks` and `io->fd.read` failing, it was not added when `p_reclen` is NULL.

