In [None]:
# GPU llama-cpp-python
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose
!pip install -q huggingface_hub
!pip install -q llama-cpp-python==0.1.78
!pip install -q numpy==1.23.4
!pip install -q datasets

In [None]:
from datasets import load_dataset
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from sklearn.metrics import accuracy_score

In [None]:
# Load the defect-detection benchmark by its Hugging Face dataset id.
ds = load_dataset("ZHENGRAN/code_ujb_defectdetection")

In [None]:
# Work on the 'train' split as a pandas DataFrame; prediction columns are added to it later.
df = ds['train'].to_pandas()

In [None]:
# Preview the first rows to see which columns the dataset provides.
df.head()

Unnamed: 0,bug_id,task_id,function_signature,prompt_chat,code,defective,project,prompt_complete
0,49,000bd8114192f589e849a9f2a68d4edaaec806bd8ed37b...,public Object generateId(Object forPojo),"I want you to act as a code defect detector, w...",public Object generateId(Object forPojo) {...,True,JacksonDatabind,/**\n * Perform a binary search on a sorted ar...
1,56,005766411469575f53bb173eea8c7bf90dde1a7f0a41ba...,public int[] getCounts(int index),"I want you to act as a code defect detector, w...",public int[] getCounts(int index) {\n ...,False,Math,/**\n * Perform a binary search on a sorted ar...
2,9,009c707abb74fee8886efd9438bf903d296c8a30fda3f2...,public static byte[] encodeBase64(byte[] binar...,"I want you to act as a code defect detector, w...",public static byte[] encodeBase64(byte[] b...,False,Codec,/**\n * Perform a binary search on a sorted ar...
3,44,00d45f48dfdcff13baeaca61654b0467fae727dfc323d7...,public ChecksumCalculatingInputStream(final Ch...,"I want you to act as a code defect detector, w...",public ChecksumCalculatingInputStream(fina...,True,Compress,/**\n * Perform a binary search on a sorted ar...
4,64,0158d98cfe414fb8a3a32ae6d96b1f61a405dfb4990b38...,@Override\n protected VectorialPointValuePa...,"I want you to act as a code defect detector, w...",@Override\n protected VectorialPointVal...,True,Math,/**\n * Perform a binary search on a sorted ar...


In [None]:
# Hugging Face repository and the file inside it that holds the model weights
# (Llama-2 13B chat, GGML v3, q5_1 quantisation according to the file name).
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"

In [None]:
# Download the weights file from the Hub; the returned value is passed to Llama
# as the model path in a later cell.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

llama-2-13b-chat.ggmlv3.q5_1.bin:   0%|          | 0.00/9.76G [00:00<?, ?B/s]

In [None]:
# GPU
# Drop the reference to any previously loaded model first, so that re-running
# this cell lets the old object be garbage-collected before a new one is built.
lcpp_llm = None
lcpp_llm = Llama(
    model_path=model_path,
    # llama-cpp-python defaults n_ctx to 512 tokens, which is likely too small
    # for the few-shot prompts built later plus the 150 generated tokens.
    # Llama 2 supports a 4096-token window; lower this value if the runtime
    # runs out of memory.
    n_ctx=4096,
    n_threads=2,       # CPU threads used for the layers that stay on the CPU
    n_batch=512,       # prompt tokens processed per batch
    n_gpu_layers=32    # number of layers offloaded to the GPU
    )

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | 


In [None]:
# Sanity check: show the number of GPU-offloaded layers recorded in the loaded
# model's parameters (should match the n_gpu_layers value passed to Llama).
lcpp_llm.params.n_gpu_layers

32

# Persona & Zero-Shot

In [None]:
# Persona & Zero-Shot uses the dataset's ready-made chat prompts as the user message.
prompts = df['prompt_chat']

In [None]:
# Query the model once per chat prompt and record, in row order, whether the
# reply reports a defect (True) or not (False).
# The column is read from df directly rather than through the global `prompts`:
# that name is rebound in several cells, so relying on it would make the result
# depend on cell execution order.
output_PerZer = []
for user_message in df['prompt_chat']:
  prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

  USER: {user_message}

  ASSISTANT:
  '''
  response=lcpp_llm(prompt=prompt_template, max_tokens=150, temperature=0.5, top_p=0.95,
                repeat_penalty=1.2, top_k=150,
                echo=False)

  # NOTE(review): exact, case-sensitive substring match — any other wording,
  # including replies that cannot be parsed at all, is counted as "no defect".
  output_PerZer.append("A. Yes, there are defects" in response["choices"][0]["text"])

In [None]:
# Store the predictions next to the ground truth; the list was built in row
# order, so it lines up with df positionally.
df['Persona & Zero-Shot'] = output_PerZer

In [None]:
# Preview to confirm the new 'Persona & Zero-Shot' prediction column.
df.head()

Unnamed: 0,bug_id,task_id,function_signature,prompt_chat,code,defective,project,prompt_complete,Persona & Zero-Shot
0,49,000bd8114192f589e849a9f2a68d4edaaec806bd8ed37b...,public Object generateId(Object forPojo),"I want you to act as a code defect detector, w...",public Object generateId(Object forPojo) {...,True,JacksonDatabind,/**\n * Perform a binary search on a sorted ar...,True
1,56,005766411469575f53bb173eea8c7bf90dde1a7f0a41ba...,public int[] getCounts(int index),"I want you to act as a code defect detector, w...",public int[] getCounts(int index) {\n ...,False,Math,/**\n * Perform a binary search on a sorted ar...,False
2,9,009c707abb74fee8886efd9438bf903d296c8a30fda3f2...,public static byte[] encodeBase64(byte[] binar...,"I want you to act as a code defect detector, w...",public static byte[] encodeBase64(byte[] b...,False,Codec,/**\n * Perform a binary search on a sorted ar...,False
3,44,00d45f48dfdcff13baeaca61654b0467fae727dfc323d7...,public ChecksumCalculatingInputStream(final Ch...,"I want you to act as a code defect detector, w...",public ChecksumCalculatingInputStream(fina...,True,Compress,/**\n * Perform a binary search on a sorted ar...,False
4,64,0158d98cfe414fb8a3a32ae6d96b1f61a405dfb4990b38...,@Override\n protected VectorialPointValuePa...,"I want you to act as a code defect detector, w...",@Override\n protected VectorialPointVal...,True,Math,/**\n * Perform a binary search on a sorted ar...,True


In [None]:
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground-truth labels and the model predictions cannot be swapped silently.
accuracy = accuracy_score(y_true=df['defective'], y_pred=df['Persona & Zero-Shot'])
print(f'Accuracy of Persona & Zero-Shot: {accuracy * 100:.2f} %')

Accuracy of Persona & Zero-Shot: 51.49 %


# non-Persona & Few-Shot

In [None]:
# non-Persona & Few-Shot starts from the dataset's `prompt_complete` column;
# an instruction header is prepended to each prompt in the next cell.
prompts = df['prompt_complete']

In [None]:
# Query the model once per few-shot prompt (no persona) and record, in row
# order, whether the reply reports a defect (True) or not (False).
output_nonPerFew = []
# Instruction header prepended to every `prompt_complete` entry.
# NOTE(review): this header says "three examples" while the Persona & Few-Shot
# header says "four examples" for the same `prompt_complete` column — confirm
# the real number of examples and align the two texts.
first_line = """Below are three examples of defective Java functions.
First, try to take a look at them.
Then, fill in the blank labeled 'Answer?' in the last line
for the given function based on what you have seen in the answer lines of the previous
examples, choosing either A or B:\n"""

# The column is read from df directly rather than through the global `prompts`:
# that name is rebound in several cells, so relying on it would make the result
# depend on cell execution order.
for few_shot_prompt in df['prompt_complete']:
  user_message = first_line + few_shot_prompt
  prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

  USER: {user_message}

  ASSISTANT:
  '''
  response=lcpp_llm(prompt=prompt_template, max_tokens=150, temperature=0.5, top_p=0.95,
                repeat_penalty=1.2, top_k=150,
                echo=False)

  # NOTE(review): exact, case-sensitive substring match — any other wording,
  # including replies that cannot be parsed at all, is counted as "no defect".
  output_nonPerFew.append("A. Yes, there are defects" in response["choices"][0]["text"])

In [None]:
# Store the predictions next to the ground truth; the list was built in row
# order, so it lines up with df positionally.
df['non-Persona & Few-Shot'] = output_nonPerFew

In [None]:
# Preview to confirm the new 'non-Persona & Few-Shot' prediction column.
df.head()

Unnamed: 0,bug_id,task_id,function_signature,prompt_chat,code,defective,project,prompt_complete,Persona & Zero-Shot,non-Persona & Few-Shot
0,49,000bd8114192f589e849a9f2a68d4edaaec806bd8ed37b...,public Object generateId(Object forPojo),"I want you to act as a code defect detector, w...",public Object generateId(Object forPojo) {...,True,JacksonDatabind,/**\n * Perform a binary search on a sorted ar...,True,False
1,56,005766411469575f53bb173eea8c7bf90dde1a7f0a41ba...,public int[] getCounts(int index),"I want you to act as a code defect detector, w...",public int[] getCounts(int index) {\n ...,False,Math,/**\n * Perform a binary search on a sorted ar...,False,True
2,9,009c707abb74fee8886efd9438bf903d296c8a30fda3f2...,public static byte[] encodeBase64(byte[] binar...,"I want you to act as a code defect detector, w...",public static byte[] encodeBase64(byte[] b...,False,Codec,/**\n * Perform a binary search on a sorted ar...,False,False
3,44,00d45f48dfdcff13baeaca61654b0467fae727dfc323d7...,public ChecksumCalculatingInputStream(final Ch...,"I want you to act as a code defect detector, w...",public ChecksumCalculatingInputStream(fina...,True,Compress,/**\n * Perform a binary search on a sorted ar...,False,False
4,64,0158d98cfe414fb8a3a32ae6d96b1f61a405dfb4990b38...,@Override\n protected VectorialPointValuePa...,"I want you to act as a code defect detector, w...",@Override\n protected VectorialPointVal...,True,Math,/**\n * Perform a binary search on a sorted ar...,True,False


In [None]:
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground-truth labels and the model predictions cannot be swapped silently.
accuracy = accuracy_score(y_true=df['defective'], y_pred=df['non-Persona & Few-Shot'])
print(f'Accuracy of non-Persona & Few-Shot: {accuracy * 100:.2f} %')

Accuracy of non-Persona & Few-Shot: 51.90 %


# non-Persona & Zero-Shot

In [None]:
# non-Persona & Zero-Shot starts from the bare function source in `code`;
# the instruction text is prepended to each entry in the next cell.
prompts = df['code']

In [None]:
# Query the model once per function (zero-shot, no persona) and record, in row
# order, whether the reply reports a defect (True) or not (False).
output_nonPerZer = []
# Instruction header prepended to every function; it spells out the two
# answers the model is asked to choose from.
first_line = """I'll provide you with a Java function
and it will be your responsibility to analyze it for potential issues
based on the provided function code.
Please respond with either
"A. Yes, there are defects" or
"B. No, there are no defects" based on your assessment.
Let's get started with our first potentially flawed Java function:\n"""

# The column is read from df directly rather than through the global `prompts`:
# that name is rebound in several cells, so relying on it would make the result
# depend on cell execution order.
for function_code in df['code']:
  user_message = first_line + function_code
  prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

  USER: {user_message}

  ASSISTANT:
  '''
  response=lcpp_llm(prompt=prompt_template, max_tokens=150, temperature=0.5, top_p=0.95,
                repeat_penalty=1.2, top_k=150,
                echo=False)

  # NOTE(review): exact, case-sensitive substring match — any other wording,
  # including replies that cannot be parsed at all, is counted as "no defect".
  output_nonPerZer.append("A. Yes, there are defects" in response["choices"][0]["text"])

In [None]:
# Store the predictions next to the ground truth; the list was built in row
# order, so it lines up with df positionally.
df['non-Persona & Zero-Shot'] = output_nonPerZer

In [None]:
# Preview to confirm the new 'non-Persona & Zero-Shot' prediction column.
df.head()

Unnamed: 0,bug_id,task_id,function_signature,prompt_chat,code,defective,project,prompt_complete,Persona & Zero-Shot,non-Persona & Few-Shot,non-Persona & Zero-Shot
0,49,000bd8114192f589e849a9f2a68d4edaaec806bd8ed37b...,public Object generateId(Object forPojo),"I want you to act as a code defect detector, w...",public Object generateId(Object forPojo) {...,True,JacksonDatabind,/**\n * Perform a binary search on a sorted ar...,True,False,True
1,56,005766411469575f53bb173eea8c7bf90dde1a7f0a41ba...,public int[] getCounts(int index),"I want you to act as a code defect detector, w...",public int[] getCounts(int index) {\n ...,False,Math,/**\n * Perform a binary search on a sorted ar...,False,True,False
2,9,009c707abb74fee8886efd9438bf903d296c8a30fda3f2...,public static byte[] encodeBase64(byte[] binar...,"I want you to act as a code defect detector, w...",public static byte[] encodeBase64(byte[] b...,False,Codec,/**\n * Perform a binary search on a sorted ar...,False,False,True
3,44,00d45f48dfdcff13baeaca61654b0467fae727dfc323d7...,public ChecksumCalculatingInputStream(final Ch...,"I want you to act as a code defect detector, w...",public ChecksumCalculatingInputStream(fina...,True,Compress,/**\n * Perform a binary search on a sorted ar...,False,False,True
4,64,0158d98cfe414fb8a3a32ae6d96b1f61a405dfb4990b38...,@Override\n protected VectorialPointValuePa...,"I want you to act as a code defect detector, w...",@Override\n protected VectorialPointVal...,True,Math,/**\n * Perform a binary search on a sorted ar...,True,False,True


In [None]:
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground-truth labels and the model predictions cannot be swapped silently.
accuracy = accuracy_score(y_true=df['defective'], y_pred=df['non-Persona & Zero-Shot'])
print(f'Accuracy of non-Persona & Zero-Shot: {accuracy * 100:.2f} %')

Accuracy of non-Persona & Zero-Shot: 51.62 %


# Persona & Few-Shot

In [None]:
# Persona & Few-Shot starts from the dataset's `prompt_complete` column;
# a persona-style instruction header is prepended to each prompt in the next cell.
prompts = df['prompt_complete']

In [None]:
# Query the model once per few-shot prompt (with persona) and record, in row
# order, whether the reply reports a defect (True) or not (False).
output_PerFew = []
# Persona-style instruction header prepended to every `prompt_complete` entry.
# NOTE(review): this header says "four examples" while the non-Persona &
# Few-Shot header says "three examples" for the same `prompt_complete` column —
# confirm the real number of examples and align the two texts.
first_line = """I want you to act as a code defect detector, then
I'll provide you with four examples of defective Java functions.
First, try to take a look at them.
Then, fill in the blank labeled 'Answer?' in the last line
for the given function based on what you have seen in the answer lines of the previous
examples, choosing either A or B:\n"""

# The column is read from df directly rather than through the global `prompts`:
# that name is rebound in several cells, so relying on it would make the result
# depend on cell execution order.
for few_shot_prompt in df['prompt_complete']:
  user_message = first_line + few_shot_prompt
  prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

  USER: {user_message}

  ASSISTANT:
  '''
  response=lcpp_llm(prompt=prompt_template, max_tokens=150, temperature=0.5, top_p=0.95,
                repeat_penalty=1.2, top_k=150,
                echo=False)

  # NOTE(review): exact, case-sensitive substring match — any other wording,
  # including replies that cannot be parsed at all, is counted as "no defect".
  output_PerFew.append("A. Yes, there are defects" in response["choices"][0]["text"])

In [None]:
# Store the predictions next to the ground truth; the list was built in row
# order, so it lines up with df positionally.
df['Persona & Few-Shot'] = output_PerFew

In [None]:
# Preview to confirm the new 'Persona & Few-Shot' prediction column.
df.head()

Unnamed: 0,bug_id,task_id,function_signature,prompt_chat,code,defective,project,prompt_complete,Persona & Zero-Shot,non-Persona & Few-Shot,non-Persona & Zero-Shot,Persona & Few-Shot
0,49,000bd8114192f589e849a9f2a68d4edaaec806bd8ed37b...,public Object generateId(Object forPojo),"I want you to act as a code defect detector, w...",public Object generateId(Object forPojo) {...,True,JacksonDatabind,/**\n * Perform a binary search on a sorted ar...,True,False,False,False
1,56,005766411469575f53bb173eea8c7bf90dde1a7f0a41ba...,public int[] getCounts(int index),"I want you to act as a code defect detector, w...",public int[] getCounts(int index) {\n ...,False,Math,/**\n * Perform a binary search on a sorted ar...,True,True,True,True
2,9,009c707abb74fee8886efd9438bf903d296c8a30fda3f2...,public static byte[] encodeBase64(byte[] binar...,"I want you to act as a code defect detector, w...",public static byte[] encodeBase64(byte[] b...,False,Codec,/**\n * Perform a binary search on a sorted ar...,False,False,False,False
3,44,00d45f48dfdcff13baeaca61654b0467fae727dfc323d7...,public ChecksumCalculatingInputStream(final Ch...,"I want you to act as a code defect detector, w...",public ChecksumCalculatingInputStream(fina...,True,Compress,/**\n * Perform a binary search on a sorted ar...,False,False,False,False
4,64,0158d98cfe414fb8a3a32ae6d96b1f61a405dfb4990b38...,@Override\n protected VectorialPointValuePa...,"I want you to act as a code defect detector, w...",@Override\n protected VectorialPointVal...,True,Math,/**\n * Perform a binary search on a sorted ar...,True,False,True,True


In [None]:
# accuracy_score is documented as (y_true, y_pred); pass both by keyword so the
# ground-truth labels and the model predictions cannot be swapped silently.
accuracy = accuracy_score(y_true=df['defective'], y_pred=df['Persona & Few-Shot'])
print(f'Accuracy of Persona & Few-Shot: {accuracy * 100:.2f} %')

Accuracy of Persona & Few-Shot: 50.55 %
