# LLM-as-bug-detector

### Python client

A more convenient way to communicate with the server from a notebook is to use a [Python client](https://github.com/ollama/ollama-python) that wraps the REST API.

First, the Python client must be installed (by running the cell below); then we can proceed to create an instance and use it to request the list of available models:

In [1]:
!pip -q install ollama 

In [2]:
# Display the instruction prompt defined in config_llm; as the cell's last
# expression, its value is rendered as the cell output.
import config_llm
config_llm.prompt_instruct

'You are an automated crash detector for machine learning notebooks. Given a sequence of code cells that have been successfully executed, determine whether the next code cell (the target cell) will crash upon execution. Return true it will crash, otherwise return false.\n'

In [None]:
# predict if a target cell crash or not
import crash_detector
import os, json
import re

is_buggy = True
llm_model = "mistral-small3.1:latest" #"llama3:70b" #"qwen2.5-coder:latest"

# Resolve the dataset variant once, outside the f-strings: reusing double
# quotes inside a double-quoted f-string is a SyntaxError before Python 3.12.
variant = "buggy" if is_buggy else "fixed"
# Directory-safe model name: ':' becomes '_', then every remaining non-word
# character (e.g. '-' and '.') is dropped.
model_dirname = re.sub(r'[^\w]', '', llm_model.replace(':', '_'))

folder_path = f"detect_if_cell_crash/output_nb_{variant}_processed"

outputfoldername = f"detect_if_cell_crash/{model_dirname}/results_{variant}/"
id_crash = 0  # number of notebooks for which the LLM runs completed in this execution
for filename in os.listdir(folder_path):
    # Only the .json files in the folder are inputs; anything else is ignored silently.
    if not filename.endswith('.json'):
        continue
    filepath = os.path.join(folder_path, filename)
    try:
        # Close the input file as soon as it is parsed instead of keeping it
        # open for the whole duration of the LLM runs.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        prompt = crash_detector.format_for_prompt(data)
        keyword = os.path.splitext(filename)[0]
        # outputfoldername already ends with '/', so join instead of adding a second separator.
        result_path = os.path.join(outputfoldername, f"crash_detection_results_{keyword}.json")
        # Skip notebooks without a prompt and those whose results file already
        # exists (presumably written by llm_multiple_runs -- confirm), so the
        # cell can be re-run to resume an interrupted batch.
        if prompt is not None and not os.path.exists(result_path):
            crash_detector.llm_multiple_runs(
                model=llm_model,
                user_message=prompt,
                out_name=keyword,
                outputfolder=outputfoldername,
                runs=5
            )
            id_crash += 1
        else:
            print(f"Skipping {filename}")
    except json.JSONDecodeError:
        print(f"Warning: Failed to parse {filename}")
print(f"Successfully detected {id_crash} cases")

Predicting tensorflow_4: 0th run...
Predicting tensorflow_4: 0th run...Attempt 1...
Predicting tensorflow_4: 0th run...Attempt 1 succeed.
Predicting tensorflow_4: 1th run...
Predicting tensorflow_4: 1th run...Attempt 1...
Predicting tensorflow_4: 1th run...Attempt 1 succeed.
Predicting tensorflow_4: 2th run...
Predicting tensorflow_4: 2th run...Attempt 1...
Predicting tensorflow_4: 2th run...Attempt 1 succeed.
Predicting tensorflow_4: 3th run...
Predicting tensorflow_4: 3th run...Attempt 1...
Predicting tensorflow_4: 3th run...Attempt 1 succeed.
Predicting tensorflow_4: 4th run...
Predicting tensorflow_4: 4th run...Attempt 1...
Predicting tensorflow_4: 4th run...Attempt 1: LLM call failed with error: POST predict: Post "http://127.0.0.1:37441/completion": EOF (status code: 500)
Retrying in 10 seconds...
Predicting tensorflow_4: 4th run...Attempt 2...
Predicting tensorflow_4: 4th run...Attempt 2 succeed.
Predictions by mistral-small3.1:latest finished 5 runs, the results are saved in de

In [None]:
# predict if a target cell crash or not
import crash_detector
import os, json
import re

is_buggy = False
llm_model = "mistral-small3.1:latest" # "llama3:70b" #"qwen2.5-coder:latest"

# Resolve the dataset variant once, outside the f-strings: reusing double
# quotes inside a double-quoted f-string is a SyntaxError before Python 3.12.
variant = "buggy" if is_buggy else "fixed"
# Directory-safe model name: ':' becomes '_', then every remaining non-word
# character (e.g. '-' and '.') is dropped.
model_dirname = re.sub(r'[^\w]', '', llm_model.replace(':', '_'))

folder_path = f"detect_if_cell_crash/output_nb_{variant}_processed"

outputfoldername = f"detect_if_cell_crash/{model_dirname}/results_{variant}/"
id_crash = 0  # number of notebooks for which the LLM runs completed in this execution
for filename in os.listdir(folder_path):
    # Only the .json files in the folder are inputs; anything else is ignored silently.
    if not filename.endswith('.json'):
        continue
    filepath = os.path.join(folder_path, filename)
    try:
        # Close the input file as soon as it is parsed instead of keeping it
        # open for the whole duration of the LLM runs.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        try:
            prompt = crash_detector.format_for_prompt(data)
        except Exception as exc:
            # Best-effort: a notebook that cannot be formatted is reported and
            # skipped. Catching Exception (not a bare except) keeps Ctrl-C /
            # kernel interrupts working and shows why the file was dropped.
            print(f"{filename}: could not build prompt ({exc!r})")
            continue
        keyword = os.path.splitext(filename)[0]
        # outputfoldername already ends with '/', so join instead of adding a second separator.
        result_path = os.path.join(outputfoldername, f"crash_detection_results_{keyword}.json")
        # Skip notebooks without a prompt and those whose results file already
        # exists (presumably written by llm_multiple_runs -- confirm), so the
        # cell can be re-run to resume an interrupted batch.
        if prompt is not None and not os.path.exists(result_path):
            crash_detector.llm_multiple_runs(
                model=llm_model,
                user_message=prompt,
                out_name=keyword,
                outputfolder=outputfoldername,
                runs=5
            )
            id_crash += 1
        else:
            print(f"Skipping {filename}")
    except json.JSONDecodeError:
        print(f"Warning: Failed to parse {filename}")
print(f"Successfully detected {id_crash} cases")

#### Calculate token length

In [2]:
# pip install tiktoken

In [None]:
import config_llm
import json
import crash_detector

# Read one processed notebook from disk and build its prompt with
# crash_detector.format_for_prompt; the result is kept under both `prompt`
# and `user_message`.
filepath = "detect_if_cell_crash/input_nb_buggy_processed/numpy_10.json"
with open(filepath, 'r', encoding='utf-8') as f:
    data = json.load(f)

prompt = crash_detector.format_for_prompt(data)
user_message = prompt

In [12]:
# Token counting is currently disabled: every line of this cell is commented out.
# NOTE(review): `full_input` is not defined in the visible cells -- presumably the
# instruction prompt plus `user_message`; confirm before re-enabling.
# import tiktoken

# encoding = tiktoken.get_encoding("cl100k_base")  # GPT-4-style tokenizer
# print(len(encoding.encode(full_input)))  # Approximate token count

2268


#### A test

In [2]:
# Display the current instruction prompt; the chat request further down sends
# this same value as its system message.
config_llm.prompt_instruct

'You are an automated crash detector for ML notebooks. Given a sequence of code cells that have been successfully executed, determine whether the next code cell (the target cell) will crash upon execution. Output TRUE it will crash, otherwise output FALSE. Do not output anything else.\n'

In [4]:
from ollama import Client

# Client bound to the remote LLM service.
client = Client(host='10.129.20.4:9090')

# The instruction prompt goes in as the system message, the notebook-derived
# prompt as the user message.
chat_messages = [
    {'role': 'system', 'content': config_llm.prompt_instruct},
    {'role': 'user', 'content': user_message},
]

response = client.chat(model='mistral-small3.1:latest', messages=chat_messages)
print(response.message.content)

FALSE
