# Libraries

In [49]:
import json
import os
import subprocess
import sys

import langchain 
import langchain_community
import langchain_huggingface
import langchain_pinecone 
import pinecone
import dotenv
import openai
import textract
import pandas as pd
import numpy as np
from openai import OpenAI

# Datasets

In [50]:
# Earlier pickle-based loading (pytracebugs dataset paths), left disabled:
# BuggyCode = pd.read_pickle('../pytracebugs_dataset_v1/buggy_dataset/bugfixes_train.pickle')
# StableCode = pd.read_pickle('../pytracebugs_dataset_v1/stable_dataset/stable_code_train.pickle')

# Read the six buggy/correct sample pairs into two parallel lists, so that
# BuggyCode[k] and CorrectCode[k] hold the two versions of Code{k+1}.
BuggyCode, CorrectCode = [], []
for i in range(1, 7):
    BuggyFile = f'./Data/Buggy/Code{i}Buggy.py'
    CorrectFile = f'./Data/Correct/Code{i}Correct.py'
    with open(BuggyFile, 'r') as buggy_handle:
        BuggyCode.append(buggy_handle.read())
    with open(CorrectFile, 'r') as correct_handle:
        CorrectCode.append(correct_handle.read())

# One placeholder entry per sample file; the values are replaced later with
# model-generated descriptions.
CodeMappings = {f'Code{n}.py': 'TO BE FILLED BY LLM' for n in range(1, 7)}

In [51]:
# Hugging Face hosted endpoint used to describe the code files.
# The API token is read from the local .env file rather than hard-coded.
_endpoint_options = {
    "repo_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "temperature": 0.8,
    "top_k": 50,
    "huggingfacehub_api_token": dotenv.get_key('.env', 'HUGGINGFACE_API_KEY'),
}
LLM = langchain_huggingface.HuggingFaceEndpoint(**_endpoint_options)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\saadl\.cache\huggingface\token
Login successful


##### Create the mappings for the Identifier LLM

In [52]:
from langchain import PromptTemplate
from langchain.schema.output_parser import StrOutputParser


# Prompt asking for a one-line description of a code snippet plus the list of
# its methods, in a fixed "Description: ... / Methods: ..." layout.
_mapping_prompt = PromptTemplate.from_template(
    """
        I have attached the following code snippet below, please give me a one line description of what it does and a list of methods that it has.
        Do NOT include the code in your response.
        Do NOT move from the given format.
        Do NOT include any other information in your response.
        Do NOT create your own context or prompt.
        
        <Code>
        {code}
        </Code>
        
        please provide your answer in the following format:
        Description: <Description>
        Methods: <Method1>, <Method2>, <Method3>, ..."""
)

# Pipeline: fill the prompt, send it to the LLM, return the reply as a string.
MAPPING_CHAIN = _mapping_prompt | LLM | StrOutputParser()

In [53]:
# Describe every buggy file with the mapping chain and store the trimmed reply
# under that file's key ('Code1.py' ... 'Code6.py').
for file_number in range(1, 7):
    buggy_source = BuggyCode[file_number - 1]
    description = MAPPING_CHAIN.invoke({'code': buggy_source})
    CodeMappings[f'Code{file_number}.py'] = description.strip()

CodeMappings

{'Code1.py': 'Description: This is a buggy Math class that contains methods to perform arithmetic operations.\n        Methods: add, subtract, multiply, divide, power',
 'Code2.py': 'Description: This class, FlightTracker, is used to keep track of flights. It initializes with flights dictionary which contains the following keys: numbers, origins, destinations, durations, prices, dates, times, airlines, planes, seats, classes, assengers, status, captain.\n        Methods: get_flight_number, get_flight_origins, get_flight_destinations, get_flight_durations, get_flight_prices, get_flight_dates, get_flight_times, get_flight_airlines, get_flight_planes, get_flight_seats, get_flight_classes, get_flight_passengers, get_flight_status, get_flight_captain, get_total_flights, get_average_price, get_longest_flight_duration, get_shortest_flight_duration, get_total_passengers, get_flights_by_airline, get_flights_by_origin, get_flights_by_destination, get_flights_by_date, get_flights_by_status, get_f

#### Save codemappings in a readme file

In [54]:
import json

# Persist the mappings as a fenced ```json block inside a Markdown file.
readme_file = "CodeMappings.md"
content = json.dumps(CodeMappings, indent=4)
with open(readme_file, "w") as readme_handle:
    readme_handle.write("```json\n" + content + "\n```")
print(f"Code mappings saved to {readme_file}")

Code mappings saved to CodeMappings.md


##### Load readme file

In [55]:
# Reload the mappings from the Markdown file and mirror them into a plain
# JSON file. The Markdown file is expected to contain one fenced ```json block.
readme_file = "CodeMappings.md"
output_json_file = "CodeMappings.json"
with open(readme_file, "r") as file:
    lines = file.readlines()

# Find the opening ```json fence and the first closing ``` fence after it.
start, end = None, None
for i, line in enumerate(lines):
    marker = line.strip()
    if marker == "```json":
        start = i
    elif marker == "```" and start is not None:
        end = i
        break

# Stays None when the block is missing or cannot be parsed.
CodeMappings = None

if start is not None and end is not None:
    # Everything strictly between the two fence lines is the JSON payload.
    json_content = "".join(lines[start + 1:end])
    try:
        CodeMappings = json.loads(json_content)
        with open(output_json_file, "w") as json_file:
            json.dump(CodeMappings, json_file, indent=4)
        print(f"JSON saved to {output_json_file}")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
else:
    # Name the file that was actually read (the old message said README.md).
    print(f"Error: JSON block not found in the {readme_file} file.")

JSON saved to CodeMappings.json


##### Giving the LLM a bug report and the mappings to retrieve the code file name

In [56]:
# Ask the chat model which mapped file a bug report refers to.
# Define the prompt
prompt = """
I have attached the following BUG REPORT below, please provide the file name that corresponds to the bug report.
Mappings include the following key: Code File Name. Value: Code Description and Methods.
Do NOT include the bug report in your response.
Do NOT move from the given format.
Do NOT include any other information in your response.
Do NOT create your own context or prompt.
Always Choose one file name do not give multiple file names.

<bug_report>
{bug_report}
</bug_report>

<mappings>
{mappings}
</mappings>

please provide your answer in the following format:
File: <File>
""".strip()

# Define the input
bug_report = "The numbers are not being added correctly"
mappings = f"Key: Code File Name. Value: Code Description and Methods.{CodeMappings}"

# Reset first so a failed request cannot leave a file name from an earlier
# run behind for the following cells to pick up silently.
BuggedFile = None

# Generate response using OpenAI API
try:
    client = OpenAI(api_key=dotenv.get_key('.env', 'OPENAI_API_KEY'))
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful AI."},
            {"role": "user", "content": prompt.format(bug_report=bug_report, mappings=mappings)}
            ]
        )
    reply = response.choices[0].message.content
    # The prompt asks for "File: <name>". Prefer the line carrying that prefix;
    # if the model answered with a bare name, fall back to the whole reply.
    answer_line = next(
        (ln for ln in reply.splitlines() if ln.strip().lower().startswith("file:")),
        reply,
    )
    BuggedFile = answer_line.split(":", 1)[-1].strip()
    # The model may return a name that is not among the mapped files.
    if CodeMappings and BuggedFile not in CodeMappings:
        print(f"Warning: '{BuggedFile}' is not one of the mapped files.")
except Exception as e:
    # Report what actually went wrong instead of a bare "Error".
    print(f"Error while identifying the bugged file: {e}")

BuggedFile

'Code1.py'

In [57]:
# PINECONE_API_KEY = dotenv.get_key('.env', 'PINECONE_API_KEY')
# PINECONE = pinecone.Pinecone(api_key=PINECONE_API_KEY)
# index_name = "pytracebugs-llm-1"
# indexes = PINECONE.list_indexes()
# if index_name not in indexes:   
#     PINECONE.create_index(
#         name=index_name,
#         dimension=768,
#         metric="cosine",
#         spec=pinecone.ServerlessSpec(
#             cloud="aws",
#             region="us-east-1",
#         ),
#     )   
# INDEX = PINECONE.Index(index_name)

In [58]:
# def get_code_embeddings(code):
#     return LLM.encode(code)

# BuggyCodeEmbeddings = BuggyCode['full_file_code_before_merge']
    

#### Open the Code file and fix the bug

In [59]:
# Load the buggy source that corresponds to the identified file,
# e.g. "Code1.py" -> "Code1Buggy.py" inside the Data/Buggy folder.
# The path is built with os.path.join instead of a backslash literal, which
# contained invalid escape sequences ("\D", "\B") and only worked on Windows.
file_path = os.path.join(".", "Data", "Buggy")
bugged_file_name = BuggedFile.split(".")[0] + "Buggy.py"
file = os.path.join(file_path, bugged_file_name)
try:
    # A separate handle name keeps `file` holding the path, not the file object.
    with open(file, "r") as source_handle:
        content = source_handle.read()
    print("File Content:")
    print(content)
except FileNotFoundError:
    # Report the full file path that was tried, not just its directory.
    print(f"Error: The file '{file}' was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

File Content:
# Buggy Code

class Math:
    def __init__(self):
        pass
    def add(a, b):
        return a - b

    def subtract(a, b):
        return a * b

    def multiply(aa, b):
        return a * b

    def divide(a, b):
        return a / bc

    def power(a, b):
        return a ** ba


In [60]:
def _strip_code_fences(text):
    """Return `text` without a surrounding Markdown code fence, if it has one.

    Chat models often wrap code in ```python ... ``` even when told not to;
    writing that verbatim into a .py file would make it invalid Python.
    Text without such a wrapper is returned unchanged.
    """
    stripped = text.strip()
    if stripped.startswith("```") and stripped.endswith("```"):
        fenced_lines = stripped.splitlines()
        if len(fenced_lines) >= 2:
            # Drop the opening fence line (with optional language tag) and the closing fence.
            return "\n".join(fenced_lines[1:-1])
    return text


# Ask the chat model to repair the buggy file for the given bug report.
prompt = """
I have attached a BUG REPORT below, and the content from the corresponding python file CODE. Please fix the bug and give the code.
Do NOT include the bug report in your response.
Do NOT move from the given format.
Do NOT include any other information in your response.
Do NOT create your own context or prompt.

<bug_report>
{bug_report}
</bug_report>

<code>
{code}
</code>

Only give the fixed code and nothing else. Do not include comments as well.
"""

# Define the input
python_file = content
report = f"Bug Report: {bug_report}"

# Generate response using OpenAI API
try:
    client = OpenAI(api_key=dotenv.get_key('.env', 'OPENAI_API_KEY'))
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful AI."},
            {"role": "user", "content": prompt.format(bug_report=report, code=python_file)}
            ]
        )
    fixed_code = _strip_code_fences(response.choices[0].message.content)
except Exception as e:
    # Report what actually went wrong instead of a bare "Error".
    print(f"Error while requesting the fixed code: {e}")
    

In [61]:
# Display the model's reply as a raw string (newlines show up as \n).
fixed_code

'class Math:\n    def __init__(self):\n        pass\n    def add(self, a, b):\n        return a + b\n\n    def subtract(self, a, b):\n        return a - b\n\n    def multiply(self, a, b):\n        return a * b\n\n    def divide(self, a, b):\n        return a / b\n\n    def power(self, a, b):\n        return a ** b'

In [63]:
# Overwrite the buggy file with: the previous content commented out, a blank
# line, then the model's fixed code.
# NOTE: re-running this cell comments out the already-written file again and
# appends the fix once more, so it is not idempotent.
# The path is built with os.path.join instead of a backslash literal, which
# contained invalid escape sequences ("\D", "\B") and only worked on Windows.
output_file_path = os.path.join(".", "Data", "Buggy")
output_file_name = BuggedFile.split(".")[0] + "Buggy.py"
output_file_name = os.path.join(output_file_path, output_file_name)
existing_file_path = output_file_name
print(existing_file_path)
try:
    # Refuse to touch the file when there is nothing usable to write; opening
    # it in "w" mode would otherwise truncate it before the write fails.
    if not isinstance(fixed_code, str) or not fixed_code.strip():
        raise ValueError("fixed_code is empty or not a string; file left unchanged")

    with open(existing_file_path, "r") as existing_handle:
        existing_code = existing_handle.read()

    # Keep the old version for reference, as comments at the top of the file.
    commented_code = "\n".join([f"# {line}" for line in existing_code.splitlines()])

    write_code = fixed_code

    with open(output_file_name, "w") as output_handle:
        output_handle.write(commented_code)
        output_handle.write("\n\n")
        output_handle.write(write_code)

    print(f"Content successfully written to '{output_file_name}'")

except FileNotFoundError:
    # Name the path that was actually tried rather than the bare mapping key.
    print(f"Error: The file '{existing_file_path}' was not found in the specified directory.")
except Exception as e:
    print(f"An error occurred: {e}")

.\Data\Buggy\Code1Buggy.py
Content successfully written to '.\Data\Buggy\Code1Buggy.py'


#### Run test script

In [64]:
# Run the test script that belongs to the identified file,
# e.g. "Code1.py" -> "Code1Test.py" inside the Data/Test folder.
# The path is built with os.path.join instead of a backslash literal, which
# contained invalid escape sequences ("\D", "\T") and only worked on Windows.
test_path = os.path.join(".", "Data", "Test")
test_name = BuggedFile.split(".")[0] + "Test.py"
test = os.path.join(test_path, test_name)

try:
    # subprocess.run does not raise for a missing script (the interpreter just
    # exits non-zero), so check explicitly to keep the message below accurate.
    if not os.path.isfile(test):
        raise FileNotFoundError(test)

    # sys.executable runs the script with the same interpreter as the kernel,
    # rather than whichever "python" happens to be first on PATH.
    result = subprocess.run(
        [sys.executable, test],
        capture_output=True,
        text=True
    )
    print(result.stdout)

    # stdout is a string, so comparing it to the bool True could never succeed.
    # The captured output shows the script printing its verdict last ("False");
    # presumably it prints "True" on success -- verify against the test scripts.
    output_lines = result.stdout.strip().splitlines()
    verdict = output_lines[-1].strip() if output_lines else ""
    if result.returncode == 0 and verdict == "True":
        print("Test case passed")
    else:
        print("Test case failed")

    if result.stderr:
        print("Script Errors.")
        print(result.stderr)

    print(f"Exit Code: {result.returncode}")
except FileNotFoundError:
    print(f"Error: The file '{test}' was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

Math.add() missing 1 required positional argument: 'b'
False

Test case failed
Exit Code: 0
