In [2]:
import openai
import pandas as pd
import os
# API credentials come from the environment; each is None if its variable is unset.
OPEN_AI_API_KEY = os.environ.get("OPEN_AI_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Read the input CSV into a DataFrame (adjust the path to point at your own data).
df = pd.read_csv('source_data/bug_fix_sample.csv')

# Hand the key read above to the openai module; it is not hard-coded here.
openai.api_key = OPEN_AI_API_KEY

def get_explanation(buggy_code, fixed_code, *, model="gpt-3.5-turbo", max_tokens=60, temperature=0.5):
    """Ask a chat model why ``fixed_code`` is correct compared to ``buggy_code``.

    The prompt requests a plain-language explanation of roughly 20 words.
    The completion cap is deliberately higher than 20 tokens: a token is
    usually shorter than a word, so a 20-token limit truncates a 20-word
    answer mid-sentence.

    Args:
        buggy_code: Source text of the buggy version; interpolated into the prompt.
        fixed_code: Source text of the fixed version; interpolated into the prompt.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        max_tokens: Hard upper bound on the length of the generated reply.
        temperature: Sampling temperature passed to the API.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace stripped.

    Note:
        Exceptions raised by the ``openai`` call are not caught here and
        propagate to the caller.
    """
    # The prompt text is sent verbatim, including the indentation of its
    # continuation lines inside the triple-quoted string.
    prompt = f"""Explain in simple, clear language why the fixed code is correct and how it resolves the issue present in the buggy code. 
    Keep the explanation short, around 20 words.

    Buggy code:
    {buggy_code}

    Fixed code:
    {fixed_code}

    Why the fixed code is correct:"""

    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that provides clear, brief explanations of why the fixed code is correct."},
            {"role": "user", "content": prompt},
        ],
        # Headroom above the ~20 words requested in the prompt so the reply
        # is not cut off, while still bounding the cost of each call.
        max_tokens=max_tokens,
        temperature=temperature,
    )
    explanation = response['choices'][0]['message']['content'].strip()
    return explanation

# Destination for the augmented dataframe.
output_path = 'output_data/bug_fix_explain_sample_output_gpt.csv'

# DataFrame.to_csv does not create missing parent directories. Create the
# output directory up front so a missing folder cannot discard the results
# after every API call has already been made.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Call get_explanation once per row (one API request each) and store the
# results in a new 'explanation' column.
df['explanation'] = df.apply(lambda row: get_explanation(row['buggy_code'], row['fixed_code']), axis=1)

# Save the dataframe with the new 'explanation' column (row index omitted).
df.to_csv(output_path, index=False)

print("Explanations have been generated and saved successfully.")

Explanations have been generated and saved successfully.
