In [5]:
import openai
import pandas as pd
import os
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model used by evaluate_explanation_nlp() to score the
# semantic similarity between a generated and a reference explanation.
model = SentenceTransformer('all-MiniLM-L6-v2')  # Any other sentence-transformers model name can be substituted here

# The API key is read from the environment; os.getenv returns None when the variable is unset.
OPEN_AI_API_KEY = os.getenv("OPEN_AI_API_KEY")

# These two values select the input CSV below and are reused in the output file name.
source_name = "data0"
sample_size = "5"

# Load the sample to process; the 'buggy_code' and 'fixed_code' columns are read from it later in this file.
df = pd.read_csv(f'source_data/{source_name}_sample_{sample_size}.csv')  # Path is relative to the current working directory

# Hand the key to the openai client library.
openai.api_key = OPEN_AI_API_KEY  # Value comes from the OPEN_AI_API_KEY environment variable read above

def get_batch_explanations(batch_prompts, model_name):
    """
    Send each prompt to the OpenAI Chat API and collect the reply texts.

    One request is issued per prompt, in order, against the chat model named
    by `model_name`. Returns a list holding the stripped text of the first
    choice of every response, in the same order as `batch_prompts`.
    """

    def _ask(prompt_text):
        # Every request carries the same system instruction followed by the user prompt.
        conversation = [
            {"role": "system", "content": "You are a helpful assistant that provides clear, brief explanations of why the fixed code is correct."},
            {"role": "user", "content": prompt_text},
        ]
        completion = openai.ChatCompletion.create(
            model=model_name,
            messages=conversation,
            max_tokens=100,
            n=1,
            temperature=0.5,
        )
        # n=1, so there is exactly one choice to read.
        first_choice = completion.choices[0]
        return first_choice.message['content'].strip()

    return [_ask(prompt_text) for prompt_text in batch_prompts]

def generate_reference_explanation(row):
    """
    Produce the reference explanation for one buggy/fixed code pair.

    `row` is indexed by the keys 'buggy_code' and 'fixed_code'. The prompt
    built from them is sent to "gpt-3.5-turbo" through
    get_batch_explanations, and the single resulting text is returned.
    """
    buggy_source = row['buggy_code']
    fixed_source = row['fixed_code']

    reference_prompt = f"""Explain in detailed, clear language why the fixed code is correct and how it resolves the issue present in the buggy code. Provide a reference explanation that can be used to compare with other explanations. Be specific and concise.

    Buggy code:
    {buggy_source}

    Fixed code:
    {fixed_source}

    Why the fixed code is correct:"""

    # A one-element batch goes in, so exactly one answer comes back.
    answers = get_batch_explanations([reference_prompt], model_name="gpt-3.5-turbo")
    return answers[0]

def evaluate_explanation_nlp(generated_explanation, reference_explanation, threshold=0.7):
    """
    Score a generated explanation against its reference by cosine similarity.

    Both texts are embedded with the module-level sentence-transformers
    `model`. Returns a tuple `(similarity_score, is_satisfactory)` where
    `is_satisfactory` is True when the score is at least `threshold`.
    """
    # Embed the candidate first, then the reference.
    candidate_vec, reference_vec = [
        model.encode(text, convert_to_tensor=True)
        for text in (generated_explanation, reference_explanation)
    ]

    # .item() turns the similarity result into a plain Python number.
    cosine = util.pytorch_cos_sim(candidate_vec, reference_vec).item()

    passed = cosine >= threshold
    return cosine, passed

def analyze_explanation_issue(generated_explanation, reference_explanation):
    """
    Classify why an explanation might be considered "Needs Refinement".

    Checks are applied in order and the first match wins:

    1. "too_short": fewer than 10 whitespace-separated words.
    2. "missing_key_details": at least one of the first five words of the
       reference does not occur as a word in the explanation.
    3. "too_vague": the explanation contains one of the generic words
       "good", "correct", "proper" or "appropriate".
    4. "general_issue": none of the above applied.

    Word comparisons are case-insensitive and ignore punctuation, so "The"
    matches "the" and "code," matches "code". They are also whole-word, so
    "property" is not mistaken for the vague term "proper".

    Returns the issue type as a string, suitable for refine_prompt().
    """
    import re

    # If the explanation is very short or lacks detail
    if len(generated_explanation.split()) < 10:
        return "too_short"

    # Normalise both texts to lowercase word tokens before comparing.
    generated_words = set(re.findall(r"\w+", generated_explanation.lower()))
    reference_keywords = re.findall(r"\w+", reference_explanation.lower())[:5]

    # If the explanation doesn't mention the leading words of the reference
    if any(keyword not in generated_words for keyword in reference_keywords):
        return "missing_key_details"

    # Check if the explanation relies on generic, uninformative words
    vague_terms = {"good", "correct", "proper", "appropriate"}
    if not vague_terms.isdisjoint(generated_words):
        return "too_vague"

    # Default case: issue is unclear
    return "general_issue"

def refine_prompt(prompt, issue_type):
    """
    Return `prompt` with a follow-up instruction matching `issue_type` appended.

    Recognised issue types are "too_short", "missing_key_details" and
    "too_vague"; any other value gets the general "not clear enough"
    instruction.
    """
    follow_ups = (
        ("too_short", "\nThe previous explanation was too short. Provide a more detailed explanation that covers the key points."),
        ("missing_key_details", "\nThe previous explanation missed key details. Focus specifically on how the fixed code resolves the problem in the buggy code."),
        ("too_vague", "\nThe previous explanation was too vague. Provide a more specific explanation, avoiding generic terms like 'good' or 'correct'."),
    )

    for known_issue, instruction in follow_ups:
        if issue_type == known_issue:
            return prompt + instruction

    # General refinement for anything not listed above
    return prompt + "\nThe previous explanation was not clear enough. Please rephrase the explanation to be more informative and detailed."

def refine_until_satisfactory(df, max_cycles=10):
    """
    Re-prompt rows marked 'Needs Refinement' until they pass or the budget runs out.

    Each cycle visits every row whose 'evaluation_result' is
    "Needs Refinement", diagnoses the current explanation, appends a matching
    instruction to the row's 'prompt', requests a new explanation and
    re-scores it against 'reference_explanation'.

    Parameters:
        df: DataFrame with the columns 'prompt', 'reference_explanation',
            'generated_explanation', 'similarity_score', 'evaluation_result'
            and 'completed_cycle'. It is modified in place.
        max_cycles: Maximum number of refinement cycles to run. Every cycle
            issues one API request per unsatisfactory row, so the cap keeps a
            row that never reaches the similarity threshold from looping (and
            billing) forever. Pass None to loop until every row is
            satisfactory.

    Returns:
        None. Rows that pass get 'completed_cycle' set to the number of the
        refinement cycle (counted from 1) in which they passed; rows still
        failing when the cap is hit keep "Needs Refinement".
    """
    cycle = 1
    while "Needs Refinement" in df['evaluation_result'].values:
        if max_cycles is not None and cycle > max_cycles:
            remaining = int((df['evaluation_result'] == "Needs Refinement").sum())
            print(f"Stopping after {max_cycles} cycle(s): {remaining} row(s) still marked 'Needs Refinement'.")
            break

        print(f"Cycle {cycle}: Refining prompts for 'Needs Refinement' rows.")
        # Boolean indexing yields a separate frame, so writing to df below
        # does not disturb this iteration.
        needs_refinement_rows = df[df['evaluation_result'] == "Needs Refinement"]

        for index, row in needs_refinement_rows.iterrows():
            # Analyze the issue with the generated explanation
            issue_type = analyze_explanation_issue(row['generated_explanation'], row['reference_explanation'])

            # Refine the prompt based on the issue
            refined_prompt = refine_prompt(row['prompt'], issue_type)

            # Generate a new explanation using gpt-4-turbo
            refined_explanation = get_batch_explanations([refined_prompt], model_name="gpt-4-turbo")[0]

            # Evaluate the refined explanation
            reference_explanation = row['reference_explanation']
            similarity_score, is_satisfactory = evaluate_explanation_nlp(refined_explanation, reference_explanation)

            # Update DataFrame with the new explanation and results
            df.at[index, 'generated_explanation'] = refined_explanation
            df.at[index, 'similarity_score'] = similarity_score
            df.at[index, 'evaluation_result'] = "Satisfactory" if is_satisfactory else "Needs Refinement"

            # If it's satisfactory, record the cycle number
            if is_satisfactory:
                df.at[index, 'completed_cycle'] = cycle

        cycle += 1

# Build the short-explanation prompt for each row from its buggy/fixed code pair
df['prompt'] = df.apply(
    lambda row: f"""Explain in simple, clear language why the fixed code is correct and how it resolves the issue present in the buggy code. 
    Keep the explanation short, around 20 words. don't include things as 'The fixed code correctly uses' like unwanted words, only include useful words, simply why the fixed_code is correct compared to buggy_code.

    Buggy code:
    {row['buggy_code']}

    Fixed code:
    {row['fixed_code']}

    Why the fixed code is correct:""",
    axis=1
)

# Generate AI-based reference explanations for each buggy/fixed code pair using GPT-3.5
df['reference_explanation'] = df.apply(generate_reference_explanation, axis=1)

# Initialize the result columns with placeholder values
df['generated_explanation'] = ""
df['similarity_score'] = 0.0
df['evaluation_result'] = ""
df['completed_cycle'] = 0  # To track the cycle in which it became satisfactory

# First batch of explanations using gpt-4-turbo
df['generated_explanation'] = get_batch_explanations(df['prompt'].tolist(), model_name="gpt-4-turbo")

# First evaluation: score every generated explanation against its reference
for index, row in df.iterrows():
    reference_explanation = row['reference_explanation']
    generated_explanation = row['generated_explanation']
    similarity_score, is_satisfactory = evaluate_explanation_nlp(generated_explanation, reference_explanation)
    
    df.at[index, 'similarity_score'] = similarity_score
    df.at[index, 'evaluation_result'] = "Satisfactory" if is_satisfactory else "Needs Refinement"
    
    # Mark completed cycle for satisfactory rows (first cycle is 1)
    if is_satisfactory:
        df.at[index, 'completed_cycle'] = 1

# Re-prompt the rows that are still marked 'Needs Refinement'
refine_until_satisfactory(df)

# Drop the 'prompt' column as it's no longer needed
df.drop(columns=['prompt'], inplace=True)

# Save the dataframe with the new columns
output_file = f'output_data/bug_fix_explain_{source_name}_sample_{sample_size}_output_gpt.csv'
# Create the output directory first: to_csv does not create missing parent
# folders, and failing here would throw away every API result gathered above.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
df.to_csv(output_file, index=False)

print(f"Explanations have been generated, evaluated, refined, and saved to {output_file} successfully.")


Explanations have been generated, evaluated, refined, and saved to output_data/bug_fix_explain_data0_sample_5_output_gpt.csv successfully.
