In [4]:
import os
import time
import pandas as pd
import google.generativeai as genai
from google.api_core.exceptions import ResourceExhausted

# sample size and source_name
source_name = "data1"
sample_size = "1000"

# Configure the Gemini API key
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Define the model configuration
generation_config = {
    "temperature": 0.5,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 30,  # Adjust as needed to fit your requirements
    "response_mime_type": "text/plain",
}

# Create the model
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",  # Replace with the correct model name as needed
    generation_config=generation_config,
    # safety_settings: you can adjust this as needed
)

def get_explanation(buggy_code, fixed_code, retries=3, delay=10):
    """
    Function to get a brief, natural language explanation of why the fixed code is correct compared to the buggy code.
    Includes retry logic and delay handling for API rate limits.
    """
    prompt = f"""Explain in simple, clear language why the fixed code is correct and how it resolves the issue present in the buggy code. 
    Keep the explanation short, around 20 words. don't include things as 'The fixed code correctly uses' like unwanted words, only include useful words, simply why the fixed_code is correct compared to buggy_code.

    Buggy code:
    {buggy_code}

    Fixed code:
    {fixed_code}

    Why the fixed code is correct:"""

    for attempt in range(retries):
        try:
            # Start a chat session
            chat_session = model.start_chat(history=[])

            # Send the message (prompt) to the chat session
            response = chat_session.send_message(prompt)

            # Extract the text from the response
            explanation = response.text.strip()

            return explanation

        except ResourceExhausted:
            if attempt < retries - 1:
                print(f"Quota exceeded, retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print("Max retries exceeded, returning None")
                return None

# Load your dataframe
df = pd.read_csv(f"source_data/{source_name}_sample_{sample_size}.csv")  # Replace with the actual path to your dataframe

# Apply the get_explanation function to each row and create a new column 'explanation'
df['explanation'] = df.apply(lambda row: get_explanation(row['buggy_code'], row['fixed_code']), axis=1)

# Save the dataframe with the new 'explanation' column
df.to_csv(f'output_data/bug_fix_explain_{source_name}_sample_{sample_size}_output_gemini.csv', index=False)

print("Explanations have been generated and saved successfully.")


Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Max retries exceeded, returning None
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Max retries exceeded, returning None
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Max retries exceeded, returning None
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Max retries exceeded, returning None
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Quota exceeded, retrying in 10 seconds...
Max retries exceeded, returning None
Quota exceeded, retrying in 10 seconds...
Quota exceeded, r