<a href="https://colab.research.google.com/github/RedPlunder/CS450-Winter/blob/main/Test_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**RAG version: N/A**

**Prompt version: N/A**

**GPT version: gpt-4o-mini**

*Last modified: 3/10 by Jinwei*


In [None]:
import pandas as pd
import openai
import asyncio
import nest_asyncio
from google.colab import userdata

In [None]:
# Retrieve OpenAI API key from Google Colab user data
# (looked up under the name "OPENAI_API_KEY") and store it on the openai module.
openai.api_key = userdata.get("OPENAI_API_KEY")

# Use OpenAI AsyncClient for parallel API calls
# `client` is a module-level object, built once with the key retrieved above.
client = openai.AsyncClient(api_key=openai.api_key)

# Allow running asyncio inside Jupyter Notebook (Colab)
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop; confirm it is still required, since the entry cell uses a
# top-level `await` rather than `asyncio.run`.
nest_asyncio.apply()

In [None]:
async def generate_responses(
    queries,
    *,
    model="gpt-4o-mini",
    max_tokens=4090,
    temperature=0,
):
    """
    Generate responses for a batch of queries using OpenAI API.

    Each query is sent as a single-turn ``user`` message through the
    module-level ``client``; all requests of the batch are awaited
    concurrently.

    Args:
        queries: Iterable of prompt strings, one request per item.
        model: Chat model name forwarded to the API.
        max_tokens: ``max_tokens`` value forwarded with every request.
        temperature: ``temperature`` value forwarded with every request.

    Returns:
        A list with one entry per query, in the same order as ``queries``:
        the content of the first returned choice, or the string
        ``"Error: Could not generate response."`` if that request raised.
    """

    async def fetch_response(messages):
        # Best-effort per request: a single failure is reported and replaced
        # by a placeholder so the rest of the batch is still returned.
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error fetching response: {e}")
            return "Error: Could not generate response."

    messages_batch = [[{"role": "user", "content": query}] for query in queries]

    # Execute API calls in parallel; gather keeps results in input order.
    responses = await asyncio.gather(
        *(fetch_response(messages) for messages in messages_batch)
    )
    return responses

In [None]:
async def process_questions(file_path, batch_size=40):
    """
    Process unanswered questions in a CSV file, generate responses, and save the results.

    The CSV is read from ``file_path``, every row whose
    ``gpt_Generated_Response`` cell is missing or empty is sent (via its
    ``Question Body`` value) to ``generate_responses`` in batches, and the
    whole table is written back to the same path.

    Args:
        file_path: Path of the CSV file to read and overwrite.
        batch_size: Number of questions sent to ``generate_responses`` per
            batch; must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    df = pd.read_csv(file_path, encoding="utf-8")

    # Ensure the response column exists
    response_col_name = "gpt_Generated_Response"
    if response_col_name not in df.columns:
        df[response_col_name] = ""

    # An all-empty column is read back as float NaN; switch to object dtype so
    # string responses can be assigned into it without a dtype conflict.
    df[response_col_name] = df[response_col_name].astype(object)

    # Filter unanswered questions. read_csv turns empty cells into NaN, so a
    # plain == "" comparison would miss every blank row of an existing file.
    existing = df[response_col_name]
    unanswered_mask = existing.isna() | (existing == "")
    pending_index = df.index[unanswered_mask]
    total = len(pending_index)

    # Process questions in batches
    for start in range(0, total, batch_size):
        batch_index = pending_index[start : start + batch_size]
        queries = df.loc[batch_index, "Question Body"].tolist()

        # Generate responses using OpenAI API
        responses = await generate_responses(queries)

        # Update DataFrame with responses
        df.loc[batch_index, response_col_name] = responses

        # Display progress
        processed = start + len(batch_index)
        progress_percent = (processed / total) * 100
        print(f"Processed {processed} / {total} questions ({progress_percent:.1f}%)")

    # Save updated CSV file
    df.to_csv(file_path, index=False, encoding="utf-8")
    print(f"Processing complete. Results saved to {file_path}")

In [None]:
# Entry cell: run the question-answering pipeline on the test dataset.
if __name__ == "__main__":
    file_path = "./dataset/test.csv"
    # NOTE(review): a top-level `await` is accepted in IPython/Colab cells but
    # is a SyntaxError in a plain Python script; confirm this is only ever run
    # as a notebook cell.
    await process_questions(file_path)