***Testing of 2b OpenAI with RAG***

**Loading packages, libraries and secrets into notebook**

In [1]:
%run ../Setup.ipynb

Note: you may need to restart the kernel to use updated packages.

Note: you may need to restart the kernel to use updated packages.


**Loading data into dataframe for testing**

In [2]:
# Location of the CSV holding the 2b outputs that will be assessed below
dataset_path = "Testing_Output_2b.csv"
print("Dataset Path:", dataset_path)

# Only attempt the load when the CSV is really there; otherwise stop with a
# clear error instead of a loader-specific failure
if os.path.isfile(dataset_path):
    testing_output_2b = load_dataset('csv', data_files=dataset_path)
else:
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Take the 'train' split of the loaded dataset and turn it into a pandas dataframe
df_2b_testing_output = testing_output_2b['train'].to_pandas()

# Quick sanity check: show the first rows
print(df_2b_testing_output.head())

Dataset Path: Testing_Output_2b.csv
            DB_ID                                              Query  \
0  concert_singer  SELECT T2.name ,  T2.capacity FROM concert AS ...   
1          pets_1  SELECT T1.fname ,  T1.age FROM student AS T1 J...   
2           car_1  SELECT T1.CountryName FROM COUNTRIES AS T1 JOI...   
3           car_1  SELECT T2.MakeId ,  T2.Make FROM CARS_DATA AS ...   
4           car_1  select t1.id ,  t1.maker from car_makers as t1...   

                                            Question  \
0  Show the stadium name and capacity with most n...   
1  Find the first name and age of students who ha...   
2  Which countries in europe have at least 3 car ...   
3  Among the cars with more than lowest horsepowe...   
4  Which are the car makers which produce at leas...   

                                              Output  \
0  Translation: Find the name and capacity of the...   
1  Translation: Find the first name and age of st...   
2  Translation: Show the n

**Testing Template**

In [3]:
# Prompt template for the LLM-as-judge assessment (this is the prompt, not the
# chain itself). It asks the model to grade, on a scale from 4 (Acceptable) down
# to 1 (Unacceptable), how well an explanation describes a SQL query.
# The {query}, {explanation} and {question} placeholders are filled from the
# inputs passed to whichever chain is built on top of this prompt.
# NOTE(review): the double quotes wrapping the instruction paragraph are part of
# the string, so they are sent to the model verbatim - confirm this is intended.
template_testing = """
"How well does the following Explanation explain the SQL Query? Please assign and output one of the following scores where 4 is the highest and 1 is the lowest: Acceptable (4), Minor errors (3), Major errors (2), or Unacceptable (1). To determine the score, go through the assessment step by step and consider the accuracy and understandability of the translation and explanation."

SQL Query: {query}

Natural Language Explanation: {explanation}

Question: {question}
"""

# Build the reusable chat prompt object from the raw template string
prompt_testing = ChatPromptTemplate.from_template(template_testing)

**OpenAI Assessment**

In [4]:
# Model and parsing setup
model = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")
# Turns the chat model's message output into a plain string
parser = StrOutputParser()

# The chain is invoked with a dict that already carries the "query",
# "explanation" and "question" keys, so it is piped straight into the prompt.
# A {"query": RunnablePassthrough(), ...} mapping in front of the prompt would
# hand the *entire* input dict to every placeholder (each passthrough forwards
# its whole input), so the prompt would contain the stringified dict three
# times instead of the individual fields.
chain_testing_OAI = prompt_testing | model | parser

# Grade every (query, explanation, question) row with the OpenAI chain
def Explanation_testing_OAI(df_2b_testing_output):
    """Add an "Assessment OAI" column holding the OpenAI chain's verdict per row.

    Each row's "Query", "Question" and "Explanation" values are sent to
    ``chain_testing_OAI``. If the chain call raises, the error text is stored
    for that row instead, so one failing request does not abort the run.

    The dataframe is modified in place and the same object is returned.

    NOTE(review): the preview printed after loading the CSV shows an "Output"
    column; confirm the file really has an "Explanation" column.
    """

    def _assess(row_idx, record):
        # Column reads happen outside the try block on purpose: a missing
        # column is a setup problem and should surface as a KeyError.
        sql = record["Query"]
        asked = record["Question"]
        explained = record["Explanation"]
        payload = {"query": sql, "explanation": explained, "question": asked}

        try:
            return chain_testing_OAI.invoke(payload)
        except Exception as exc:
            return f"Error in row {row_idx}: {str(exc)}"

    # Collect all verdicts first, then attach them as a new column
    verdicts = [
        _assess(row_idx, record)
        for row_idx, record in df_2b_testing_output.iterrows()
    ]
    df_2b_testing_output["Assessment OAI"] = verdicts

    return df_2b_testing_output

# Run the OpenAI assessment over every row. The function adds the
# "Assessment OAI" column to df_2b_testing_output itself and returns that same
# object, so df_explanation_assessment_OAI is an alias of it, not a copy.
df_explanation_assessment_OAI = Explanation_testing_OAI(df_2b_testing_output)

**Gemini Assessment**

In [5]:
# Gemini chat model used as the second judge
Gemini_model = ChatGoogleGenerativeAI(model="gemini-pro", api_key=GOOGLE_API_KEY)

# The chain is invoked with a dict that already carries the "query",
# "explanation" and "question" keys, so it is piped straight into the prompt.
# A {"query": RunnablePassthrough(), ...} mapping in front of the prompt would
# hand the *entire* input dict to every placeholder (each passthrough forwards
# its whole input), so the prompt would contain the stringified dict three
# times instead of the individual fields.
chain_testing_Gemi = prompt_testing | Gemini_model | parser

# Function to compare each question and result using the chain
def testing_Gemi(df_2b_testing_output):
    assessment_Gemi = []
    
    for i, row in df_2b_testing_output.iterrows():
        # Get the question and result from the dataframe
        query = row["Query"]
        question = row["Question"]
        explanation = row["Explanation"]
        
        # Create a dictionary with query and result to pass to the chain
        inputs = {"query": query, "explanation": explanation, "question" : question}

        # Run the chain and catch any potential errors
        try:
            test_output_Gemi = chain_testing_Gemi.invoke(inputs)
        except Exception as e:
            test_output_Gemi = f"Error in row {i}: {str(e)}"
        
        # Store the comparison output
        assessment_Gemi.append(test_output_Gemi)
    
    # Add the comparison results to a new column
    df_2b_testing_output["Assessment Gemini"] = assessment_Gemi
    
    return df_2b_testing_output

# Run the Gemini assessment over every row. The function adds the
# "Assessment Gemini" column to df_2b_testing_output itself and returns that
# same object, so df_explanation_assessment_Gemi is an alias of it, not a copy.
df_explanation_assessment_Gemi = testing_Gemi(df_2b_testing_output)

**Claude Assessment**

In [6]:
# Claude chat model used as the third judge
Claude_model = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=ANTHROPIC_API_KEY)

# The chain is invoked with a dict that already carries the "query",
# "explanation" and "question" keys, so it is piped straight into the prompt.
# A {"query": RunnablePassthrough(), ...} mapping in front of the prompt would
# hand the *entire* input dict to every placeholder (each passthrough forwards
# its whole input), so the prompt would contain the stringified dict three
# times instead of the individual fields.
chain_testing_Claude = prompt_testing | Claude_model | parser

# Function to compare each question and result using the chain
def testing_Claude(df_2b_testing_output):
    assessment_Claude = []
    
    for i, row in df_2b_testing_output.iterrows():
        # Get the question and result from the dataframe
        query = row["Query"]
        question = row["Question"]
        explanation = row["Explanation"]
        
        # Create a dictionary with query and result to pass to the chain
        inputs = {"query": query, "explanation": explanation, "question" : question}

        # Run the chain and catch any potential errors
        try:
            test_output_Claude = chain_testing_Claude.invoke(inputs)
        except Exception as e:
            test_output_Claude = f"Error in row {i}: {str(e)}"
        
        # Store the comparison output
        assessment_Claude.append(test_output_Claude)
    
    # Add the comparison results to a new column
    df_2b_testing_output["Assessment Claude"] = assessment_Claude
    
    return df_2b_testing_output

# Run the Claude assessment over every row. The function adds the
# "Assessment Claude" column to df_2b_testing_output itself and returns that
# same object, so df_explanation_assessment_Claude is an alias of it, not a copy.
df_explanation_assessment_Claude = testing_Claude(df_2b_testing_output)

# Save the dataframe to a CSV file (without the index). Because every
# assessment function above writes its column onto the same dataframe object,
# the file contains all assessment columns added so far, not only Claude's.
df_explanation_assessment_Claude.to_csv("Explanation_assessment_2b.csv", index=False)