***Testing of 1a Gemma with RAG and Schema***

**Loading packages, libraries and secrets into notebook**

In [8]:
# Importing the required libraries
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
import os
from dotenv import load_dotenv
from datasets import load_dataset
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic

In [9]:
# Read the provider API keys from the process environment.
# load_dotenv() runs first so that values kept in a local .env file can be
# picked up by os.getenv; a key that is not set comes back as None.
load_dotenv()
OPENAI_API_KEY, GOOGLE_API_KEY, ANTHROPIC_API_KEY = map(
    os.getenv,
    ("OPENAI_API_KEY", "GOOGLE_API_KEY", "ANTHROPIC_API_KEY"),
)

**Loading data into dataframe for testing**

In [10]:
# Read the 1a app output CSV and expose it as a pandas dataframe.
# The path is relative to the directory the notebook is run from.
dataset_path = "../8_Testing_Input_and_Output/App_Output_1a.csv"
print("Dataset Path:", dataset_path)

# Stop early, naming the offending path, when the CSV is not there
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Parse the CSV with the `datasets` loader; the rows are read from the
# 'train' key of the returned object
testing_output_1a = load_dataset('csv', data_files=dataset_path)
train_split = testing_output_1a['train']

# Hand the rows over to pandas for the row-by-row assessment below
df_1a_testing_output = train_split.to_pandas()

# Show the first rows as a sanity check
print(df_1a_testing_output.head())

Dataset Path: ../8_Testing_Input_and_Output/App_Output_1a.csv


Generating train split: 0 examples [00:00, ? examples/s]

            DB_ID                                              Query  \
0  concert_singer  SELECT T2.name ,  T2.capacity FROM concert AS ...   
1          pets_1  SELECT T1.fname ,  T1.age FROM student AS T1 J...   
2           car_1  SELECT T1.CountryName FROM COUNTRIES AS T1 JOI...   
3           car_1  SELECT T2.MakeId ,  T2.Make FROM CARS_DATA AS ...   
4           car_1  select t1.id ,  t1.maker from car_makers as t1...   

                                            Question  \
0  Show the stadium name and capacity with most n...   
1  Find the first name and age of students who ha...   
2  Which countries in europe have at least 3 car ...   
3  Among the cars with more than lowest horsepowe...   
4  Which are the car makers which produce at leas...   

                                              Output  \
0  The query aims to find the name of the stadium...   
1  The query aims to find the first name and age ...   
2  The query aims to identify the countries that ...   
3  The

**Testing Template**

In [11]:
# Grading prompt used by the assessment chains in this notebook.
# It asks the judge model for a critical, step-by-step assessment that ends in
# one of four scores (4 = Acceptable ... 1 = Unacceptable) and is filled with
# three variables: {query}, {explanation} and {question}.
testing_template = (
    "\n"
    '"How well does the following Explanation explain the SQL Query? Please assess it critically then assign and output one of the following scores where 4 is the highest and 1 is the lowest: Acceptable (4), Minor errors (3), Major errors (2), or Unacceptable (1). To determine the score, go through the assessment step by step and consider the accuracy and understandability of the translation and explanation."\n'
    "\n"
    "SQL Query: {query}\n"
    "\n"
    "Explanation: {explanation}\n"
    "\n"
    "Question: {question}\n"
)

# Turn the raw text into a chat prompt that can be piped into a model
prompt_testing = ChatPromptTemplate.from_template(testing_template)

**OpenAI Assessment**

In [12]:
# Judge model (OpenAI) and the parser that reduces the chat reply to a plain
# string. `parser` is reused by the Gemini and Claude chains further down.
model = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini")
parser = StrOutputParser()

# The chain is invoked with a dict that already carries the three template
# variables ("query", "explanation", "question"), so that dict is piped
# straight into the prompt.
# Do NOT wrap it in {"query": RunnablePassthrough(), ...}: every branch of such
# a mapping receives the *whole* input, so each placeholder would be rendered
# with the full input dict instead of its own value.
chain_testing_OAI = prompt_testing | model | parser

# Score every explanation in the dataframe with the OpenAI assessment chain
def Explanation_testing_OAI(df_1a_testing_output):
    """Assess each row's explanation with ``chain_testing_OAI``.

    The dataframe must provide the columns "Query", "Question" and
    "Explanation". For every row the three values are sent to the
    module-level ``chain_testing_OAI`` and the reply is collected.

    A row whose chain call raises does not stop the run: its entry becomes
    the text "Error in row <index label>: <message>" instead.

    The replies are written to a new "Assessment OAI" column on the dataframe
    that was passed in (it is modified in place), and that same dataframe is
    returned.
    """

    def _assess(row_label, record):
        # Pull the three fields the prompt needs out of the current row
        sql_text = record["Query"]
        asked = record["Question"]
        explained = record["Explanation"]
        payload = dict(query=sql_text, explanation=explained, question=asked)

        # Keep going on failure; the error text takes the place of the score
        try:
            return chain_testing_OAI.invoke(payload)
        except Exception as err:
            return f"Error in row {row_label}: {err}"

    # One chain call per row, in dataframe order
    verdicts = [
        _assess(row_label, record)
        for row_label, record in df_1a_testing_output.iterrows()
    ]

    # Attach the replies as a new column on the caller's dataframe
    df_1a_testing_output["Assessment OAI"] = verdicts
    return df_1a_testing_output

# Run the OpenAI assessment over every row. The function writes the
# "Assessment OAI" column into df_1a_testing_output itself and returns that
# same object, so df_explanation_assessment_OAI is an alias, not a copy.
df_explanation_assessment_OAI = Explanation_testing_OAI(df_1a_testing_output)

**Gemini Assessment**

In [13]:
# Judge model (Google Gemini)
Gemini_model = ChatGoogleGenerativeAI(model="gemini-pro", api_key=GOOGLE_API_KEY)

# The chain is invoked with a dict that already carries the three template
# variables ("query", "explanation", "question"), so that dict is piped
# straight into the prompt.
# Do NOT wrap it in {"query": RunnablePassthrough(), ...}: every branch of such
# a mapping receives the *whole* input, so each placeholder would be rendered
# with the full input dict instead of its own value.
# `parser` is the StrOutputParser created in the OpenAI cell.
chain_testing_Gemi = prompt_testing | Gemini_model | parser

# Function to compare each question and result using the chain
def testing_Gemi(df_1a_testing_output):
    assessment_Gemi = []
    
    for i, row in df_1a_testing_output.iterrows():
        # Get the question and result from the dataframe
        query = row["Query"]
        question = row["Question"]
        explanation = row["Explanation"]
        
        # Create a dictionary with query and result to pass to the chain
        inputs = {"query": query, "explanation": explanation, "question" : question}

        # Run the chain and catch any potential errors
        try:
            test_output_Gemi = chain_testing_Gemi.invoke(inputs)
        except Exception as e:
            test_output_Gemi = f"Error in row {i}: {str(e)}"
        
        # Store the comparison output
        assessment_Gemi.append(test_output_Gemi)
    
    # Add the comparison results to a new column
    df_1a_testing_output["Assessment Gemini"] = assessment_Gemi
    
    return df_1a_testing_output

# Run the Gemini assessment over every row. The function writes the
# "Assessment Gemini" column into df_1a_testing_output itself and returns that
# same object, so df_explanation_assessment_Gemi is an alias, not a copy.
df_explanation_assessment_Gemi = testing_Gemi(df_1a_testing_output)

**Claude Assessment**

In [14]:
# Judge model (Anthropic Claude)
Claude_model = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=ANTHROPIC_API_KEY)

# The chain is invoked with a dict that already carries the three template
# variables ("query", "explanation", "question"), so that dict is piped
# straight into the prompt.
# Do NOT wrap it in {"query": RunnablePassthrough(), ...}: every branch of such
# a mapping receives the *whole* input, so each placeholder would be rendered
# with the full input dict instead of its own value.
# `parser` is the StrOutputParser created in the OpenAI cell.
chain_testing_Claude = prompt_testing | Claude_model | parser

# Function to compare each question and result using the chain
def testing_Claude(df_1a_testing_output):
    assessment_Claude = []
    
    for i, row in df_1a_testing_output.iterrows():
        # Get the question and result from the dataframe
        query = row["Query"]
        question = row["Question"]
        explanation = row["Explanation"]
        
        # Create a dictionary with query and result to pass to the chain
        inputs = {"query": query, "explanation": explanation, "question" : question}

        # Run the chain and catch any potential errors
        try:
            test_output_Claude = chain_testing_Claude.invoke(inputs)
        except Exception as e:
            test_output_Claude = f"Error in row {i}: {str(e)}"
        
        # Store the comparison output
        assessment_Claude.append(test_output_Claude)
    
    # Add the comparison results to a new column
    df_1a_testing_output["Assessment Claude"] = assessment_Claude
    
    return df_1a_testing_output

# Run the Claude assessment over every row. The function writes the
# "Assessment Claude" column into df_1a_testing_output itself and returns that
# same object, so df_explanation_assessment_Claude is an alias, not a copy.
df_explanation_assessment_Claude = testing_Claude(df_1a_testing_output)

# Save the dataframe, including the assessment columns, to a CSV file.
# Because each assessment function adds its column to df_1a_testing_output in
# place, the file also contains "Assessment OAI" and "Assessment Gemini" when
# those cells were run before this one. The row index is not written.
df_explanation_assessment_Claude.to_csv("../8_Testing_Input_and_Output/Explanation_assessment_1a.csv", index=False)