***OpenAI App Testing: running the apps and generating the App Output files***

**Loading packages, libraries and secrets into notebook**

In [1]:
# Importing the required libraries
import os
from dotenv import load_dotenv
from datasets import load_dataset
import pandas as pd

In [2]:
# Load any .env file into the process environment, then read the connection
# strings and API credentials from it. A name that is not set yields None.
load_dotenv()
MONGO_URI_SQL = os.environ.get("MONGO_URI_SQL")
MONGO_URI_schema = os.environ.get("MONGO_URI_Schema")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
HF_Token = os.environ.get("HF_TOKEN")

**Testing App 1b**

In [None]:
# Execute the App 1b notebook from this notebook. Presumably it defines `chain_1b`,
# which the cells below invoke -- TODO confirm in that notebook.
%run ../../A_Apps/1b_Openai_RAG_Schema.ipynb

In [None]:
# Read the Spider test selection and expose it as a pandas DataFrame
dataset_path = "../8_Testing_Input_and_Output/Spider_Testing_Selection.csv"
print("Dataset Path:", dataset_path)

if os.path.isfile(dataset_path):
    # Parse the CSV with the `datasets` loader
    testing_1b = load_dataset("csv", data_files=dataset_path)

    # The loaded data sits under the "train" split; turn it into a DataFrame
    df_1b_testing = testing_1b["train"].to_pandas()

    # Quick sanity check on the first rows
    print(df_1b_testing.head())
else:
    # Stop here with a clear message when the CSV is not at the expected path
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

In [None]:
# Run the App 1b chain over every test question and split each answer
def process_queries(df_1b_testing, chain=None):
    """Invoke the App 1b chain once per row and store the answers as new columns.

    Args:
        df_1b_testing: DataFrame with a "Query" column and a "DB_name" column.
            It is modified in place: the columns "Output", "Translation" and
            "Explanation" are added (or overwritten).
        chain: Optional object exposing ``invoke(text)``. When omitted, the
            notebook-level ``chain_1b`` is used, exactly as before.

    Returns:
        The same DataFrame object that was passed in.

    A failure while building the input or invoking the chain for one row does
    not abort the run; that row's "Output" instead holds the text
    "Error processing query <index>: <message>".
    """

    def build_input(query, db_name):
        # A missing database name may show up as None, NaN or "" depending on
        # how the CSV was read. NaN is truthy, so a plain `not db_name` check
        # would go on to attempt `nan + query` and raise a TypeError.
        if pd.isna(db_name) or not db_name:
            return query
        # NOTE(review): name and question are joined without a separator, as
        # in the previous version -- confirm this is what the chain expects.
        return str(db_name) + query

    def split_output(text):
        # Non-string results are split on their str() form so that one odd
        # answer cannot break the whole batch.
        if not isinstance(text, str):
            text = str(text)
        # Split at the first occurrence of the word "Explanation". The word
        # itself is dropped; whatever follows it (e.g. a colon) stays at the
        # start of the explanation.
        head, marker, tail = text.partition('Explanation')
        if not marker:
            # No "Explanation": the whole text is the translation
            return text, None
        return head.strip(), tail.strip()

    # Collect one result (or error message) per row, in row order
    output = []
    for i, row in df_1b_testing.iterrows():
        # A missing column is a setup problem, so it is allowed to raise here
        query = row["Query"]
        db_name = row["DB_name"]

        try:
            active_chain = chain_1b if chain is None else chain
            result = active_chain.invoke(build_input(query, db_name))
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # Add the raw results to a new column in the dataframe
    df_1b_testing["Output"] = output

    # Build both derived columns from plain lists; this also works for an
    # empty frame, where apply(pd.Series) yields no columns to assign.
    pieces = [split_output(text) for text in output]
    df_1b_testing["Translation"] = [translation for translation, _ in pieces]
    df_1b_testing["Explanation"] = [explanation for _, explanation in pieces]

    return df_1b_testing

# Send every test question through App 1b and collect the answers
df_1b_testing_output = process_queries(df_1b_testing)

# Show the returned frame: the input rows plus the Output, Translation and Explanation columns
print(df_1b_testing_output)

# Write the results to CSV without the index column
df_1b_testing_output.to_csv(
    path_or_buf="../8_Testing_Input_and_Output/App_Output_1b.csv",
    index=False,
)

**Testing App 2b**

In [None]:
# Execute the App 2b notebook from this notebook. Presumably it defines `chain_2b`,
# which the cells below invoke -- TODO confirm in that notebook.
%run ../../A_Apps/2b_Openai_RAG.ipynb

In [None]:
# Read the Spider test selection and expose it as a pandas DataFrame
dataset_path = "../8_Testing_Input_and_Output/Spider_Testing_Selection.csv"
print("Dataset Path:", dataset_path)

if os.path.isfile(dataset_path):
    # Parse the CSV with the `datasets` loader
    testing_2b = load_dataset("csv", data_files=dataset_path)

    # The loaded data sits under the "train" split; turn it into a DataFrame
    df_2b_testing = testing_2b["train"].to_pandas()

    # Quick sanity check on the first rows
    print(df_2b_testing.head())
else:
    # Stop here with a clear message when the CSV is not at the expected path
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

In [None]:
# Run the App 2b chain over every test question and split each answer
def process_queries(df_2b_testing, chain=None):
    """Invoke the App 2b chain once per row and store the answers as new columns.

    Args:
        df_2b_testing: DataFrame with a "Query" column. It is modified in
            place: the columns "Output", "Translation" and "Explanation" are
            added (or overwritten).
        chain: Optional object exposing ``invoke(text)``. When omitted, the
            notebook-level ``chain_2b`` is used, exactly as before.

    Returns:
        The same DataFrame object that was passed in.

    A failure while invoking the chain for one row does not abort the run;
    that row's "Output" instead holds the text
    "Error processing query <index>: <message>".
    """

    def split_output(text):
        # Non-string results are split on their str() form so that one odd
        # answer cannot break the whole batch.
        if not isinstance(text, str):
            text = str(text)
        # Split at the first occurrence of the word "Explanation". The word
        # itself is dropped; whatever follows it (e.g. a colon) stays at the
        # start of the explanation.
        head, marker, tail = text.partition('Explanation')
        if not marker:
            # No "Explanation": the whole text is the translation
            return text, None
        return head.strip(), tail.strip()

    # Collect one result (or error message) per row, in row order
    output = []
    for i, row in df_2b_testing.iterrows():
        # A missing column is a setup problem, so it is allowed to raise here
        query = row["Query"]

        try:
            active_chain = chain_2b if chain is None else chain
            result = active_chain.invoke(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # Add the raw results to a new column in the dataframe
    df_2b_testing["Output"] = output

    # Build both derived columns from plain lists; this also works for an
    # empty frame, where apply(pd.Series) yields no columns to assign.
    pieces = [split_output(text) for text in output]
    df_2b_testing["Translation"] = [translation for translation, _ in pieces]
    df_2b_testing["Explanation"] = [explanation for _, explanation in pieces]

    return df_2b_testing

# Send every test question through App 2b and collect the answers
df_2b_testing_output = process_queries(df_2b_testing)

# Show the returned frame: the input rows plus the Output, Translation and Explanation columns
print(df_2b_testing_output)

# Write the results to CSV without the index column
df_2b_testing_output.to_csv(
    path_or_buf="../8_Testing_Input_and_Output/App_Output_2b.csv",
    index=False,
)

**Testing App 3b**

In [9]:
# Execute the App 3b notebook from this notebook. Presumably it defines `chain_3b`,
# which the cells below invoke -- TODO confirm in that notebook.
%run ../../A_Apps/3b_Openai.ipynb

In [None]:
# Read the Spider test selection and expose it as a pandas DataFrame
dataset_path = "../8_Testing_Input_and_Output/Spider_Testing_Selection.csv"
print("Dataset Path:", dataset_path)

if os.path.isfile(dataset_path):
    # Parse the CSV with the `datasets` loader
    testing_3b = load_dataset("csv", data_files=dataset_path)

    # The loaded data sits under the "train" split; turn it into a DataFrame
    df_3b_testing = testing_3b["train"].to_pandas()

    # Quick sanity check on the first rows
    print(df_3b_testing.head())
else:
    # Stop here with a clear message when the CSV is not at the expected path
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

In [None]:
# Run the App 3b chain over every test question and split each answer
def process_queries(df_3b_testing, chain=None):
    """Invoke the App 3b chain once per row and store the answers as new columns.

    Args:
        df_3b_testing: DataFrame with a "Query" column. It is modified in
            place: the columns "Output", "Translation" and "Explanation" are
            added (or overwritten).
        chain: Optional object exposing ``invoke(text)``. When omitted, the
            notebook-level ``chain_3b`` is used, exactly as before.

    Returns:
        The same DataFrame object that was passed in.

    A failure while invoking the chain for one row does not abort the run;
    that row's "Output" instead holds the text
    "Error processing query <index>: <message>".
    """

    def split_output(text):
        # Non-string results are split on their str() form so that one odd
        # answer cannot break the whole batch.
        if not isinstance(text, str):
            text = str(text)
        # Split at the first occurrence of the word "Explanation". The word
        # itself is dropped; whatever follows it (e.g. a colon) stays at the
        # start of the explanation.
        head, marker, tail = text.partition('Explanation')
        if not marker:
            # No "Explanation": the whole text is the translation
            return text, None
        return head.strip(), tail.strip()

    # Collect one result (or error message) per row, in row order
    output = []
    for i, row in df_3b_testing.iterrows():
        # A missing column is a setup problem, so it is allowed to raise here
        query = row["Query"]

        try:
            active_chain = chain_3b if chain is None else chain
            result = active_chain.invoke(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # Add the raw results to a new column in the dataframe
    df_3b_testing["Output"] = output

    # Build both derived columns from plain lists; this also works for an
    # empty frame, where apply(pd.Series) yields no columns to assign.
    pieces = [split_output(text) for text in output]
    df_3b_testing["Translation"] = [translation for translation, _ in pieces]
    df_3b_testing["Explanation"] = [explanation for _, explanation in pieces]

    return df_3b_testing

# Send every test question through App 3b and collect the answers
df_3b_testing_output = process_queries(df_3b_testing)

# Show the returned frame: the input rows plus the Output, Translation and Explanation columns
print(df_3b_testing_output)

# Write the results to CSV without the index column
df_3b_testing_output.to_csv(
    path_or_buf="../8_Testing_Input_and_Output/App_Output_3b.csv",
    index=False,
)