***CodeGemma app testing: run each app and generate a file with its output***

In [None]:
# Mount Google Drive at /content/drive; the notebooks run via %run and the CSV paths below live under it
from google.colab import drive
drive.mount('/content/drive')

***Loading packages, libraries and secrets into the notebook***

In [None]:
# Installing the required packages
!pip install pandas==2.1.4 numpy==1.23.5 tensorflow==2.15 datasets
# install below if using GPU
!pip install accelerate

In [2]:
# Importing the required libraries
from datasets import load_dataset
import pandas as pd
import os

In [3]:
# Secrets: two Mongo URIs and the HF_TOKEN value.
# Outside Colab they could be read from environment variables with the commented-out
# lines below (load_dotenv is not imported in the cells above and would need an import first).
#load_dotenv()
#MONGO_URI_SQL = os.getenv("MONGO_URI_SQL")
#MONGO_URI_schema = os.getenv("MONGO_URI_Schema")
#HF_Token = os.getenv("HF_TOKEN")

# In Google Colab, read the same secrets from the Colab secret store instead.
# NOTE(review): none of these three names is referenced again in the cells shown here;
# presumably the notebooks executed via %run rely on them - confirm.
from google.colab import userdata
MONGO_URI_SQL = userdata.get('MONGO_URI_SQL')
MONGO_URI_schema = userdata.get('MONGO_URI_Schema')
HF_Token = userdata.get('HF_TOKEN')

**Testing App 1a**

In [None]:
# Execute the App 1a notebook; presumably this is what defines chain_1a_invoke used below - TODO confirm
%run "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/1a_Gemma_RAG_Schema_CG.ipynb"

In [None]:
# Load part 1 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_1.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_1a_1 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_1a_testing_1 = testing_1a_1["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_1a_testing_1.head()

In [None]:
# Load part 2 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_2.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_1a_2 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_1a_testing_2 = testing_1a_2["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_1a_testing_2.head()

In [None]:
# Load part 3 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_3.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_1a_3 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_1a_testing_3 = testing_1a_3["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_1a_testing_3.head()

In [None]:
# Function to run the App 1a chain for each query in part 1
def process_queries_schema_1(df_1a_testing_1):
    """Run ``chain_1a_invoke`` on every row and return the rows with the answers.

    Args:
        df_1a_testing_1: DataFrame with a "Query" and a "DB_name" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``chain_1a_invoke(query, DB_name)``, or the
        string "Error processing query <index>: <message>" when that call
        raised. The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_1a_testing_1.iterrows():
        query = row["Query"]
        DB_name = row["DB_name"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer. Note that this also
        # hides a missing chain_1a_invoke (NameError is an Exception).
        try:
            result = chain_1a_invoke(query, DB_name)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_1a_testing_1.assign(Output=output)

# Run App 1a over part 1 of the test set; the returned frame carries the "Output" column
df_1a_testing_output_1 = process_queries_schema_1(df_1a_testing_1)

In [None]:
# Function to run the App 1a chain for each query in part 2
def process_queries_schema_2(df_1a_testing_2):
    """Run ``chain_1a_invoke`` on every row and return the rows with the answers.

    Args:
        df_1a_testing_2: DataFrame with a "Query" and a "DB_name" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``chain_1a_invoke(query, DB_name)``, or the
        string "Error processing query <index>: <message>" when that call
        raised. The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_1a_testing_2.iterrows():
        query = row["Query"]
        DB_name = row["DB_name"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = chain_1a_invoke(query, DB_name)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_1a_testing_2.assign(Output=output)

# Run App 1a over part 2 of the test set; the returned frame carries the "Output" column
df_1a_testing_output_2 = process_queries_schema_2(df_1a_testing_2)

In [None]:
# Function to run the App 1a chain for each query in part 3
def process_questions_schema_3(df_1a_testing_3):
    """Run ``chain_1a_invoke`` on every row and return the rows with the answers.

    Args:
        df_1a_testing_3: DataFrame with a "Query" and a "DB_name" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``chain_1a_invoke(query, DB_name)``, or the
        string "Error processing query <index>: <message>" when that call
        raised. The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_1a_testing_3.iterrows():
        query = row["Query"]
        DB_name = row["DB_name"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = chain_1a_invoke(query, DB_name)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_1a_testing_3.assign(Output=output)

# Run App 1a over part 3 of the test set; the returned frame carries the "Output" column
df_1a_testing_output_3 = process_questions_schema_3(df_1a_testing_3)

In [None]:
# Stitch the three processed parts back together; ignore_index renumbers the rows
df_combined_1a = pd.concat(
    [
        df_1a_testing_output_1,
        df_1a_testing_output_2,
        df_1a_testing_output_3,
    ],
    ignore_index=True,
)

# Export the combined App 1a results as one CSV without the row index.
# NOTE(review): relative path - the file is written to the current working directory.
df_combined_1a.to_csv("App_Output_1a_CG.csv", index=False)

**Testing App 2a**

In [None]:
# Execute the App 2a notebook; presumably this is what defines process_query_RAG used below - TODO confirm
%run "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/2a_Gemma_RAG_CG.ipynb"

In [None]:
# Load part 1 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_1.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_2a_1 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_2a_testing_1 = testing_2a_1["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_2a_testing_1.head()

In [None]:
# Load part 2 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_2.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_2a_2 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_2a_testing_2 = testing_2a_2["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_2a_testing_2.head()

In [None]:
# Load part 3 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_3.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_2a_3 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_2a_testing_3 = testing_2a_3["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_2a_testing_3.head()

In [None]:
# Function to run the App 2a pipeline for each query in part 1
def process_queries_1(df_2a_testing_1):
    """Run ``process_query_RAG`` on every row and return the rows with the answers.

    Args:
        df_2a_testing_1: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query_RAG(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_2a_testing_1.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query_RAG(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_2a_testing_1.assign(Output=output)

# Run App 2a over part 1 of the test set; the returned frame carries the "Output" column
df_2a_testing_output_1 = process_queries_1(df_2a_testing_1)

In [None]:
# Function to run the App 2a pipeline for each query in part 2
def process_queries_2(df_2a_testing_2):
    """Run ``process_query_RAG`` on every row and return the rows with the answers.

    Args:
        df_2a_testing_2: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query_RAG(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_2a_testing_2.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query_RAG(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_2a_testing_2.assign(Output=output)

# Run App 2a over part 2 of the test set; the returned frame carries the "Output" column
df_2a_testing_output_2 = process_queries_2(df_2a_testing_2)

In [None]:
# Function to run the App 2a pipeline for each query in part 3
def process_queries_3(df_2a_testing_3):
    """Run ``process_query_RAG`` on every row and return the rows with the answers.

    Args:
        df_2a_testing_3: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query_RAG(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_2a_testing_3.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query_RAG(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_2a_testing_3.assign(Output=output)

# Run App 2a over part 3 of the test set; the returned frame carries the "Output" column
df_2a_testing_output_3 = process_queries_3(df_2a_testing_3)

In [None]:
# Stitch the three processed parts back together; ignore_index renumbers the rows
df_combined_2a = pd.concat(
    [
        df_2a_testing_output_1,
        df_2a_testing_output_2,
        df_2a_testing_output_3,
    ],
    ignore_index=True,
)

# Export the combined App 2a results as one CSV without the row index.
# NOTE(review): relative path - the file is written to the current working directory.
df_combined_2a.to_csv("App_Output_2a_CG.csv", index=False)

**Testing App 3a**

In [None]:
# Execute the App 3a notebook; presumably this is what defines process_query used below - TODO confirm
%run "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/3a_Gemma_CG.ipynb"

In [None]:
# Load part 1 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_1.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_3a_1 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_3a_testing_1 = testing_3a_1["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_3a_testing_1.head()

In [None]:
# Load part 2 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_2.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_3a_2 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_3a_testing_2 = testing_3a_2["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_3a_testing_2.head()

In [None]:
# Load part 3 of the Spider test selection from Drive and convert it to a DataFrame
# Define the dataset path
dataset_path = "/content/drive/MyDrive/Colab.Notebooks/Gemma_Testing/Spider_Testing_Selection_3.csv"
print("Dataset Path:", dataset_path)

# Fail early with a clear message if the CSV is not at the expected path
if not os.path.isfile(dataset_path):
    raise FileNotFoundError(f"Unable to find the file at {dataset_path}")

# Read the CSV through the `datasets` csv loader
testing_3a_3 = load_dataset('csv', data_files=dataset_path)

# The rows are taken from the "train" split of the loaded dataset
df_3a_testing_3 = testing_3a_3["train"].to_pandas()

# Show a few rows to verify; a bare last expression gets the notebook's rich table display
df_3a_testing_3.head()

In [None]:
# Function to run the App 3a pipeline for each query in part 1
def process_queries_1(df_3a_testing_1):
    """Run ``process_query`` on every row and return the rows with the answers.

    NOTE: this redefines the ``process_queries_1`` from the App 2a section;
    once this cell has run, that name calls ``process_query`` instead of
    ``process_query_RAG``.

    Args:
        df_3a_testing_1: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_3a_testing_1.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_3a_testing_1.assign(Output=output)

# Run App 3a over part 1 of the test set; the returned frame carries the "Output" column
df_3a_testing_output_1 = process_queries_1(df_3a_testing_1)

In [9]:
# Function to run the App 3a pipeline for each query in part 2
def process_queries_2(df_3a_testing_2):
    """Run ``process_query`` on every row and return the rows with the answers.

    NOTE: this redefines the ``process_queries_2`` from the App 2a section;
    once this cell has run, that name calls ``process_query`` instead of
    ``process_query_RAG``.

    Args:
        df_3a_testing_2: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_3a_testing_2.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_3a_testing_2.assign(Output=output)

# Run App 3a over part 2 of the test set; the returned frame carries the "Output" column
df_3a_testing_output_2 = process_queries_2(df_3a_testing_2)

In [10]:
# Function to run the App 3a pipeline for each query in part 3
def process_queries_3(df_3a_testing_3):
    """Run ``process_query`` on every row and return the rows with the answers.

    NOTE: this redefines the ``process_queries_3`` from the App 2a section;
    once this cell has run, that name calls ``process_query`` instead of
    ``process_query_RAG``.

    Args:
        df_3a_testing_3: DataFrame with a "Query" column.

    Returns:
        A new DataFrame equal to the input plus an "Output" column. Each entry
        is the value returned by ``process_query(query)``, or the string
        "Error processing query <index>: <message>" when that call raised.
        The caller's frame is not modified.
    """
    # Collect one result per row, in row order
    output = []

    for i, row in df_3a_testing_3.iterrows():
        query = row["Query"]

        # Best effort: a failing query must not abort the whole run, so the
        # error text is stored in place of the answer.
        try:
            result = process_query(query)
        except Exception as e:
            result = f"Error processing query {i}: {str(e)}"

        output.append(result)

    # assign() builds a new frame, leaving the input untouched
    return df_3a_testing_3.assign(Output=output)

# Run App 3a over part 3 with the helper defined in this cell. process_queries_1 must not be
# used here: that name is also defined in the App 2a section, so it may run the wrong app.
df_3a_testing_output_3 = process_queries_3(df_3a_testing_3)

In [11]:
# Stitch the three processed parts back together; ignore_index renumbers the rows
df_combined_3a = pd.concat(
    [
        df_3a_testing_output_1,
        df_3a_testing_output_2,
        df_3a_testing_output_3,
    ],
    ignore_index=True,
)

# Export the combined App 3a results as one CSV without the row index.
# NOTE(review): relative path - the file is written to the current working directory.
df_combined_3a.to_csv("App_Output_3a_CG.csv", index=False)