In [None]:
# Google Colab venv setup
# Installs the python3.10-venv package and creates a virtual environment
# under /content/my_venv.
!apt install python3.10-venv
!python -m venv /content/my_venv
# NOTE(review): every `!` line is handed to its own shell, so this activation
# presumably does not carry over to later cells or to the running kernel —
# confirm whether the venv is actually used by the installs that follow.
!source /content/my_venv/bin/activate

**Gemma RAG LLM setup**

In [None]:
# Installing the required packages
!pip install langchain pandas==2.1.4 numpy==1.23.5 scipy==1.13.0 pymongo gradio requests langchain_community langchain_core langchain_mongodb sentence_transformers transformers python-dotenv tensorflow==2.15
!pip install -U transformers
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
#install below if using GPU
!pip install accelerate

In [None]:
# Importing the required libraries
from pymongo import MongoClient
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain.document_loaders import DirectoryLoader
from langchain.chains import RetrievalQA
import gradio as gr
from gradio.themes.base import Base
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from sentence_transformers import SentenceTransformer # https://huggingface.co/thenlper/gte-large
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from dotenv import load_dotenv
from transformers import AutoConfig

***Accessing secrets***

In [None]:
# In Google Colab, you can use the following code to access the secret
# Each value is looked up by name through `userdata.get`.
from google.colab import userdata
HF_Token = userdata.get('HF_TOKEN')  # presumably the Hugging Face access token
MONGO_URI_SQL = userdata.get("MONGO_URI_SQL")  # passed to MongoClient for the SQL vector collection
# NOTE(review): the secret name here is 'MONGO_URI_Schema' (capital S) while the
# local variant below reads 'MONGO_URI_schema'; likewise 'HF_TOKEN' vs "HF_Token".
# Confirm the names match what is actually stored in each environment.
MONGO_URI_schema = userdata.get('MONGO_URI_Schema')  # passed to MongoClient for the schema vector collection

# In your local environment, you can use the following code to access the secret
# (reads the same three values from environment variables / a .env file instead).
#load_dotenv()
#HF_Token = os.getenv("HF_Token")
#MONGO_URI_SQL = os.getenv("MONGO_URI_SQL")
#MONGO_URI_schema = os.getenv('MONGO_URI_schema')

***Generating the embedding***

In [None]:
# Embedding model setup
# Loads the "thenlper/gte-large" SentenceTransformer; the same instance is used
# to embed both stored documents and incoming queries.
embedding_model = SentenceTransformer("thenlper/gte-large")

class CustomEmbeddingFunction:
    """Adapter that exposes ``embed_documents`` / ``embed_query`` over an encoder.

    The wrapped ``model`` only needs an ``encode`` method whose result offers
    ``tolist()``; both public methods return that list form.
    """

    def __init__(self, model):
        """Keep a reference to the encoder used by both embedding methods."""
        self.model = model

    def _encode_as_list(self, payload):
        """Run the encoder on ``payload`` and convert the result with ``tolist()``."""
        return self.model.encode(payload).tolist()

    def embed_documents(self, texts):
        """Embed a collection of texts; returns the encoder output as a list."""
        return self._encode_as_list(texts)

    def embed_query(self, text):
        """Embed one query string; returns the encoder output as a list."""
        return self._encode_as_list(text)

# Wrap the SentenceTransformer model so it offers the embed_documents /
# embed_query methods; this wrapper is what the vector stores receive as
# their `embedding` argument.
embedding_function = CustomEmbeddingFunction(embedding_model)

In [None]:
## MongoDB setup
# SQL Vector: connect and resolve the collection that backs the SQL retriever.
client_SQL = MongoClient(MONGO_URI_SQL)
dbName_SQL = "MVector"
collectionName_SQL = "MTSQL"
collection_SQL = client_SQL[dbName_SQL][collectionName_SQL]
index_name_SQL = "vector_index_SQL"

## SQL Vector setup
# Vector store setup.
# Only the collection handle is passed: `collection_SQL` already carries its
# client and database, and `client=` / `database=` are not documented
# constructor parameters of MongoDBAtlasVectorSearch (depending on the installed
# langchain_mongodb version they are either silently ignored or rejected with a
# TypeError).
vector_store_SQL = MongoDBAtlasVectorSearch(
    collection=collection_SQL,
    embedding=embedding_function,
    index_name=index_name_SQL,
    text_key="Query",  # document field used as the text of each retrieved document
)

In [None]:
# Schema Vector: connect and resolve the collection that backs the schema retriever.
client_schema = MongoClient(MONGO_URI_schema)
dbName_schema = "MVector"
collectionName_schema = "MTSchema"
collection_schema = client_schema[dbName_schema][collectionName_schema]
index_name_schema = "vector_index_schema"

## Schema Vector setup
# Vector store setup.
# Only the collection handle is passed: `collection_schema` already carries its
# client and database, and `client=` / `database=` are not documented
# constructor parameters of MongoDBAtlasVectorSearch (depending on the installed
# langchain_mongodb version they are either silently ignored or rejected with a
# TypeError).
vector_store_schema = MongoDBAtlasVectorSearch(
    collection=collection_schema,
    embedding=embedding_function,
    index_name=index_name_schema,
    text_key="Table_name",  # document field used as the text of each retrieved document
)

***Loading the Tokenizer and LLM-Model***

In [None]:
# The Hugging Face token read in the secrets cell is passed explicitly so that
# downloading "google/gemma-7b-it" does not depend on an ambient login or on
# the hub client discovering the secret by itself. `token=None` (secret not
# set) falls back to the library's default behaviour.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it", token=HF_Token)
# CPU Enabled uncomment below 👇🏽
# model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it", token=HF_Token)
# GPU Enabled use below 👇🏽
# device_map="auto" lets the library decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it", device_map="auto", token=HF_Token)

***Chain setup***

In [None]:
# Example input: the SQL statement that the cells below retrieve context for
# and ask the model to explain.
question = "SELECT T2.name ,  T2.capacity FROM concert AS T1 JOIN stadium AS T2 ON T1.stadium_id  =  T2.stadium_id WHERE T1.year  >=  2014 GROUP BY T2.stadium_id ORDER BY count(*) DESC LIMIT 1?"

# SQL Vector setup
# Retriever over the SQL vector store, configured with k=4.
retriever_SQL = vector_store_SQL.as_retriever(search_kwargs={"k": 4})

def logging_retriever_function_SQL(retriever_SQL, question):
    """Query ``retriever_SQL`` with ``question``, echo the hits, and return them.

    Prints a "Retrieved Documents:" header followed by one line per hit, then
    hands back whatever ``retriever_SQL.invoke`` produced.
    """
    hits = retriever_SQL.invoke(question)
    # Emit header and hits in a single write; the text is the same as printing
    # the header and then each hit on its own line.
    print("\n".join(["Retrieved Documents:"] + [str(hit) for hit in hits]))
    return hits

def get_source_information_SQL(question):
    """Return the SQL retriever's hits for ``question`` as one newline-joined string.

    Retrieval (and the accompanying printout) is delegated to
    ``logging_retriever_function_SQL`` using the module-level ``retriever_SQL``.
    """
    hits = logging_retriever_function_SQL(retriever_SQL, question)
    rendered = map(str, hits)
    return "\n".join(rendered)

# Retrieved SQL context for the example question, as a single string.
information_summary_SQL = get_source_information_SQL(question)

# Schema Vector setup
# Retriever over the schema vector store, configured with k=10.
retriever_schema = vector_store_schema.as_retriever(search_kwargs={"k": 10})

def logging_retriever_function_schema(retriever_schema, question):
    """Query ``retriever_schema`` with ``question``, echo the hits, and return them.

    Prints a "Retrieved Schema:" header followed by one line per hit, then
    hands back whatever ``retriever_schema.invoke`` produced.
    """
    hits = retriever_schema.invoke(question)
    # Emit header and hits in a single write; the text is the same as printing
    # the header and then each hit on its own line.
    print("\n".join(["Retrieved Schema:"] + [str(hit) for hit in hits]))
    return hits

def get_source_information_schema(question):
    """Return the schema retriever's hits for ``question`` as one newline-joined string.

    Retrieval (and the accompanying printout) is delegated to
    ``logging_retriever_function_schema`` using the module-level
    ``retriever_schema``.
    """
    hits = logging_retriever_function_schema(retriever_schema, question)
    rendered = map(str, hits)
    return "\n".join(rendered)

# Retrieved schema context for the example question, as a single string.
information_summary_schema = get_source_information_schema(question)

def generate_response(question):
    """Explain a SQL statement in natural language using retrieved context.

    Retrieves example queries and schema entries for ``question``, builds the
    instruction prompt from them, runs the language model, and returns only
    the text the model produced after the prompt.

    Args:
        question: The SQL statement to translate and explain.

    Returns:
        The generated explanation as a stripped string.
    """
    # Retrieve context for THIS question. Reading the module-level summaries
    # instead would reuse the context of the example question for every call,
    # including every request coming from the web interface.
    context_sql = get_source_information_SQL(question)
    context_schema = get_source_information_schema(question)

    combined_information = (
          f"Instructions: You are teaching SQL and need to help a student understand a given SQL statement. Translate the SQL query into natural language and explain how it works step by step. Use the examples provided in the Context string to guide your translation. Refer to the Schema string to understand the tables and columns in the database. If you can't answer the question, reply I don't know.\n\n"
          f"Question: {question}\n\n"
          f"Context: {context_sql}\n\n"
          f"Schema: {context_schema}\n\n"
          f"Response:\n"
    )

    # Put the inputs on whatever device the model lives on, rather than
    # assuming "cuda"; this keeps the CPU loading path usable as well.
    inputs = tokenizer(combined_information, return_tensors="pt").to(model.device)
    generated = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens. Slicing them off by
    # length is more reliable than splitting the decoded text on "Response:",
    # which breaks when the question or retrieved text contains that marker.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = generated[0][prompt_length:]
    decoded_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return decoded_response

# Example usage: run the full retrieve-and-generate path on the sample
# SQL statement defined above and show the model's explanation.
result = generate_response(question)
print(result)

***Chat interface setup***

In [None]:
# Callback used by the web interface.
def chain_invoke(question):
    """Return the generated explanation for ``question``.

    Thin pass-through to ``generate_response`` so the UI has a single
    callable to bind to.
    """
    return generate_response(question)

# Create a web interface for the app, using Gradio
# Layout: a heading, one input textbox, a Submit button in its own row, and an
# output textbox in its own column.
with gr.Blocks(theme=Base(), title="Question Answering App using Vector Search + RAG") as demo:
    gr.Markdown(
        """
        # Question Answering App using Atlas Vector Search + RAG Architecture
        """)
    textbox = gr.Textbox(label="Enter your SQL statement:")
    with gr.Row():
        button = gr.Button("Submit", variant="primary")
    with gr.Column():
        output = gr.Textbox(lines=1, max_lines=30, label="Natural language translation and explanation:")

    # Call chain_invoke function upon clicking the Submit button:
    # the textbox content is the input and the result is written to `output`.
    # This registration is made inside the `with gr.Blocks(...)` context.

    button.click(chain_invoke, textbox, outputs=output)

# Start the app.
demo.launch()