# Building and Deploying the RAG System on Google Colab


1. Set Up the Google Colab Environment


Install the required libraries:

In [None]:
# Install the third-party libraries imported by the cells below.
!pip install langchain faiss-cpu sentence-transformers openai pdfplumber plotly transformers

2. Set Up API Keys and Environment Variables

Mount Google Drive to save your work and access your files:

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read and write files there.
drive.mount('/content/drive')

To use the online (OpenAI) mode, set your OpenAI API key. If you only plan to run offline, skip this step.

In [None]:
import os
# Placeholder only: replace the value with a real key before running the online cells.
# Later cells read it back with os.getenv('OPENAI_API_KEY').
os.environ['OPENAI_API_KEY'] = 'your_openai_api_key'

3. Implement RAG Components

Define a function to split large documents into manageable chunks:

In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_document(text, chunk_size=800, chunk_overlap=100):
    """Split ``text`` into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        text: The document text to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_text(text)`` call.
    """
    # Separators are listed from coarsest (blank line) to finest (single space).
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "separators": ["\n\n", "\n", ".", " "],
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_text(text)

Online Embeddings (OpenAI)

In [None]:
from langchain.embeddings import OpenAIEmbeddings

def create_online_embeddings(chunks):
    """Embed ``chunks`` with OpenAI embeddings.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.

    Args:
        chunks: Texts to embed.

    Returns:
        The result of ``OpenAIEmbeddings.embed_documents(chunks)``.
    """
    api_key = os.getenv('OPENAI_API_KEY')
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    vectors = embedder.embed_documents(chunks)
    return vectors

Offline Embeddings (Sentence-BERT)

In [None]:
from sentence_transformers import SentenceTransformer

def create_offline_embeddings(chunks):
    """Embed ``chunks`` locally with the ``all-MiniLM-L6-v2`` Sentence-BERT model.

    Args:
        chunks: Iterable of text chunks to embed.

    Returns:
        A list with one embedding vector per chunk, in input order. An empty
        input yields an empty list without loading the model.
    """
    texts = list(chunks)
    if not texts:
        # Nothing to embed: skip the (slow) model load entirely.
        return []

    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Encode all chunks in a single call so the model can batch them instead
    # of running one forward pass per chunk; list() keeps the original
    # "list of per-chunk vectors" return shape.
    return list(model.encode(texts))

Vector Store Indexing with FAISS

In [None]:
from langchain.vectorstores import FAISS

def create_vector_store(embeddings, chunks):
    """Build a FAISS vector store over ``chunks``.

    Args:
        embeddings: Forwarded unchanged as the ``embedding`` argument of
            ``FAISS.from_texts``.
            NOTE(review): LangChain documents that argument as an embedding
            model object rather than precomputed vectors -- confirm what
            callers pass.
        chunks: Texts to index.

    Returns:
        The store produced by ``FAISS.from_texts``.
    """
    return FAISS.from_texts(chunks, embedding=embeddings)

Question Answering Pipeline

Online QA

In [None]:
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

def online_response_pipeline(question, vector_store):
    """Answer ``question`` with GPT-4 over documents retrieved from ``vector_store``.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable.

    Args:
        question: The user's question.
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        The answer produced by running the retrieval QA chain on ``question``.
    """
    # Imported locally so this cell stays self-contained; ChatOpenAI ships in
    # the same langchain package as the names imported above.
    from langchain.chat_models import ChatOpenAI

    retriever = vector_store.as_retriever()
    # gpt-4 is a chat model, so it needs the chat wrapper rather than the
    # completion-style ``OpenAI`` LLM class.
    llm = ChatOpenAI(model='gpt-4', openai_api_key=os.getenv('OPENAI_API_KEY'))
    # RetrievalQA has to be built through from_chain_type, which creates the
    # combine-documents chain from the LLM; the bare constructor does not
    # accept ``llm``.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain.run(question)

Offline QA

In [None]:
from transformers import pipeline

def offline_response_pipeline(question, vector_store):
    """Answer ``question`` locally with an extractive question-answering model.

    The five most relevant chunks are retrieved from ``vector_store``, joined
    into one context string and passed to a DistilBERT SQuAD pipeline.

    Args:
        question: The user's question.
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        The extracted answer string, or ``""`` when no context was retrieved.
    """
    # The number of results is configured on the retriever via search_kwargs;
    # retrievers expose get_relevant_documents(), not retrieve(..., top_k=).
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    documents = retriever.get_relevant_documents(question)
    # Retrieved items are Document objects; their text lives in page_content.
    context = "\n".join(doc.page_content for doc in documents)
    if not context.strip():
        # The QA pipeline cannot run on an empty context.
        return ""

    qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
    answer = qa_pipeline(question=question, context=context)
    return answer['answer']

4. Integrate PDF Parsing

Upload and Parse PDFs

In [None]:
import pdfplumber

def parse_pdf(file_path):
    """Extract the text of every page of a PDF, joined with newlines.

    Args:
        file_path: Path of the PDF, passed to ``pdfplumber.open``.

    Returns:
        The text of all pages joined by ``"\\n"``. Pages with no extractable
        text contribute an empty string.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can return None for a page without a text layer
        # (e.g. a scanned image); substitute "" so the join does not raise.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)

Use Colab's `files` module to upload SFCR (Solvency and Financial Condition Report) PDFs:

In [None]:
from google.colab import files
# Open Colab's upload dialog; the result is keyed by uploaded file name.
uploaded = files.upload()

# Parse every uploaded PDF and keep each result, instead of letting later
# files overwrite earlier ones.
parsed_texts = {file_name: parse_pdf(file_name) for file_name in uploaded}

# ``text`` holds the combined text of all uploads (identical to the single
# file's text when only one PDF is uploaded).
text = "\n".join(parsed_texts.values())

5. Visualize Data

Use Plotly for interactive visualizations:

In [None]:
import plotly.express as px

def visualize_data(data):
    """Show an interactive bar chart of solvency ratio by year.

    Args:
        data: Data passed to ``plotly.express.bar``; it is plotted with
            ``'Year'`` on the x-axis and ``'Solvency Ratio'`` on the y-axis.
    """
    chart_options = {
        'x': 'Year',
        'y': 'Solvency Ratio',
        'title': 'Solvency Ratios Over Time',
    }
    px.bar(data, **chart_options).show()

6. Deploy Interactive Chatbot

Simple Chat Interface

In [None]:
import IPython.display as display

def chatbot_interface():
    """Run one interactive question-answering round over a pasted document.

    Prompts for the document text, a question and the mode (offline/online),
    builds a FAISS index over the document chunks, then prints the answer.

    Returns:
        The answer that was printed.
    """
    document = input("Paste the SFCR document text: ")
    question = input("Enter your question: ")
    chunks = chunk_document(document)

    use_offline = input("Use offline mode? (yes/no): ").strip().lower() == 'yes'

    # create_vector_store forwards its first argument to FAISS.from_texts as
    # the embedding model, so it must receive a model object (which the store
    # also uses to embed queries), not a list of precomputed vectors.
    if use_offline:
        # Imported locally; wraps the same Sentence-BERT model used by the
        # offline embedding cell in LangChain's embedding interface.
        from langchain.embeddings import HuggingFaceEmbeddings

        embedding_model = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
        respond = offline_response_pipeline
    else:
        embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
        respond = online_response_pipeline

    vector_store = create_vector_store(embedding_model, chunks)
    answer = respond(question, vector_store)

    print(f"Answer: {answer}")
    return answer


# Keep the answer at notebook scope so later cells can use it.
answer = chatbot_interface()

7. Save and Share Results

In [None]:
# Persist the answer to Google Drive. Requires Drive to be mounted and
# ``answer`` to be defined at notebook scope before this cell runs.
# The encoding is explicit so non-ASCII characters in the answer are written
# the same way regardless of the runtime's default encoding.
with open('/content/drive/My Drive/SFCR_Results.txt', 'w', encoding='utf-8') as f:
    f.write(answer)

Conclusion

This step-by-step guide walks you through building the RAG system on Google Colab. It includes:

Setting up embeddings and vector stores.

Parsing and processing PDF reports.

Visualizing results.

Deploying an interactive chatbot.