## Overlapping Acts Online Models

In [2]:
# Importing required libraries
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
import os

### Step 1: Reading Combined text

In [4]:
# Location of the combined text file, resolved against the current working directory
file_path = os.path.join(
    os.getcwd(),
    "combined_extracted_texts.txt",
)

def read_combined_text(file_path):
    """
    Load the entire contents of a UTF-8 text file into memory.

    Args:
        file_path (str): Location of the text file to read.

    Returns:
        str: Everything the file contains, as a single string.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents


In [None]:
# Sanity check: load the combined text and show only its opening
# 500 characters so the cell output stays short
text_content = read_combined_text(file_path)
print(text_content[0:500])



Biosafety Act.pdf
LAWS OF KENYA
THE BIOSAFETY ACT
CAP. 320
Revised and published by the National Council for Law Reporting
with the authority of the Attorney-General as gazetted by the Government Printer
www.kenyalaw.orgKenya
Biosafety Act
Cap. 320
Legislation as at 11 December 2023
 
 
 
 
 
 
 
 
 
 
 
 
By Kenya Law and Laws.Africa.


### Step 2: Split Text into chunks

In [7]:
# split_text.py
def split_text_into_chunks(text, chunk_size=1000, chunk_overlap=200, separator="\n"):
    """
    Splits text into manageable chunks for embedding.

    The splitting parameters are exposed as keyword arguments so different
    chunking strategies can be tried without editing this function; the
    defaults reproduce the values that were previously hard-coded.

    Args:
        text (str): The full text to split.
        chunk_size (int): Chunk size passed to the splitter, measured with
            ``len``. Defaults to 1000.
        chunk_overlap (int): Overlap between consecutive chunks passed to the
            splitter. Defaults to 200.
        separator (str): String the text is split on before pieces are
            merged into chunks. Defaults to a newline.

    Returns:
        list: List of text chunks.
    """
    text_splitter = CharacterTextSplitter(
        separator=separator,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)


In [8]:
# Chunk the text loaded earlier and report how many pieces were produced
text_chunks = split_text_into_chunks(text=text_content)
print(f"Number of chunks: {len(text_chunks)}")

Number of chunks: 2812


### Step 3: Generate Embeddings for Offline or Online Use
- Online: Use OpenAIEmbeddings with your OpenAI API key.
- Offline: Use SentenceTransformer for embedding generation.

In [9]:
from langchain.embeddings import OpenAIEmbeddings

def generate_online_embeddings(text_chunks, openai_api_key):
    """
    Embed text chunks through the OpenAI embeddings API.

    Args:
        text_chunks (list): Text chunks to embed.
        openai_api_key (str): API key handed to ``OpenAIEmbeddings``.

    Returns:
        The result of ``OpenAIEmbeddings.embed_documents`` for the given
        chunks (presumably one embedding vector per chunk - confirm against
        the installed LangChain version).
    """
    embedder = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectors = embedder.embed_documents(text_chunks)
    return vectors

# Example usage
# online_embeddings = generate_online_embeddings(text_chunks, "your-openai-api-key")


In [17]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Retrieve the OpenAI API key from the environment
openai_api_key = os.getenv("OPENAI_API_KEY")

# Reject a missing key as well as an empty or whitespace-only one
# (e.g. a bare `OPENAI_API_KEY=` line), so the problem surfaces here
# instead of as an authentication failure on the first API call.
if not openai_api_key or not openai_api_key.strip():
    raise ValueError("API key is not set. Please check your .env file.")


In [None]:
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def create_online_agent(faiss_index, openai_api_key):
    """
    Build a conversational retrieval chain backed by a FAISS vector store.

    Args:
        faiss_index (str | os.PathLike | FAISS): Either the folder of an index
            previously written with ``FAISS.save_local`` or an already-built
            FAISS vector store. When a folder is given, the index must have
            been created with OpenAI embeddings, because the same embedding
            model is used to embed incoming questions.
        openai_api_key (str): OpenAI API key, used for the chat model and,
            when loading from disk, for the query embeddings.

    Returns:
        ConversationalRetrievalChain: Chain that retrieves from the vector
        store and keeps the running conversation in a buffer memory under
        the ``chat_history`` key.
    """
    import inspect

    if isinstance(faiss_index, FAISS):
        # Caller already holds a vector store; use it as-is.
        vector_store = faiss_index
    else:
        # The FAISS constructor cannot open an index from a folder name;
        # a saved index has to be restored with load_local.
        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        load_kwargs = {}
        # SECURITY: load_local unpickles the stored docstore. Newer LangChain
        # releases require an explicit opt-in for that; older ones do not
        # accept the flag at all, so it is only passed when supported.
        # Only load index folders you created yourself.
        if "allow_dangerous_deserialization" in inspect.signature(FAISS.load_local).parameters:
            load_kwargs["allow_dangerous_deserialization"] = True
        vector_store = FAISS.load_local(faiss_index, embeddings, **load_kwargs)

    llm = ChatOpenAI(openai_api_key=openai_api_key)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )



In [None]:
# Build the conversational agent used by the chat loop.
# NOTE(review): no cell visible in this notebook creates anything named
# "faiss_index" - confirm where that index is built before running this.
online_agent = create_online_agent(
    "faiss_index",
    openai_api_key,
)


In [None]:
# Interactive question-and-answer loop against the retrieval agent.
# Type "exit" or "quit" (any letter case) to stop.
print("Ask your question:")
while True:
    try:
        # Trim surrounding whitespace so " exit " is still recognised
        query = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt ends the chat without a traceback
        print("Exiting the chat.")
        break
    if not query:
        # Do not spend an API call on a blank line
        continue
    if query.lower() in ("exit", "quit"):
        print("Exiting the chat.")
        break
    response = online_agent({"question": query})
    print("Agent:", response["answer"])
