In [None]:
!pip install PyPDF2 langchain langchain_community huggingface_hub transformers sentence-transformers google-colab

In [None]:
!pip install faiss-cpu


In [None]:
# Fetch the Hugging Face token stored under the name 'HUGGINGFACEHUB_TOKEN'
# via google.colab.userdata. `hf_api_key` is a notebook-level global that
# get_conversation_chain() later passes to HuggingFaceHub.
from google.colab import userdata
hf_api_key=userdata.get('HUGGINGFACEHUB_TOKEN')

In [None]:
!pip install --upgrade tensorflow

In [None]:
!pip install tensorflow==2.12.0
!pip install --upgrade transformers

In [None]:
import os
import concurrent.futures
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from google.colab import files

# Load environment variables
load_dotenv()

# Hugging Face API Key
# hf_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Function to extract text from PDFs
def get_pdf_text(pdf_files):
    """Extract and concatenate the text of every page of the given PDFs.

    Each PDF is read on a worker thread, but the per-file results are joined
    in the same order as ``pdf_files`` so the output is deterministic.

    Args:
        pdf_files: Iterable of PDF sources accepted by ``PdfReader``
            (the script below passes uploaded file names).

    Returns:
        One string containing each non-empty page text followed by a
        newline. Pages for which ``extract_text()`` returns nothing are
        skipped. An empty ``pdf_files`` yields ``""``.

    Raises:
        Whatever ``PdfReader`` or ``extract_text()`` raises for an
        unreadable file; the exception propagates to the caller.
    """

    def extract_text(pdf):
        # Read a single PDF and return its page texts, one per line group.
        pdf_reader = PdfReader(pdf)
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map yields results in input order regardless of which
        # worker finishes first; as_completed would interleave the documents
        # in completion order and make the combined text nondeterministic.
        return "".join(executor.map(extract_text, pdf_files))

# Function to split text into chunks
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks with ``CharacterTextSplitter``.

    The splitter is configured to break on newlines, with a chunk size of
    1000 and an overlap of 200, both measured with ``len``.

    Args:
        text: The full text to split.

    Returns:
        The value returned by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = CharacterTextSplitter(**splitter_options)
    chunks = splitter.split_text(text)
    return chunks

# Function to create FAISS vector store
def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Args:
        text_chunks: Iterable of text strings to embed.

    Returns:
        The store built by ``FAISS.from_texts`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Raises:
        ValueError: If ``text_chunks`` is empty. FAISS cannot build an index
            from zero vectors and would otherwise fail with an opaque
            ``IndexError`` deep inside ``from_texts``.
    """
    text_chunks = list(text_chunks)
    # Validate before loading the embedding model so the failure is cheap
    # and the message points at the real cause (no extractable text).
    if not text_chunks:
        raise ValueError(
            "Cannot build a vector store from an empty list of text chunks."
        )

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

# Function to create a conversational chain
def get_conversation_chain(vectorstore):
    """Build a ``ConversationalRetrievalChain`` over ``vectorstore``.

    The chain combines a Hugging Face Hub LLM, a retriever obtained from
    ``vectorstore.as_retriever()``, and a ``ConversationBufferMemory`` that
    stores the dialogue under the ``chat_history`` key as message objects.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.

    Note:
        Reads the module-level ``hf_api_key`` for the Hub API token.
    """
    generation_settings = {"temperature": 0.5, "max_length": 512}
    hub_llm = HuggingFaceHub(
        repo_id="yeontaek/airoboros-2.1-llama-2-13B-QLoRa",
        model_kwargs=generation_settings,
        huggingfacehub_api_token=hf_api_key,
    )

    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

    return ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=vectorstore.as_retriever(),
        memory=history,
    )

# Upload PDFs in Colab
print("Upload your PDFs")
uploaded_files = files.upload()

# The keys of the upload result are used as the PDF file names to read.
pdf_docs = list(uploaded_files.keys())
if not pdf_docs:
    # A cancelled upload would otherwise surface later as an opaque error
    # while building the FAISS index.
    raise ValueError("No PDFs were uploaded. Re-run this cell and select at least one PDF.")
print("Processing PDFs...")

# Extract text from PDFs
raw_text = get_pdf_text(pdf_docs)
if not raw_text.strip():
    # Image-only (scanned) PDFs yield no text; there is nothing to index.
    raise ValueError("No extractable text was found in the uploaded PDFs.")

# Get text chunks
text_chunks = get_text_chunks(raw_text)

# Create FAISS vector store
vectorstore = get_vectorstore(text_chunks)

# Initialize conversation chain
conversation = get_conversation_chain(vectorstore)
print("PDFs processed successfully! You can now ask questions.")

# Chat loop: read questions until the user types 'exit'.
while True:
    # Strip so that stray whitespace neither defeats the exit check nor
    # sends a blank question to the model.
    user_question = input("Ask a question (or type 'exit' to quit): ").strip()
    if user_question.lower() == "exit":
        break
    if not user_question:
        continue

    response = conversation({'question': user_question})
    # Kept as a notebook-level variable so the full history stays inspectable.
    chat_history = response['chat_history']

    # Print only the current exchange. Re-printing the whole history on every
    # turn duplicates all earlier messages in the console output.
    print(f"User: {user_question}")
    print(f"Bot: {response['answer']}")
