In [11]:
import glob
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain.llms import HuggingFaceHub

In [None]:
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF in ``pdf_docs``.

    Args:
        pdf_docs: Iterable of PDF sources; each item is handed unchanged to
            ``PdfReader`` (presumably file paths or binary file objects —
            confirm against the PyPDF2 version in use).

    Returns:
        str: All page texts joined in document order, then page order, with no
        separator inserted between pages. Returns ``""`` when ``pdf_docs`` is
        empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        # strict=False is forwarded to PdfReader as in the original call.
        pdf_reader = PdfReader(pdf, strict=False)
        for page in pdf_reader.pages:
            # Defensive: treat a falsy result (None or "") as empty text so a
            # page without extractable text cannot raise TypeError here.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loops.
    return "".join(page_texts)

# Forward slash instead of "books\ml_...": "\m" is an invalid escape sequence
# in a normal string literal and a backslash is only a path separator on
# Windows; "/" is accepted on every platform.
pdf_text = get_pdf_text(["books/ml_for_begineer.pdf"])
# print(pdf_text)

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Recursive splitter: chunks of up to 1000 characters (measured with len),
# overlapping by 200 characters; separators are treated as literal strings,
# not regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)

# Split the text of the single book extracted above.
chunks = text_splitter.split_text(pdf_text)
# print(chunks[0])

In [29]:
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks with a newline-based splitter.

    The ``CharacterTextSplitter`` is configured with a newline separator,
    ``chunk_size=1000``, ``chunk_overlap=200`` and ``len`` as the length
    function.

    Args:
        text: The text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_config = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_config).split_text(text)

# Chunk the single-book text held in pdf_text with the newline-based splitter.
pdf_txt_chunks = get_text_chunks(pdf_text)
# print(pdf_txt_chunks[0])

In [44]:
# Create and store vector embeddings

def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` with OpenAIEmbeddings and index them with FAISS.

    Args:
        text_chunks: Iterable of text strings to embed. It is materialised
            into a list before use, so generators are accepted.

    Returns:
        The object returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` contains no items (for example when no
            text could be extracted from the source documents).
    """
    texts = list(text_chunks)
    if not texts:
        # Fail early with a clear message instead of letting the empty list
        # reach the embedding / index construction step.
        raise ValueError(
            "text_chunks is empty: there is nothing to embed or index"
        )
    embeddings = OpenAIEmbeddings()
    # Alternative local embedding model:
    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(texts=texts, embedding=embeddings)
# print(get_vectorstore(pdf_txt_chunks[0]))

In [45]:
def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain over ``vectorstore``.

    The chain combines a default-constructed ``ChatOpenAI`` model, the
    retriever obtained from ``vectorstore.as_retriever()``, and a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key with
    ``return_messages=True``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatOpenAI()
    # Alternative hosted model:
    # chat_model = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})

    history = ConversationBufferMemory(
        return_messages=True,
        memory_key='chat_history',
    )

    return ConversationalRetrievalChain.from_llm(
        memory=history,
        retriever=vectorstore.as_retriever(),
        llm=chat_model,
    )


In [46]:
from dotenv import load_dotenv

# Load variables from a .env file into the environment before any client is
# constructed (presumably this supplies the OpenAI API key — confirm).
load_dotenv()

# Hard-coded inputs
# glob.glob() gives no ordering guarantee; sorting keeps the order in which
# the books are concatenated (and therefore the chunks) reproducible.
pdf_docs = sorted(glob.glob('books/*.pdf'))
if not pdf_docs:
    # Fail here with a clear message rather than deep inside the pipeline.
    raise FileNotFoundError("No PDF files found matching 'books/*.pdf'")
# print(f"Ask question from these books:{pdf_docs}")

# Process the PDFs and create conversation chain:
# extract text -> split into chunks -> embed/index -> build the chat chain.
print("Processing...")
print()
raw_text = get_pdf_text(pdf_docs)
text_chunks = get_text_chunks(raw_text)
vectorstore = get_vectorstore(text_chunks)
conversation = get_conversation_chain(vectorstore)

Processing...



In [48]:
# Handle user input
# Use .invoke() rather than calling the chain object directly: the direct
# call goes through the deprecated Chain.__call__ path. The result is the
# same dict, with the model's reply under the 'answer' key.
query = conversation.invoke(
    {'question': 'what was my first question?'}
)
print(query['answer'])

The scenario of intelligent machines turning on humans in a struggle of the fittest is a common theme in discussions about artificial intelligence and the future. This concept is often depicted in science fiction and raises concerns about the potential risks associated with highly advanced AI systems. However, it's important to remember that this is a speculative scenario and not a foregone conclusion. The development and deployment of AI technologies are guided by ethical considerations and safety measures to prevent such outcomes. The field of AI ethics is actively addressing these concerns to ensure that AI systems are designed and used responsibly.
