# Install required packages

In [7]:
# %pip (not !pip) installs into the environment of the running kernel.
# `langchain` is listed explicitly because the notebook imports from it directly.
# Upper bounds keep the resolver on the release line that still ships
# `langchain.chains.RetrievalQA` and `langchain.text_splitter` (NOTE(review): confirm bounds against your lockfile).
%pip install "langchain<1" "langchain-community<0.4" "langchain-openai<1" faiss-cpu pypdf




[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# Import all required libraries

In [1]:
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Configuration settings

In [None]:
# Prefer the OPENAI_API_KEY environment variable so the real secret never has
# to be saved inside the notebook. The placeholder is used only when the
# variable is unset or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or "your_openai_api_key_here"
# Location of the PDF to index, relative to the notebook's working directory.
pdf_path = "../data/CTSE_ALL.pdf"

# Load PDF


In [12]:
# Read the PDF at pdf_path into a sequence of document objects.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Report how many documents came back, then preview the first one if any exist.
print(f"Loaded {len(documents)} documents.")
if documents:
    print(f"\nFirst 500 chars of first document:\n{documents[0].page_content[:500]}")

Loaded 408 documents.

First 500 chars of first document:
Intro to DevOps and Beyond
Ravindu Nirmal Fernando


# Text splitting


In [13]:
# Split the loaded documents into chunks using chunk_size=1000 and
# chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

print(f"\nNumber of chunks after splitting: {len(texts)}")
# Only preview a chunk when at least one was produced; indexing texts[0] on an
# empty result would raise IndexError.
if texts:
    print(f"Sample chunk (first 200 chars): {texts[0].page_content[:200]}")


Number of chunks after splitting: 383
Sample chunk (first 200 chars): Intro to DevOps and Beyond
Ravindu Nirmal Fernando


# Create embeddings and vector store


In [14]:
# Embedding client used to vectorise the chunks.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

# Build the FAISS store from the chunks and the embedding client.
db = FAISS.from_documents(
    texts,
    embeddings,
)

# ntotal is read from the store's underlying index object.
print(f"\nNumber of documents in FAISS vector store: {db.index.ntotal}")


Number of documents in FAISS vector store: 383


# Initialize QA system


In [15]:
# Chat model that produces the final answer.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY,
)

# Retrieval QA chain: the vector store acts as retriever and the chain is
# built with the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

print("QA system initialized successfully!")

QA system initialized successfully!


# Define question answering function


In [16]:
def ask_question(question):
    """Answer a question using the notebook-level ``qa`` retrieval chain.

    Args:
        question: The question to ask. ``None``, an empty value, or a string
            made up only of whitespace is rejected without calling the chain.

    Returns:
        The ``'result'`` entry of the chain's response on success. If no
        usable question was given, or the chain raised, a string starting
        with ``"Error: "`` is returned instead of an exception being raised.
    """
    # Treat whitespace-only strings like an empty question so a blank prompt
    # is never sent to the chain.
    if not question or (isinstance(question, str) and not question.strip()):
        return "Error: No question provided"

    print(f"\nProcessing question: {question}")

    try:
        result = qa.invoke({"query": question})
        return result["result"]
    except Exception as e:
        # Deliberate best-effort: report the failure and hand back an error
        # string so the calling cell keeps running.
        print(f"Error occurred: {e}")
        return f"Error: {e}"

# Example usage


In [None]:
# Ask one sample question and show it alongside the returned answer.
# NOTE(review): the saved output under this cell shows a different question;
# re-run the cell to refresh it.
sample_question = "What is the main topic of this document?"
answer = ask_question(sample_question)

print("\nQuestion:", sample_question)
print("Answer:", answer)


Processing question: What are the topics of this document?

Question: What are the topics of this document?
Answer: The topics covered in this document include an Introduction to AWS cloud platform and its benefits, AWS Global Infrastructure, Accessing AWS Services, Interacting with AWS Services, Best Practices for managing AWS Accounts, Common AWS services, Demo, Brief History of infrastructure shifts over the decades, VMs vs Containers, What are containers and what problem does it solve, What is Docker, Deep dive into Docker Internals, and a Demo.
