In [1]:
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
import qdrant_client 
import os
from dotenv import load_dotenv
from qdrant_client.http import models
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
import PyPDF2
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

### Create a Qdrant client

In [2]:
# Load variables from a local .env file (if one exists) into the process
# environment before reading any settings.
load_dotenv()

# Read each setting exactly once. The previous self-assignment
# `os.environ[X] = os.getenv(X)` was a no-op when X was set and raised an
# opaque TypeError (environ values must be str) when it was missing, so the
# missing-host case is reported explicitly instead.
qdrant_host = os.getenv("QDRANT_HOST")
if not qdrant_host:
    raise RuntimeError(
        "QDRANT_HOST is not set; define it in the environment or in a .env file."
    )

# NOTE(review): the API key is treated as optional on the assumption that some
# deployments (e.g. a local instance) do not require one - confirm for your setup.
qdrant_api_key = os.getenv("QDRANT_API_KEY")

client = qdrant_client.QdrantClient(
    qdrant_host,
    api_key=qdrant_api_key,
)

### Create collection

In [None]:
# Publish the collection name through the process environment so that the
# other cells can look it up under QDRANT_COLLECTION_NAME.
os.environ["QDRANT_COLLECTION_NAME"] = "my-collection"

# Vector layout of the collection: 1536 dimensions (the size used for OpenAI
# embeddings) compared with cosine distance.
vectors_config = models.VectorParams(size=1536, distance=models.Distance.COSINE)

# NOTE(review): `recreate_collection` presumably replaces an existing
# collection of the same name - confirm before running against stored data.
client.recreate_collection(
    vectors_config=vectors_config,
    collection_name=os.environ["QDRANT_COLLECTION_NAME"],
)

### Create vector store

In [3]:
# Embedding backend handed to the vector store below.
embeddings = OpenAIEmbeddings()

# Wrap the Qdrant client in LangChain's vector-store interface. The target
# collection is taken from the QDRANT_COLLECTION_NAME environment variable.
vector_store = Qdrant(
    embeddings=embeddings,
    collection_name=os.environ.get("QDRANT_COLLECTION_NAME"),
    client=client,
)

### Add documents to the vector store

In [4]:
# extract the text of the pdf file
def get_pdf_text(filename):
    """Return the concatenated text of every page in a PDF file.

    Args:
        filename: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        A single string holding the text of all pages in page order, joined
        without any separator. Pages whose ``extract_text()`` result is falsy
        (``None`` or ``""``) contribute nothing.
    """
    with open(filename, "rb") as f:
        pdf = PyPDF2.PdfReader(f)
        # Iterate the pages directly and join once at the end instead of
        # growing a string with ``+=`` inside an index loop; ``or ""`` keeps a
        # page without extractable text from breaking the concatenation.
        return "".join(page.extract_text() or "" for page in pdf.pages)

# create chunks of the extracted text
def get_text_chunks(text, chunk_size=1000, chunk_overlap=100):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
            Defaults to 1000.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 100.

    Returns:
        The list of chunk strings produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        # The recursive splitter tries separators in list order, so they are
        # listed coarsest first. With " " in front (the previous order) the
        # space matched almost any text and "," / "\n" were effectively never
        # used, cutting chunks at arbitrary word boundaries.
        separators=["\n", ",", " "],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)

In [None]:
# Ingest one specific PDF: extract its text, cut it into chunks, and add the
# chunks to the vector database.
pdf_path = "ISTQB-CT-TAE_Syllabus_v1.0_2016.pdf"
text = get_pdf_text(pdf_path)
chunks = get_text_chunks(text)
vector_store.add_texts(chunks)

### Initializing a retrieval-based question-answering system and testing it

In [6]:
# Plug the vector store into a retrieval chain: a "stuff"-type RetrievalQA
# chain built from an OpenAI LLM and the vector store's retriever.
llm = OpenAI()
retriever = vector_store.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [None]:
# Run a sample question (German: "What is risk-based test automation?")
# through the chain and print the answer text.
query = "Was ist Risikobasierte Testautomation?"
response = qa.run(query)
print(response)