In [27]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from dotenv import load_dotenv
import os
from database_utils import calculate_embedding
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Qdrant
from qdrant_client.http.models import Distance, VectorParams
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
import json
import warnings

# Silence every warning category for the rest of the session so that library
# notices do not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

# Load variables from a local .env file (if present) into the process
# environment, then read the credentials/endpoints the notebook relies on.
# Each constant is None when the corresponding variable is not set.
load_dotenv()
QDRANT_KEY = os.environ.get("QDRANT_KEY")
QDRANT_CLUSTER_URL = os.environ.get("QDRANT_CLUSTER_URL")
HUGGING_FACE_TOKEN = os.environ.get("HUGGING_FACE_TOKEN")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Search for related articles

In [29]:
def query_qdrant(question, collection_name, qdrant_url, qdrant_key, top_k=5):
    """
    Query a Qdrant collection for the records most similar to a question.

    The question is turned into a vector with ``calculate_embedding`` and that
    vector is used to search the given collection. A fresh client is opened
    for the call and closed again before returning, even if an error occurs.

    Parameters:
    - question (str): The question or query string.
    - collection_name (str): The name of the Qdrant collection to query.
    - qdrant_url (str): The Qdrant server URL.
    - qdrant_key (str): API key for Qdrant authentication.
    - top_k (int): Number of top results to return (default is 5).

    Returns:
    - list: One dict per hit, in the order returned by the search, each with
      the keys "id", "score" and "payload" (the stored metadata of the record).
    """
    client = QdrantClient(url=qdrant_url, api_key=qdrant_key)
    try:
        # NOTE(review): presumably calculate_embedding uses the same embedding
        # model the collection was indexed with -- confirm in database_utils.
        question_embedding = calculate_embedding(question)

        search_results = client.search(
            collection_name=collection_name,
            query_vector=question_embedding,
            limit=top_k,  # Return at most top_k hits
        )

        return [
            {
                "id": hit.id,
                "score": hit.score,
                "payload": hit.payload,  # Metadata of the record
            }
            for hit in search_results
        ]
    finally:
        # Release the connection resources held by this per-call client.
        client.close()

In [30]:
# Look up the stored articles closest to a free-text request about Steve Jobs
# (top_k is left at its default).
results = query_qdrant(
    question="Give me articles about Steve Jobs",
    collection_name="bbc_news_articles",
    qdrant_url=QDRANT_CLUSTER_URL,
    qdrant_key=QDRANT_KEY,
)

# Pretty-print the hits (id, score, payload) as indented JSON.
serialized_results = json.dumps(results, indent=4)
print(serialized_results)

[
    {
        "id": "94b597f0-1aa4-4b4e-855b-4374132b1fb9",
        "score": 0.6739781,
        "payload": {
            "page_content": "Newscast - Keir Diary\u2026 A Week in the Life of the PM - BBC SoundsKeir Diary\u2026 A Week in the Life of the PM Keir Diary\u2026 A Week in the Life of the PM",
            "label": "entertainment",
            "keywords": [
                "Keir Diary",
                "BBC SoundsKeir",
                "Week",
                "Life",
                "Newscast"
            ],
            "source": "http://www.bbc.co.uk/sounds/play/m0022js5"
        }
    },
    {
        "id": "96efba6b-b4c2-43bf-a4c3-c4f82debfb27",
        "score": 0.6449732,
        "payload": {
            "page_content": "Apple makes blogs reveal sources  Apple has won its legal fight to make three bloggers reveal who told them about unreleased products.  The bid to unmask the employees leaking information was launched in December 2004 following online articles about Apple's 

# Connect the LLM Q&A with the database search

In [32]:
# Client for the Qdrant cluster, authenticated with the URL and API key read
# from the environment.
client = QdrantClient(api_key=QDRANT_KEY, url=QDRANT_CLUSTER_URL)

# LangChain vector store over the existing "bbc_news_articles" collection.
# A FastEmbedEmbeddings instance is handed to the store as its embedding object.
embeddings = FastEmbedEmbeddings()
vector_store = Qdrant(
    embeddings=embeddings,
    collection_name="bbc_news_articles",
    client=client,
)

# Chat model used by the QA chains: "gpt-3.5-turbo" with temperature 0 to keep
# the answers as repeatable as possible.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

In [33]:
# Create a RetrievalQA chain from the language model (llm) and a retriever over the vector store.
# No chain_type is passed, so the default "stuff" strategy is used: the retrieved documents are
# concatenated into a single context string that is sent to the model together with the question.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_store.as_retriever()
)

# Execute the QA chain with a query about the price of the one-gigabyte iPod unveiled by Steve Jobs.
# `invoke` is used instead of calling the chain object directly, because the direct call
# (`qa_chain({...})`) is deprecated in LangChain; the returned dict has the same shape.
result = qa_chain.invoke({
    "query": "How much iPod with one gigabyte storage costs that was unveiled by Steve Jobs during annual MacWorld speech?"
})

# Display the generated answer
result["result"]

'The iPod with one gigabyte of storage that was unveiled by Steve Jobs during the annual MacWorld speech costs $149 in the US and £99 in the UK.'

In [34]:
# Create a 'map_reduce' RetrievalQA chain to handle question-answering tasks
# This chain first maps over the retrieved documents, generating an intermediate answer for each document.
# Then, it reduces these intermediate answers into a final answer.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_store.as_retriever(),
    chain_type="map_reduce"
)

# Execute the QA chain with the same iPod-price question.
# `invoke` replaces the deprecated direct call of the chain object; the returned dict has the same shape.
result = qa_chain_mr.invoke({
    "query": "How much ipod with one gigabyte storage costs that was unveiled by Steve Jobs during annual MacWorld speech?"
})

# Conclusion: for this question the map-reduce chain performs worse than the default chain
# (it does not find the price in the retrieved documents).
# Display the generated answer
result["result"]

"I'm sorry, but the text provided does not mention the specific cost of an iPod with one gigabyte storage that was unveiled by Steve Jobs during an annual MacWorld speech. If you have any other questions or need further assistance, feel free to ask."

In [35]:
# Create a 'refine' RetrievalQA chain to handle question-answering tasks
# The chain generates an initial answer using the first retrieved document and then refines it iteratively with each subsequent document.
# It gets its own name so that it does not overwrite the map-reduce chain.
qa_chain_refine = RetrievalQA.from_chain_type(
    llm,
    retriever=vector_store.as_retriever(),
    chain_type="refine"
)

# Execute the QA chain with the same iPod-price question.
# `invoke` replaces the deprecated direct call of the chain object; the returned dict has the same shape.
result = qa_chain_refine.invoke({
    "query": "How much ipod with one gigabyte storage costs that was unveiled by Steve Jobs during annual MacWorld speech?"
})

# Conclusion: the refine chain gives the right answer, but pads it with a lot of unnecessary information.
# Display the generated answer
result["result"]

"The iPod with one gigabyte of storage that was unveiled by Steve Jobs during his annual MacWorld speech costs $149 in the US and £99 in the UK. This iPod was part of Apple's strategy to bring their products to a wider audience, including PC users who were already familiar with Apple's iPod. However, it is important to note that there was controversy surrounding Apple's practices at the time, as a user of Apple's iTunes music service sued the company for anti-competitive behavior. The lawsuit claimed that Apple's technology prevented songs bought from the iTunes store from playing on music players other than the iPod, alleging that Apple was unlawfully leveraging its monopoly in the online music market to stifle competition in the portable music player market. Despite this legal challenge, Apple continued to dominate the portable digital music player market with an 87% share and had sold over six million iPods since its launch."

# The same QA chain, but with a custom prompt template

In [37]:
# Build a prompt template that asks for a short answer and tells the model to admit when it does not know.
# {context} receives the retrieved documents and {question} the user query.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, 
just say that you don't know, don't try to make up an answer. Use 5 sentences maximum. Keep the answer as concise as possible. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Build a retrieval chain using the default 'stuff' chain type and the custom prompt.
# return_source_documents=True makes the chain also return the retrieved documents with the answer.
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vector_store.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

# Execute the QA chain with a query.
# `invoke` replaces the deprecated direct call of the chain object; the returned dict has the same shape.
result = qa_chain.invoke({"query": "How much ipod with one gigabyte storage costs that was unveiled by Steve Jobs during annual MacWorld speech?"})
result["result"]

'The iPod with one gigabyte of storage unveiled by Steve Jobs during the annual MacWorld speech costs $149 (£99 in the UK).'

# Add memory

In [39]:
# Chat model for the conversational chain: OpenAI "gpt-4o-mini" with
# temperature 0 to keep the answers as repeatable as possible.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

# Buffer that keeps the previous turns of the conversation. The history is
# exposed to the chain under the "chat_history" key as message objects.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

# Qdrant client and a LangChain vector store over the "bbc_news_articles"
# collection, with a FastEmbedEmbeddings instance as the embedding object.
client = QdrantClient(api_key=QDRANT_KEY, url=QDRANT_CLUSTER_URL)
vector_store = Qdrant(
    embeddings=FastEmbedEmbeddings(),
    collection_name="bbc_news_articles",
    client=client,
)

# Retriever view of the vector store, used by the chain to fetch documents.
retriever = vector_store.as_retriever()

# Conversational retrieval chain: answers from retrieved documents while the
# memory object carries the chat history from one question to the next.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    memory=memory,
    retriever=retriever,
)

In [40]:
# First turn of the conversation: ask for the price of the one-gigabyte iPod.
# `invoke` replaces the deprecated direct call of the chain object; the answer is still under "answer".
question = "How much ipod with one gigabyte storage costs that was unveiled by Steve Jobs during annual MacWorld speech?"
result = qa.invoke({"question": question})
result['answer']

'The iPod shuffle with one gigabyte of storage costs $149 (£99) as unveiled by Steve Jobs during the annual MacWorld speech.'

In [41]:
# Follow-up turn: the question only makes sense together with the previous one,
# so it relies on the chat history that the chain's memory carries over.
# `invoke` replaces the deprecated direct call of the chain object; the answer is still under "answer".
question = "Is there a cheaper option?"
result = qa.invoke({"question": question})
result['answer']

'Yes, Apple has released the iPod shuffle, which is a cheaper option compared to other iPod models. The iPod shuffle uses cheaper flash memory rather than hard drives and is available in two versions: one with 512MB of storage for $99 (£69 in the UK) and a second with 1GB of storage for $149 (£99).'