# Advanced RAG

#### Problem: her sorguda farkli kategori retrieve edilince bellekte üstüne ekliyor, dolasiyla farkli kategorilerden data alabiliyor soru degistikce, vector database her seferinde hafizayi silmeli!
###### Bütüm kodta tutarlilik kontrolü yap, mesela hersey cevabi "answer" olarak dönmeli, fronend icin.

In [85]:
question = "Evime bir TV alacagim ve bir telkom ürünüyle kullanmak istiyorum, ne tavsiye edersin?"

### Initials

##### Data directory
###### data klasörünün altinda 8 ana kategörü olmali, tam olarak isimler uyusmali

In [86]:
# Define the directory containing the rag data
data_directory = "/Users/taha/Desktop/rag/data"

##### Imports

In [87]:
# Import necessary libraries
import os
import numpy as np
import glob
import chromadb
import asyncio
import gc
import tiktoken
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.summarize import load_summarize_chain
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, FewShotChatMessagePromptTemplate, PromptTemplate
from langchain.load import dumps, loads
from langchain.schema import Document
from operator import itemgetter
from typing import Literal, List, Tuple

##### Model

In [88]:
# Load API Keys from environment variables
load_dotenv()  # Load environment variables from a .env file

# Retrieve API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")  # This key is loaded but not used in the code

# Initialize the chat model and embedding model
# ChatOpenAI is used to interact with the OpenAI GPT model, and OpenAIEmbeddings is used for generating embeddings for documents
model = ChatOpenAI(model="gpt-4o", api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

In [89]:
# Model max token limit
MAX_TOKEN_LENGTH = 3000

##### Summarizing

In [90]:
# Load summarizing chain with "refine" method to reduce token size
summarize_chain = load_summarize_chain(model, chain_type="refine")

##### Tokenizer

In [91]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base")

31

##### Token count

In [92]:
def get_token_count(docs, question, prompt):
    """
    If the total token count for the RAG chain exceeds the limit, summarize only the retrieved documents.

    Args:
        docs (list): List of documents to check for token limits and summarize if needed.
        question (str): The original question to include in token count.
        prompt (str): The prompt template to include in token count.
        max_token_length (int): The maximum number of tokens allowed before summarization.

    Returns:
        list: Summarized documents or original documents based on token limit.
    """
    # Calculate token counts for different components
    prompt_tokens = num_tokens_from_string(prompt.format(context="dummy", question=question), "cl100k_base")
    question_tokens = num_tokens_from_string(question, "cl100k_base")
    docs_tokens = sum([num_tokens_from_string(doc.page_content, "cl100k_base") for doc in docs])
    
    # Total token count including prompt, question, and documents
    total_tokens = prompt_tokens + question_tokens + docs_tokens
    #print(f"Token count (prompt): {prompt_tokens}")
    #print(f"Token count (question): {question_tokens}")
    #print(f"Token count (retrieved documents): {docs_tokens}")
    #print(f"Total token count (for RAG chain): {total_tokens}")
    
   
    return total_tokens

##### Cos similarity

In [93]:
# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    """
    Computes the cosine similarity between two vectors.
    
    Parameters:
    - vec1 (np.ndarray): The first vector.
    - vec2 (np.ndarray): The second vector.
    
    Returns:
    - float: The cosine similarity between vec1 and vec2.
    """
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2) if (norm_vec1 and norm_vec2) else 0.0

## Routing

### Logical Routing

#### 8 Kategoriden birine atiyor, datacategory icerigine göre. Belki biraz daha genisletilebilir, daha uygun kategori atamasi icin.

###### ESKI NOT DEGERLENDIR: Routing mantigi calismadi retriever'i sadece ilk seferde filtreliyor, her seferinde chroma ya gömüyü ve hepsini ariyor.
###### chain invoke etmeden retriever cagrildigi yerde filtreleme olabilir. Bunu dene!

In [94]:
# Data model
class RouteQuery(BaseModel):
    """Route a user question to the most relevant datacategory."""

    datacategory: Literal["vertrag_rechnung_ihre_daten_kundencenter_login-daten_rechnung_lieferstatus", 
                          "hilfe_stoerungen_stoerungen_selbst_beheben_melden_status_verfolgen",
                          "mobilfunk_tarife_optionen_mobiles-internet_mailbox_esim_sim-karten",
                          "internet_telefonie:_ausbau,_sicherheit,_einstellungen,_bauherren,_glasfaser_und_wlan",
                          "tv_magentatv_streaming-dienste_magentatv_jugendschutz_pins",
                          "magentains_kombi-pakete_mit_magentains_vorteil_und_treuebonus",
                          "apps_dienste_e-mail_magenta_apps_voicemail_app_mobilityconnect",
                          "geraete_zubehoer_anleitungen_fuer_smartphones_tablets_telefone_router_receiver"] = Field(
        ...,
        description="Given a user question choose which datacategory would be most relevant for answering their question",
    )

# LLM with function call 
structured_model = model.with_structured_output(RouteQuery)

# Prompt 
system = """You are an expert at routing user questions to the appropriate data category.

Based on the help category the question is referring to, route it to the relevant data category. 
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Define router 
router = prompt | structured_model

In [95]:
def choose_route(result):
    # Kategorileri ve ilgili alt dizinleri bir sözlükte tanımlayın
    category_map = {
        "vertrag_rechnung_ihre_daten_kundencenter_login-daten_rechnung_lieferstatus": "Vertrag & Rechnung",
        "hilfe_stoerungen_stoerungen_selbst_beheben_melden_status_verfolgen": "Hilfe bei Störungen",
        "mobilfunk_tarife_optionen_mobiles-internet_mailbox_esim_sim-karten": "Mobilfunk",
        "internet_telefonie:_ausbau,_sicherheit,_einstellungen,_bauherren,_glasfaser_und_wlan": "Internet & Telefonie",
        "tv_magentatv_streaming-dienste_magentatv_jugendschutz_pins": "TV",
        "magentains_kombi-pakete_mit_magentains_vorteil_und_treuebonus": "MagentaEINS",
        "apps_dienste_e-mail_magenta_apps_voicemail_app_mobilityconnect": "Apps & Dienste",
        "geraete_zubehoer_anleitungen_fuer_smartphones_tablets_telefone_router_receiver": "Geräte & Zubehör"
    }
    
    # Datacategory'yi küçült ve sözlükte ara, yoksa "Others" döner
    return category_map.get(result.datacategory.lower(), "Others")

full_chain = router | RunnableLambda(choose_route)

In [96]:
data_directory = "/Users/taha/Desktop/rag/data"

sub_directory = full_chain.invoke({"question": question})
print(sub_directory)

specific_directory = os.path.join(data_directory, sub_directory)
print(specific_directory)

TV
/Users/taha/Desktop/rag/data/TV


## Retrieval

### Pre-retrieval

#### Retriever with filter

#### Problem: Birkac farkli kategoriden sorgu yapildiginda o kategorileri hafizada tutuyor ve her seferinden üstüne ekliyor retriever ile gelenlerin.
##### ama sadece belli bir kategori filtresi ile calistirmak yeterli context saglamiyor, baska yerlerde de ilgili soru icin gerekli belge olabiliyor.

#### DenseX denemesi yapiyorum

In [99]:
# İlk olarak, eski vectorstore ve retriever nesnelerini temizleyelim
vectorstore = None
retriever = None

# Çöp toplama işlemi
gc.collect()

def load_summaries(data_directory):
    summary_files = glob.glob(os.path.join(data_directory, '**/_summary.txt'), recursive=True)
    summaries = {}
    
    for file in summary_files:
        with open(file, 'r') as f:
            content = f.read()
            
        chunks = content.split("=== Chunk ===")
        
        for chunk in chunks:
            if "File path:" in chunk and "File summary:" in chunk:
                try:
                    lines = chunk.split('\n')
                    file_path_line = [line for line in lines if "File path:" in line]
                    summary_line = [line for line in lines if "File summary:" in line]

                    if file_path_line and summary_line:
                        file_path = file_path_line[0].split("File path:")[1].strip()
                        summary_text = summary_line[0].split("File summary:")[1].strip()
                        summaries[file_path] = summary_text
                except IndexError:
                    print(f"Warning: Skipping chunk due to formatting issues in file: {file}")

    return summaries

# Load summaries
summaries = load_summaries(specific_directory)

def find_closest_summaries(question, summaries, embedding, top_n=5):
    """
    Finds the closest summary files based on the user's question using cosine similarity.
    
    Parameters:
    - question (str): The user's question.
    - summaries (dict): Dictionary where keys are file paths and values are summary texts.
    - embedding (object): Embedding model used to get vector representations.
    - top_n (int): The number of top closest summaries to retrieve.
    
    Returns:
    - list: List of file paths to the top_n closest summaries.
    """
    # Embed the question
    query_embedding = embedding.embed_query(question)
    
    # List to hold (file_path, similarity) tuples
    similarities = []
    
    # Calculate similarity for each summary
    for file_path, summary in summaries.items():
        summary_embedding = embedding.embed_query(summary)
        
        # Ensure embeddings are 1D vectors for cosine similarity computation
        query_embedding = np.squeeze(query_embedding)
        summary_embedding = np.squeeze(summary_embedding)
        
        # Compute similarity
        similarity = cosine_similarity(query_embedding, summary_embedding)
        similarities.append((file_path, similarity))
    
    # Sort by similarity and get the top_n results
    similarities.sort(key=lambda x: x[1], reverse=True)
    return [file_path for file_path, _ in similarities[:top_n]]

# Retrieve closest summary files
closest_files = find_closest_summaries(question, summaries, embedding)

# Load documents from the closest file paths
def load_documents_from_paths(paths):
    docs = []
    for path in paths:
        with open(path, 'r') as f:
            content = f.read()
        docs.append(Document(page_content=content, metadata={'source': path}))
    return docs

# Load documents
docs = load_documents_from_paths(closest_files)

print(docs)

# Create vectorstore and retriever
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
retriever = vectorstore.as_retriever()

[Document(metadata={'source': 'data/TV/https_www_telekom_de_magenta_tv_online_tv.txt'}, page_content='Source URL: https://www.telekom.de/magenta-tv/online-tv\n\nQuestion: Sie haben noch kein MagentaTV?\nAnswer: Ein großartiges Fernsehvergnügen zu Hause oder unterwegs – das genießen Sie mit der Telekom und Online-TV. Nutzen Sie dafür entweder unserenTV-Receiver, dieMagentaTV Oneoder dieMagentaTV App. Profitieren Sie von zahlreichen komfortablen Funktionen, die Ihr Online-TV-Erlebnis besonders machen:\n\n'), Document(metadata={'source': 'data/TV/https_www_telekom_de_magenta_tv.txt'}, page_content='Source URL: https://www.telekom.de/magenta-tv\n\nQuestion: Was ist der Unterschied zwischen MagentaTV Smart und MagentaTV Basic?\nAnswer: Beide Tarife bieten Ihnen ein großes Angebot an Serien und Filmen sowie Zugangsmöglichkeiten zu weiteren Streaming-Diensten. BeiMagentaTV Smartsteht Ihnen im Gegensatz zu MagentaTV Basic eine besonders große Auswahl an öffentlich-rechtlichen sowie privaten Se

#### orjinal filtreli retriever -  yedek

In [71]:
'''# İlk olarak, eski vectorstore ve retriever nesnelerini temizleyelim
vectorstore = None
retriever = None

# Çöp toplama işlemi
gc.collect()

# Load documents from the specified directory
loader = DirectoryLoader(data_directory, glob="**/*.txt", loader_cls=TextLoader)
docs = loader.load()  # Load all text documents matching the pattern
print(f"Loaded {len(docs)} documents.")  # Debug print

# Ensure all documents have metadata
for doc in docs:
    if 'full_path' not in doc.metadata:
        doc.metadata['full_path'] = doc.metadata.get('source', 'unknown')

# Manually filter documents based on metadata
filtered_docs = [doc for doc in docs if doc.metadata.get('full_path', '').startswith(specific_directory)]
print(f"Filtered {len(filtered_docs)} documents.")  # Debug print

# Create a Chroma vector store from the filtered documents and embeddings
if filtered_docs:
    vectorstore = Chroma.from_documents(documents=filtered_docs, embedding=embedding)
    print("Vectorstore created from filtered documents.")

    # Set up the retriever using the filtered vector store
    retriever = vectorstore.as_retriever()
else:
    print("No documents found to create a vectorstore.")
    retriever = None'''

Loaded 1443 documents.
Filtered 489 documents.
Vectorstore created from filtered documents.
bound=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x1480811f0>)


#### Retriever without filter
##### Orjinal - yedek retriever, filtreleme yok. Bütün kategorilerden context ceker.

In [29]:
'''def initialize_vectorstore(directory):
    """
    Initializes a vector store from the documents found in the specified directory.
    This function performs the following steps:
    1. Loads text documents from the given directory using a DirectoryLoader.
    2. Creates embeddings for the loaded documents using a predefined embedding model.
    3. Initializes a Chroma vector store with these embeddings.
    
    Parameters:
        directory (str): The path to the directory containing text files to be processed.
    
        
    Returns:
        vectorstore (Chroma): A Chroma vector store object containing the embeddings of the documents.
        docs (List[Document]): A list of Document objects loaded from the specified directory.
        
    """
    
    # Load documents from the specified directory using DirectoryLoader
    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
    docs = loader.load()  # Load all text documents matching the pattern
    
    # Create a Chroma vector store from the loaded documents and embeddings
    vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
    
    return vectorstore, docs

# Initialize the vector store and document list
vectorstore, docs = initialize_vectorstore(data_directory)

# Set up the retriever using the vector store
retriever = vectorstore.as_retriever()'''

'def initialize_vectorstore(directory):\n    """\n    Initializes a vector store from the documents found in the specified directory.\n    This function performs the following steps:\n    1. Loads text documents from the given directory using a DirectoryLoader.\n    2. Creates embeddings for the loaded documents using a predefined embedding model.\n    3. Initializes a Chroma vector store with these embeddings.\n    \n    Parameters:\n        directory (str): The path to the directory containing text files to be processed.\n    \n        \n    Returns:\n        vectorstore (Chroma): A Chroma vector store object containing the embeddings of the documents.\n        docs (List[Document]): A list of Document objects loaded from the specified directory.\n        \n    """\n    \n    # Load documents from the specified directory using DirectoryLoader\n    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)\n    docs = loader.load()  # Load all text documents matching 

##### Main telekom prompt

##### To-do: Kategorilerde uygun cevap bulunamazsa nasil bir tepki verecek bunu tanimla.
##### Bu prompt daha efektif hale getirilebilir.

In [100]:
# Define the template for generating an answer based on context and a question
telekom_template = """You are an assistant for question-answering tasks for telekom.de help, providing answers to Telekom customers or potential customers. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer or if the provided documents do not contain relevant information, simply say that unfortunately, you cannot assist with this question and please visit www.telekom.de/hilfe for further assistance. 
Use up to four sentences and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

prompt_telekom = ChatPromptTemplate.from_template(telekom_template)

#### Query Translation

##### Multi-query

In [101]:
# Template for Generating Alternative Questions
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""

# Create a prompt template for generating multiple perspectives of the user's question
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Define a pipeline for generating alternative queries
generate_queries = (
    prompt_perspectives 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))  # Split the generated output into individual queries
)

# Asynchronous function to print generated queries
async def print_generated_queries(question):
    """
    Generates and prints multiple search queries related to the input question.
    
    Parameters:
    - question (str): The input query for which related search queries are generated.
    """
    queries = generate_queries.invoke({"question": question})
    print("\nGenerated Questions:")
    for q in queries:
        print(f"{q}")

def get_unique_union(documents):
    """
    Returns a unique union of retrieved documents.

    This function takes a list of lists of documents, flattens it, and removes duplicates
    to ensure each document is unique.

    Args:
        documents (list of lists): A list where each element is a list of documents.

    Returns:
        list: A list of unique documents.
    """
    # Flatten the list of lists of documents
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Remove duplicates by converting to a set and then back to a list
    unique_docs = list(set(flattened_docs))
    # Deserialize the documents back into their original form
    return [loads(doc) for doc in unique_docs]

# Define the retrieval chain, which includes generating queries, retrieving documents, and removing duplicates
retrieval_chain = generate_queries | retriever.map() | get_unique_union

# Retrieve multiple documents based on the input question
multi_query_docs = retrieval_chain.invoke({"question": question})


def format_docs(docs, query_embedding):
    """
    Formats the retrieved documents with their source and cosine similarity score.

    This function takes a list of documents and formats them to include the source of each document
    and its cosine similarity to the query embedding.

    Args:
        docs (list): A list of documents retrieved from the database.
        query_embedding (numpy array): The embedding of the user's query.

    Returns:
        str: A formatted string containing the source, similarity score, and content of each document.
    """
    # Initialize a set to track unique sources
    unique_sources = set()
    formatted_docs = []

    for doc in docs:
        # Retrieve the source of the document from its metadata
        source = doc.metadata.get("source")
        # Check if the source is unique
        if source and source not in unique_sources:
            unique_sources.add(source)
            # Compute the embedding of the document's content
            document_embedding = embedding.embed_query(doc.page_content)
            # Calculate cosine similarity between the query and document embeddings
            similarity = cosine_similarity(query_embedding, document_embedding)
            # Use a placeholder message if the document content is empty
            content = doc.page_content.strip() or "This document content is empty."
            # Format the document's source, similarity score, and content
            formatted_docs.append(
                f"Source document: {source}\n\nCosine Similarity: {similarity:.4f}\n\n{content}"
            )

    # Join the formatted documents into a single string
    return "\n\n".join(formatted_docs)

# Define a retrieval and generation (RAG) chain for processing the question and context
rag_chain = (
    {"context": retrieval_chain, "question": itemgetter("question")} 
    | prompt_telekom
    | model
    | StrOutputParser()
)

async def retrieve_and_format_docs(question):
    """
    Asynchronously retrieves and formats documents for the given question.

    This function retrieves documents relevant to the user's question and formats them with their
    source information and cosine similarity scores.

    Args:
        question (str): The user's question.

    Returns:
        tuple: A tuple containing the answer and formatted documents.
    """
    # Compute the embedding for the user's question
    query_embedding = embedding.embed_query(question)
    # Format the retrieved documents with their cosine similarity scores
    formatted_docs = format_docs(multi_query_docs, query_embedding)
    
    try:
        # Attempt to retrieve an answer using the RAG chain asynchronously
        answer = await rag_chain.invoke({"context": formatted_docs, "question": question})
    except TypeError:
        # Fallback in case of TypeError, invoke the RAG chain synchronously
        answer = rag_chain.invoke({"context": formatted_docs, "question": question})
    
    # Return the answer and the formatted documents
    return answer, formatted_docs

async def main():
    """
    The main asynchronous function to run the complete flow.

    This function handles the process of generating alternative queries, retrieving and formatting
    documents, and printing the final answer along with the source documents.
    """
   
    # Retrieve and format documents, then get the answer
    answer, source_docs = await retrieve_and_format_docs(question)

    get_token_count(multi_query_docs, question, prompt_telekom)
    # Print the final answer
    print("\nAnswer:", answer)
     # Generate and print alternative queries
    await print_generated_queries(question)
    # Print the source documents used for the answer
    print("\nSources:")
    print(source_docs)

# Execute the main function
await main()


Answer: Evde kullanmak üzere bir TV almayı düşünüyorsanız, Telekom'un MagentaTV hizmetini tavsiye ederim. MagentaTV ile, büyük ekran TV'nizde üstün kalitede televizyon deneyimi yaşayabilirsiniz. Bunun için MagentaTV One cihazını veya Telekom'un TV-Receiver'ını kullanabilirsiniz. Daha fazla bilgi için lütfen [www.telekom.de/hilfe](www.telekom.de/hilfe) adresini ziyaret edin.

Generated Questions:
1. Hangi TV markaları telkom ürünleriyle uyumlu çalışıyor ve önerir misiniz?
2. Telkom ürünleriyle uyumlu olan TV modelleri hakkında bilgi verebilir misiniz?
3. TV alırken telkom ürünleriyle entegrasyonu en iyi olan marka veya model hangisidir?
4. Telkom ürünleriyle uyumlu çalışan TV'ler arasında en iyi seçenek hangisidir?
5. Telkom ürünleriyle sorunsuz bir şekilde entegre olan TV modelleri hakkında önerileriniz nelerdir?

Sources:
Source document: /Users/taha/Desktop/rag/data/Geräte & Zubehör/youtube_Kennt ihr schon unsere Videoberatung？ Hier ist MagentaService Live.txt

Cosine Similarity: 0.

##### RAG-Fusion

In [32]:
# Define the template for generating multiple search queries based on a single input query.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

# Create a chain for generating four related search queries
generate_fusion_queries = (
    prompt_rag_fusion 
    | ChatOpenAI(temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

async def print_generated_fusion_queries(question):
    """
    Generates and prints multiple search queries related to the input question.
    
    Parameters:
    - question (str): The input query for which related search queries are generated.
    """
    queries = generate_fusion_queries.invoke({"question": question})
    print("\nGenerated Questions:")
    for q in queries:
        print(f"{q}")

# Function for Reciprocal Rank Fusion (RRF)
def reciprocal_rank_fusion(results: list[list], k=60):
    """
    Applies Reciprocal Rank Fusion (RRF) to combine multiple lists of ranked documents.
    
    Parameters:
    - results (list[list]): A list of lists where each inner list contains ranked documents.
    - k (int): An optional parameter for the RRF formula, default is 60.
    
    Returns:
    - list: A list of tuples where each tuple contains a document and its fused score.
    """
    
    # Initialize a dictionary to store the fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Serialize the document to a string format to use as a key
            doc_str = dumps(doc)
            # Initialize the document's score if not already present
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Update the document's score using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort documents based on their fused scores in descending order
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples
    return reranked_results

# Create a retrieval chain that generates queries, retrieves documents, and applies RRF
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
fusion_docs = retrieval_chain_rag_fusion.invoke({"question": question})

# Function to get embeddings for a document's content
async def get_document_embeddings(doc):
    """
    Retrieves the embeddings for a document's content asynchronously.
    
    Parameters:
    - doc (Document): The document object whose content embeddings are to be retrieved.
    
    Returns:
    - np.ndarray: The embeddings of the document's content.
    """
    return embedding.embed_query(doc.page_content)

# Function to format fusion_docs as a readable string with similarity scores
async def format_fusion_docs_with_similarity(fusion_docs):
    """
    Formats the fusion documents with their scores and cosine similarity to the question.
    
    Parameters:
    - fusion_docs (list[tuple]): A list of tuples containing documents and their scores.
    
    Returns:
    - str: A formatted string containing each document's source, fusion score, cosine similarity, and content.
    """
    formatted_docs = []
    question_embedding = embedding.embed_query(question)
    
    for doc, score in fusion_docs:
        doc_embedding = await get_document_embeddings(doc)
        similarity = cosine_similarity(question_embedding, doc_embedding)
        source = doc.metadata.get("source", "No source")
        content = doc.page_content
        formatted_docs.append(f"Source: {source}\nFusion Score: {score:.4f}\nCosine Similarity: {similarity:.4f}\nContent: {content}\n")
    
    return "\n".join(formatted_docs)


# Create a chain that uses context and question to generate an answer
rag_chain = (
    {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")} 
    | prompt_telekom
    | model
    | StrOutputParser()
)

# Asynchronous function to retrieve and format documents, then get an answer
async def retrieve_and_format_docs(question):
    """
    Retrieves and formats documents, then obtains an answer to the question.
    
    Parameters:
    - question (str): The query for which answers and document formats are required.
    
    Returns:
    - tuple: A tuple containing the answer and the formatted documents.
    """
    formatted_docs = await format_fusion_docs_with_similarity(fusion_docs)
    
    try:
        # Attempt to get the answer asynchronously
        answer = await rag_chain.invoke({"context": formatted_docs, "question": question})
    except TypeError:
        # Fallback to synchronous invocation if asynchronous fails
        answer = rag_chain.invoke({"context": formatted_docs, "question": question})
    
    return answer, formatted_docs


# Main function to run the sequence of operations
async def main():
    """
    Main function to execute the entire process: generating queries, retrieving and formatting documents, and getting answers.
    """
    doc_list = [doc for doc, score in fusion_docs]
    get_token_count(doc_list, question, prompt_telekom)
    
    answer, formatted_docs = await retrieve_and_format_docs(question)
    print("\nAnswer:", answer)
    await print_generated_fusion_queries(question)
    print("\nSources:")
    print(formatted_docs)  # Print the formatted version of fusion_docs with similarity scores

# Execute the main function
await main()


Answer: Evinizde ses komutları ile yönlendirebileceğiniz ürünler arıyorsanız, Telekom'un Magenta SmartHome ürünlerini tavsiye ederim. Magenta SmartHome, Alexa Skill ile entegre olarak çalışabilir ve böylece ışıkları açıp kapatmak, sıcaklığı ayarlamak veya radyoyu açmak gibi birçok işlemi sesli komutlarla gerçekleştirebilirsiniz. Daha fazla bilgi ve detaylı kurulum için www.telekom.de/smarte-produkte/smart-home adresini ziyaret edebilirsiniz.

Generated Questions:
1. En iyi ses komutlu ev aletleri
2. Evde kullanabileceğim akıllı cihazlar
3. Sesli komutlarla kontrol edilebilen ev ürünleri
4. Evde sesle yönlendirilebilen cihazlar önerileri

Sources:
Source: /Users/taha/Desktop/rag/data/Geräte & Zubehör/youtube_Digitale Organisation mit iOS-Geräten – Adressbuch, Kalender und Co. I Telekom Senioren-Akademie.txt
Fusion Score: 0.0640
Cosine Similarity: 0.7491
Content: Question:
Gibt es eine Möglichkeit, mit Siri eine Erinnerung für bestimmte Ereignisse oder Termine zu erstellen?

Answer:
Wil

##### Step Back

###### cosine similarity ve token sayisi eksik sadece calisiyor suan.

In [None]:
# Few Shot Examples
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]

# Transform examples into example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        few_shot_prompt,
        ("user", "{question}"),
    ]
)

# Generate step-back queries
generate_queries_step_back = prompt | model | StrOutputParser()
step_back_question = generate_queries_step_back.invoke({"question": question})

print(f"Original Question: {question}")
print(f"Step-Back Question: {step_back_question}")

# Response prompt template
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# Normal Context:
{normal_context}

# Step-Back Context:
{step_back_context}

# Original Question: {question}

# Answer:
"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

def get_retrieved_content(retrieved_documents):
    """Format retrieved documents as a string with source information."""
    seen_sources = set()
    content_list = []
    for doc in retrieved_documents:
        source = doc.metadata.get('source', 'Unknown')
        if source not in seen_sources:
            seen_sources.add(source)
            content = (
                f"Source: {source}\n"
                f"Content:\n{doc.page_content}\n"
                "------------------------------\n"
            )
            content_list.append(content)
    return "\n".join(content_list)

def format_retrieved_context(query):
    """Retrieve and format context for the given query."""
    # Retrieve documents using the 'invoke' method
    retrieved_docs = retriever.invoke(query)
    return get_retrieved_content(retrieved_docs)

# Construct the chain to retrieve and generate the response
chain = (
    {
        "normal_context": lambda x: format_retrieved_context(x["question"]),
        "step_back_context": lambda x: format_retrieved_context(x["step_back_question"]),
        "question": lambda x: x["question"],
    }
    | response_prompt
    | model
    | StrOutputParser()
)

# Execute the chain
result = chain.invoke({"question": question, "step_back_question": step_back_question})

# Display the final response
print("\nAnswer:\n", result)
print("\nNormal Context:\n", format_retrieved_context(question))
print("\nStep-Back Context:\n", format_retrieved_context(step_back_question))


In [None]:
# Few Shot Examples
# This list provides example pairs of input questions and their corresponding step-back questions for model training.
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]

# Create a prompt template for examples.
# This template formats example messages for the model to learn from.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),  # Input from the user
        ("ai", "{output}"),    # Model's response to the input
    ]
)

# Create a few-shot prompt template that includes example prompts.
# This helps the model understand the context by providing example inputs and outputs.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

# Define the final prompt template.
# This includes system instructions and integrates the few-shot prompt.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        few_shot_prompt,
        ("user", "{question}"),  # Input question from the user
    ]
)

# Generate step-back queries using the defined prompt.
# This involves processing the original question to generate a more general query.
generate_queries_step_back = prompt | model | StrOutputParser()
step_back_question = generate_queries_step_back.invoke({"question": question})


# Response prompt template
# This template is used to generate the final response based on the retrieved context and the original question.
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# Normal Context:
{normal_context}

# Step-Back Context:
{step_back_context}

# Original Question: {question}

# Answer:
"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

def get_retrieved_content(retrieved_documents):
    """
    Format retrieved documents as a string with source information.
    
    Args:
        retrieved_documents (list): List of documents retrieved based on the query.
        
    Returns:
        str: Formatted string containing source and content of retrieved documents.
    """
    seen_sources = set()  # Track unique sources
    content_list = []      # List to accumulate formatted content
    for doc in retrieved_documents:
        source = doc.metadata.get('source', 'Unknown')  # Get source of the document
        if source not in seen_sources:
            seen_sources.add(source)
            content = (
                f"Source: {source}\n"
                f"Content:\n{doc.page_content}\n"
                "------------------------------\n"
            )
            content_list.append(content)
    return "\n".join(content_list)

def format_retrieved_context(query):
    """
    Retrieve and format context for the given query.
    
    Args:
        query (str): The query for which context needs to be retrieved.
        
    Returns:
        str: Formatted string containing context relevant to the query.
    """
    # Retrieve documents using the 'invoke' method
    retrieved_docs = retriever.invoke(query)
    return get_retrieved_content(retrieved_docs)

# Construct the chain to retrieve and generate the response.
# This chain combines context retrieval and response generation.
chain = (
    {
        "normal_context": lambda x: format_retrieved_context(x["question"]),
        "step_back_context": lambda x: format_retrieved_context(x["step_back_question"]),
        "question": lambda x: x["question"],
    }
    | response_prompt
    | model
    | StrOutputParser()
)

# Execute the chain to get the final response.
result = chain.invoke({"question": question, "step_back_question": step_back_question})

# Display the final response along with normal and step-back contexts.
print("Answer:", result)
print(f"\n\nOriginal Question: {question}")
print(f"\nStep-Back Question: {step_back_question}")
print("\nNormal Context:\n", format_retrieved_context(question))
print("\nStep-Back Context:\n", format_retrieved_context(step_back_question))


##### HyDE

In [None]:
# HyDE document generation
template = """You are creating professional and customer-focused web page content and texts for a major telecommunications provider like Telekom.de. 
Your content is very brief, very clear, and informative. Please write a text for the following question
Question: {question}
text:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

generate_docs_for_retrieval = (
    prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser() 
)

# Run HyDE generation
try:
    hyde_output = generate_docs_for_retrieval.invoke({"question": question})
    print(f"HyDE hypothetical answer:\n{hyde_output.strip()}\n")
except Exception as e:
    logger.error(f"Error generating documents for retrieval: {e}")
    raise

# Retrieve documents
try:
    retrieval_chain = generate_docs_for_retrieval | retriever 
    retrieved_docs = retrieval_chain.invoke({"question": question})
    
    # Print retrieved documents, deduplicated
    seen_sources = set()
    print("Retrieved sources:")
    for doc in retrieved_docs:
        source = doc.metadata.get('source', 'Unknown Source')
        if source not in seen_sources:
            seen_sources.add(source)
            print(f"\nDocument Source: {source}")
            print(f"Document Content:\n{doc.page_content.strip()}")
except Exception as e:
    logger.error(f"Error retrieving documents: {e}")
    raise

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | model
    | StrOutputParser()
)

try:
    final_answer = final_rag_chain.invoke({"context": retrieved_docs, "question": question})
    print(f"\nFinal RAG Answer:\n{final_answer.strip()}")
except Exception as e:
    logger.error(f"Error generating final RAG answer: {e}")
    raise

In [None]:
# HyDE Document Generation
# This section is responsible for creating professional and customer-focused content
# for a major telecommunications provider based on a given question.

# Define a template for generating content.
# The template specifies that the content should be brief, clear, and informative.
template = """You are creating professional and customer-focused web page content and texts for a major telecommunications provider like Telekom.de. 
Your content is very brief, very clear, and informative. Please write a text for the following question:
Question: {question}
text:"""

# Create a prompt template using the defined template.
# This template will be used to generate content for a given question.
prompt_hyde = ChatPromptTemplate.from_template(template)

# Define a chain to generate documents for retrieval.
# This chain uses the prompt template, a language model, and an output parser.
generate_docs_for_retrieval = (
    prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser()
)

# Run HyDE document generation to produce content for the given question.
# The try-except block handles potential errors during document generation.
try:
    hyde_output = generate_docs_for_retrieval.invoke({"question": question})
    print(f"HyDE hypothetical context:\n{hyde_output.strip()}\n")
except Exception as e:
    logger.error(f"Error generating documents for retrieval: {e}")
    raise

# Retrieve Documents
# This section retrieves documents based on the generated content and prints them.

# Define a chain to retrieve documents using the generated content.
# The chain combines the document generation process with a retriever.
try:
    retrieval_chain = generate_docs_for_retrieval | retriever 
    retrieved_docs = retrieval_chain.invoke({"question": question})
    
    # Print retrieved documents and deduplicate them based on source information.
    seen_sources = set()
    print("Retrieved sources:")
    for doc in retrieved_docs:
        source = doc.metadata.get('source', 'Unknown Source')  # Get the source of the document
        if source not in seen_sources:
            seen_sources.add(source)
            print(f"\nSource file: {source}")
            print(f"Document Content:\n{doc.page_content.strip()}")
except Exception as e:
    logger.error(f"Error retrieving documents: {e}")
    raise

# Define a chain to generate the final answer using the RAG process.
# The chain combines the prompt template, a language model, and an output parser.
final_rag_chain = (
    prompt_telekom
    | model
    | StrOutputParser()
)

# Generate the final answer using the RAG process.
# The try-except block handles potential errors during the final answer generation.
try:
    final_answer = final_rag_chain.invoke({"context": retrieved_docs, "question": question})
    print(f"\nFinal Answer:\n{final_answer.strip()}")
except Exception as e:
    logger.error(f"Error generating final RAG answer: {e}")
    raise

##### !!Decomposition
###### Calismadi olmadi maalesef, asnwer sadece 3. sorunun cevabini veriyor, stratch den baska kaynakalara bakip cözüm bulmak lazim.

In [None]:
# Define prompts and chains
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

# Chain
generate_queries_decomposition = ( prompt_decomposition | model | StrOutputParser() | (lambda x: x.split("\n")))

# Run
questions = generate_queries_decomposition.invoke({"question":question})

In [None]:
questions

In [None]:
# Answer recursion
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""
decomposition_prompt = ChatPromptTemplate.from_template(template)

def format_qa_pair(question, answer):
    """Format Q and A pair"""
    
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()


q_a_pairs = ""
for q in questions:
    
    rag_chain = (
    {"context": itemgetter("question") | retriever, 
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")} 
    | decomposition_prompt
    | model
    | StrOutputParser())

    answer_decomposition = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer_decomposition)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

In [None]:
answer_decomposition

### Post-retrieval