# Advanced RAG

#### Problem: her sorguda farkli kategori retrieve edilince bellekte üstüne ekliyor, dolasiyla farkli kategorilerden data alabiliyor soru degistikce, vector database her seferinde hafizayi silmeli!
###### Bütüm kodta tutarlilik kontrolü yap, mesela hersey cevabi "answer" olarak dönmeli, fronend icin.
###### Prompt'larin hepsini yukarida bir hücrede topla

<img src="Advanced.png" alt="Grafik" width="1000" height="800"/>

In [32]:
#question = "Tesla'nin en yeni modeli hangisidir?"
question = "Evimda kullandigim internetin hizindan problem yasiyorum, kesintiler oluyor, ne yapmaliyim?"

### Initials

##### Data directory
###### data klasörünün altinda 8 ana kategörü olmali, tam olarak isimler uyusmali

In [33]:
# Define the directory containing the rag data
data_directory = "/Users/taha/Desktop/rag/data"

##### Imports

In [34]:
# Import necessary libraries
import os
import numpy as np
import glob
import gc
import tiktoken
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.summarize import load_summarize_chain
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, FewShotChatMessagePromptTemplate, PromptTemplate
from langchain.load import dumps, loads
from langchain.schema import Document
from langgraph.graph import END, StateGraph, START
from operator import itemgetter
from tavily import TavilyClient
from typing import Literal, List, Tuple
from typing_extensions import TypedDict
from pprint import pprint

##### Model and template
###### To-do: Kategorilerde uygun cevap bulunamazsa nasil bir tepki verecek bunu tanimla.
###### Bu prompt daha efektif hale getirilebilir.

In [35]:
# Load API Keys from environment variables
load_dotenv()  # Load environment variables from a .env file

# Retrieve API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Initialize the chat model and embedding model
# ChatOpenAI is used to interact with the OpenAI GPT model, and OpenAIEmbeddings is used for generating embeddings for documents
model = ChatOpenAI(model="gpt-4o", api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

In [36]:
# Model max token limit
MAX_TOKEN_LENGTH = 3000

##### Prompts

In [37]:
## Main prompt: telekom assistant
telekom_template = """You are an assistant for question-answering tasks for telekom.de help, providing answers to Telekom customers or potential customers. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer or if the provided documents do not contain relevant information, simply say that unfortunately, you cannot assist with this question and please visit www.telekom.de/hilfe for further assistance. 
Use up to four sentences and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
prompt_telekom = ChatPromptTemplate.from_template(telekom_template)

## CRAG and Self-RAG: retrieval grader
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

## CRAG and Self-RAG: re_write prompt
re_write_system = """You are a question re-writer that converts an input question into a better version optimized for web search and context search.\n 
     Always provide the question in German. Look at the input  and try to reason about the underlying semantic intent or meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", re_write_system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

## CRAG and Self-RAG: hallucination grader prompt
system_hallucination = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_hallucination),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

## CRAG and Self-RAG: answer grader prompt
grader_system = """You are a grader assessing whether an answer corresponds to a question or whether it is an appropriate response to that question.\n
    Give a binary score of ‘yes’ or ‘no’. ‘yes’ means that the answer corresponds to the question."""

answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", grader_system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

## Multi-Query: Template for Generating Alternative Questions
multi_query_template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
# Create a prompt template for generating multiple queries of the user's question
multi_query_prompt = ChatPromptTemplate.from_template(multi_query_template)


## RAG-Fusion: template for generating multiple search queries based on a single input query.
fusion_template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
# Create a prompt template for generating multiple queries of the user's question
prompt_rag_fusion = ChatPromptTemplate.from_template(fusion_template)

## Step Back
# Few Shot Examples
few_shot_examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]

# Transform examples into example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=few_shot_examples,
)

step_back_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        few_shot_prompt,
        ("user", "{question}"),
    ]
)

## HyDE: Document Generation
# This section is responsible for creating professional and customer-focused content
# for a major telecommunications provider based on a given question.

# Define a template for generating content.
# The template specifies that the content should be brief, clear, and informative.
hyde_content_template = """You are creating professional and customer-focused web page content and texts for a major telecommunications provider like Telekom.de. 
Your content is very brief, very clear, and informative. Please write a text for the following question:
Question: {question}
text:"""

# Create a prompt template using the defined template.
# This template will be used to generate content for a given question.
prompt_hyde = ChatPromptTemplate.from_template(hyde_content_template)

## Decomposition: Sub-questions prompt
decomposition_template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(decomposition_template)

# Decomposition answer recursion
decomposition_template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""
decomposition_prompt = ChatPromptTemplate.from_template(decomposition_template)

# Decomposition individual answer prompt
decomposition_individual_template = """Here is a set of Q+A pairs:

{decomposition_individual_context}

Use these to synthesize an answer to the question: {question}
"""

decomposition_individual_prompt = ChatPromptTemplate.from_template(decomposition_individual_template)


##### Summarizing

In [38]:
# Load summarizing chain with "refine" method to reduce token size
summarize_chain = load_summarize_chain(model, chain_type="refine")

##### Tokenizer - Token count

In [39]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base")

def get_token_count(docs, question, prompt):
    """
    If the total token count for the RAG chain exceeds the limit, summarize only the retrieved documents.

    Args:
        docs (list): List of documents to check for token limits and summarize if needed.
        question (str): The original question to include in token count.
        prompt (str): The prompt template to include in token count.
        max_token_length (int): The maximum number of tokens allowed before summarization.

    Returns:
        list: Summarized documents or original documents based on token limit.
    """
    # Calculate token counts for different components
    prompt_tokens = num_tokens_from_string(prompt.format(context="dummy", question=question), "cl100k_base")
    question_tokens = num_tokens_from_string(question, "cl100k_base")
    docs_tokens = sum([num_tokens_from_string(doc.page_content, "cl100k_base") for doc in docs])
    
    # Total token count including prompt, question, and documents
    total_tokens = prompt_tokens + question_tokens + docs_tokens
    #print(f"Token count (prompt): {prompt_tokens}")
    #print(f"Token count (question): {question_tokens}")
    #print(f"Token count (retrieved documents): {docs_tokens}")
    #print(f"Total token count (for RAG chain): {total_tokens}")
    
   
    return total_tokens

##### Cosine similarity

In [40]:
# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    """
    Computes the cosine similarity between two vectors.
    
    Parameters:
    - vec1 (np.ndarray): The first vector.
    - vec2 (np.ndarray): The second vector.
    
    Returns:
    - float: The cosine similarity between vec1 and vec2.
    """
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2) if (norm_vec1 and norm_vec2) else 0.0

##### Print multi queries

In [41]:
# Asynchronous function to print generated queries
async def print_generated_queries(question, multi_query_chain):
    """
    Generates and prints multiple search queries related to the input question.
    
    Parameters:
    - question (str): The input query for which related search queries are generated.
    """
    multiple_queries = multi_query_chain.invoke({"question": question})
    print("\nGenerated Questions:")
    for q in multiple_queries:
        print(f"{q}")

## Query Translation

### Multi-query

In [42]:
# Define a pipeline for generating alternative queries
generate_multi_queries = (
    multi_query_prompt 
    | ChatOpenAI(temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))  # Split the generated output into individual queries
)

def get_unique_union(documents):
    """
    Returns a unique union of retrieved documents.

    This function takes a list of lists of documents, flattens it, and removes duplicates
    to ensure each document is unique.

    Args:
        documents (list of lists): A list where each element is a list of documents.

    Returns:
        list: A list of unique documents.
    """
    # Flatten the list of lists of documents
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Remove duplicates by converting to a set and then back to a list
    unique_docs = list(set(flattened_docs))
    # Deserialize the documents back into their original form
    return [loads(doc) for doc in unique_docs]

### RAG-Fusion

In [43]:
# Create a chain for generating four related search queries
generate_fusion_queries = (
    prompt_rag_fusion 
    | ChatOpenAI(temperature=0)
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)


# Function for Reciprocal Rank Fusion (RRF)
def reciprocal_rank_fusion(results: list[list], k=60):
    """
    Applies Reciprocal Rank Fusion (RRF) to combine multiple lists of ranked documents.
    
    Parameters:
    - results (list[list]): A list of lists where each inner list contains ranked documents.
    - k (int): An optional parameter for the RRF formula, default is 60.
    
    Returns:
    - list: A list of tuples where each tuple contains a document and its fused score.
    """
    
    # Initialize a dictionary to store the fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Serialize the document to a string format to use as a key
            doc_str = dumps(doc)
            # Initialize the document's score if not already present
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Update the document's score using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort documents based on their fused scores in descending order
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples
    return reranked_results

### HyDE

In [44]:
# Define a chain to generate documents for retrieval.
# This chain uses the prompt template, a language model, and an output parser.
generate_docs_for_retrieval = (
    prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser()
)

# Run HyDE document generation to produce content for the given question.
# The try-except block handles potential errors during document generation.
try:
    hyde_output = generate_docs_for_retrieval.invoke({"question": question})
    print(f"HyDE hypothetical context:\n{hyde_output.strip()}\n")
except Exception as e:
    logger.error(f"Error generating documents for retrieval: {e}")
    raise

HyDE hypothetical context:
Evimde yaşadığım internet hızı sorunları ve kesintiler için özür dileriz. Sorununuzu çözmek için lütfen müşteri hizmetlerimizle iletişime geçin. Size en iyi hizmeti sunabilmek için elimizden geleni yapacağız. Teşekkürler.



### Step-back

In [45]:
# Generate step-back queries
generate_queries_step_back = step_back_prompt | model | StrOutputParser()
step_back_question = generate_queries_step_back.invoke({"question": question})

print(f"Original Question: {question}")
print(f"Step-Back Question: {step_back_question}")

# Response prompt template
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# Normal Context:
{normal_context}

# Step-Back Context:
{step_back_context}

# Original Question: {question}

# Answer:
"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

Original Question: Evimda kullandigim internetin hizindan problem yasiyorum, kesintiler oluyor, ne yapmaliyim?
Step-Back Question: How can I fix issues with my home internet connection?


### Decomposition

In [46]:
# Chain
generate_queries_decomposition = ( prompt_decomposition | model | StrOutputParser() | (lambda x: x.split("\n")))

# Run
decomposition_questions = generate_queries_decomposition.invoke({"question":question})



## Routing

### Logical Routing

#### 8 Kategoriden birine atiyor, datacategory icerigine göre. Belki biraz daha genisletilebilir, daha uygun kategori atamasi icin.

###### ESKI NOT DEGERLENDIR: Routing mantigi calismadi retriever'i sadece ilk seferde filtreliyor, her seferinde chroma ya gömüyü ve hepsini ariyor.
###### chain invoke etmeden retriever cagrildigi yerde filtreleme olabilir. Bunu dene!

In [47]:
# Data model
class RouteQuery(BaseModel):
    """Route a user question to the most relevant datacategory."""

    datacategory: Literal["vertrag_rechnung_ihre_daten_kundencenter_login-daten_rechnung_lieferstatus", 
                          "hilfe_stoerungen_stoerungen_selbst_beheben_melden_status_verfolgen",
                          "mobilfunk_tarife_optionen_mobiles-internet_mailbox_esim_sim-karten",
                          "internet_telefonie:_ausbau,_sicherheit,_einstellungen,_bauherren,_glasfaser_und_wlan",
                          "tv_magentatv_streaming-dienste_magentatv_jugendschutz_pins",
                          "magentains_kombi-pakete_mit_magentains_vorteil_und_treuebonus",
                          "apps_dienste_e-mail_magenta_apps_voicemail_app_mobilityconnect",
                          "geraete_zubehoer_anleitungen_fuer_smartphones_tablets_telefone_router_receiver"] = Field(
        ...,
        description="Given a user question choose which datacategory would be most relevant for answering their question",
    )

# LLM with function call 
structured_model = model.with_structured_output(RouteQuery)

# Prompt 
routing_template = """You are an expert at routing user questions to the appropriate data category.

Based on the help category the question is referring to, route it to the relevant data category. 
"""

routing_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", routing_template),
        ("human", "{question}"),
    ]
)

# Define router 
router = routing_prompt | structured_model

##### !!!!!BURADA OTHER DONMELI MI DÜZELT BUNU????
##### summary.txt'lerinde summary'lerini alip her kategori icin keyword cikarttir gpt'ye
##### daha iyi kategori filtrelemesi yapilir bu sekilde.

In [48]:
def choose_route(result):
    # Kategorileri ve ilgili alt dizinleri bir sözlükte tanımlayın
    category_map = {
        "vertrag_rechnung_ihre_daten_kundencenter_login-daten_rechnung_lieferstatus": "Vertrag & Rechnung",
        "hilfe_stoerungen_stoerungen_selbst_beheben_melden_status_verfolgen": "Hilfe bei Störungen",
        "mobilfunk_tarife_optionen_mobiles-internet_mailbox_esim_sim-karten": "Mobilfunk",
        "internet_telefonie:_ausbau,_sicherheit,_einstellungen,_bauherren,_glasfaser_und_wlan": "Internet & Telefonie",
        "tv_magentatv_streaming-dienste_magentatv_jugendschutz_pins": "TV",
        "magentains_kombi-pakete_mit_magentains_vorteil_und_treuebonus": "MagentaEINS",
        "apps_dienste_e-mail_magenta_apps_voicemail_app_mobilityconnect": "Apps & Dienste",
        "geraete_zubehoer_anleitungen_fuer_smartphones_tablets_telefone_router_receiver": "Geräte & Zubehör"
    }
    
    # Datacategory'yi küçült ve sözlükte ara, yoksa "Others" döner
    return category_map.get(result.datacategory.lower(), "Others")

full_chain = router | RunnableLambda(choose_route)

In [49]:
data_directory = "/Users/taha/Desktop/rag/data"

sub_directory = full_chain.invoke({"question": question})
print(sub_directory)

specific_directory = os.path.join(data_directory, sub_directory)
print(specific_directory)

Hilfe bei Störungen
/Users/taha/Desktop/rag/data/Hilfe bei Störungen


## Indexing

### DenseX
###### Orjnal DenseX, specific_directory'den cekio
###### Yedek 2 li retriever drive'da

In [50]:

# Sabitler
TOP_N = 20 # Ilgili specified directory'den kac tane en yakin dosyayi getirmek istedigim.
SUMMARY_FILE_PATTERN = '**/_summary.txt'

vectorstore = None
retriever = None

# Çöp toplama işlemi
gc.collect()

def load_summaries(data_directory):
    """
    Summarize the content of _summary.txt files in the given directory.
    """
    summary_files = glob.glob(os.path.join(data_directory, SUMMARY_FILE_PATTERN), recursive=True)
    summaries = {}
    
    for file in summary_files:
        with open(file, 'r') as f:
            content = f.read()
        
        chunks = content.split("=== Chunk ===")
        
        for chunk in chunks:
            if "File path:" in chunk and "File summary:" in chunk:
                try:
                    lines = chunk.split('\n')
                    file_path_line = [line for line in lines if "File path:" in line]
                    summary_line = [line for line in lines if "File summary:" in line]

                    if file_path_line and summary_line:
                        file_path = file_path_line[0].split("File path:")[1].strip()
                        summary_text = summary_line[0].split("File summary:")[1].strip()
                        summaries[file_path] = summary_text
                except IndexError:
                    print(f"Warning: Skipping chunk due to formatting issues in file: {file}")

    return summaries


def create_chroma_vectorstore(summaries, embedding):
    """
    Create a Chroma vectorstore from the provided summaries.
    """
    documents = []
    summaries_text = list(summaries.values())
    file_paths = list(summaries.keys())
    
    # Embed summaries in batch
    summary_embeddings = embedding.embed_documents(summaries_text)
    
    # Debug: Print size of embeddings and a sample embedding
    print(f"Total embeddings calculated: {len(summary_embeddings)}")

    # Create Document objects
    for i, summary in enumerate(summaries_text):
        doc = Document(page_content=summary, metadata={'source': file_paths[i]})
        documents.append(doc)
    
    # Debug: Print size of documents list
    print(f"Total documents created: {len(documents)}")
    
    # Create Chroma vectorstore from documents
    summary_vectorstore = Chroma.from_documents(documents=documents, embedding=embedding)
    return summary_vectorstore


def find_closest_summaries_with_chroma(question, summary_retriever, top_n=TOP_N):
    """
    Finds the closest summary files based on the user's question using the Chroma retriever.
    Ensures that only unique file paths are returned, with no duplicates. If there aren't
    enough unique results, it keeps searching until it finds `top_n` unique results.
    """
    unique_paths = []
    seen_files = set()
    retries = 0  # To prevent infinite loops in case something goes wrong

    while len(unique_paths) < top_n and retries < 5:  # Limit retries to 5 to avoid infinite loops
        # Get results from the retriever
        results = summary_retriever.get_relevant_documents(question)
        
        # Debug: Print how many results were found in this iteration
        print(f"Iteration {retries + 1}, results found: {len(results)}")

        for result in results:
            file_path = result.metadata['source']
            
            # Check if the file path has already been added
            if file_path not in seen_files:
                unique_paths.append(file_path)
                seen_files.add(file_path)
            
            # Stop once we have the desired number of unique paths
            if len(unique_paths) >= top_n:
                break
        
        retries += 1  # Increment retry counter in case we need to search again

    # Debug: Print how many unique results were retrieved in total
    print(f"Number of unique results retrieved: {len(unique_paths)}")

    # If after retries we still don't have enough results, warn the user
    if len(unique_paths) < top_n:
        print(f"Warning: Only {len(unique_paths)} unique results were found after {retries} iterations.")

    return unique_paths


def load_original_documents_from_summary_paths(summary_paths):
    """
    Load the original documents based on the summary file paths.
    """
    docs = []
    for summary_path in summary_paths:
        if not os.path.exists(summary_path):
            print(f"Original document not found for summary: {summary_path}")
            continue
        
        try:
            with open(summary_path, 'r') as f:
                content = f.read()
            docs.append(Document(page_content=content, metadata={'source': summary_path}))
            print(f"Successfully loaded document from: {summary_path}")  # Debug: Log successful load
        except FileNotFoundError:
            print(f"Original document not found for summary: {summary_path}")  # Debug: Log missing file
        except Exception as e:
            print(f"Error loading document from {summary_path}: {e}")  # Debug: Log any other error
    
    return docs


# Özetleri yükleyin
summaries = load_summaries(specific_directory)

# Chroma vektör mağazasını oluşturun
summary_vectorstore = create_chroma_vectorstore(summaries, embedding)

# Chroma'dan bir retriever oluşturun
summary_retriever = summary_vectorstore.as_retriever(search_kwargs={"k": TOP_N})

# En yakın özetleri bulun
closest_summary_files = find_closest_summaries_with_chroma(question, summary_retriever, top_n=TOP_N)

# Clear Chroma vectorstore after use
summary_vectorstore.delete_collection()  # This will delete all vectors in the collection
print("Summary vectorstore has been cleared.")

# En yakın özetlerin işaret ettiği orijinal dosyaları yükleyin
docs = load_original_documents_from_summary_paths(closest_summary_files)

# Orijinal belgelerden bir vektör mağazası ve retriever oluşturun
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
retriever = vectorstore.as_retriever()


Total embeddings calculated: 83
Total documents created: 83
Iteration 1, results found: 20
Iteration 2, results found: 20
Iteration 3, results found: 20
Iteration 4, results found: 20
Iteration 5, results found: 20
Number of unique results retrieved: 14
Summary vectorstore has been cleared.
Successfully loaded document from: data/Hilfe bei Störungen/youtube_Telekom hilft News： Internet-Abbrüche vermeiden - So geht's!.txt
Successfully loaded document from: data/Hilfe bei Störungen/https_www_telekom_de_hilfe_hilfeangebote_hilfe_bei_stoerungen.txt
Successfully loaded document from: data/Hilfe bei Störungen/youtube_Störung？ Diagnose selbst durchführen - kinderleicht..txt
Successfully loaded document from: data/Hilfe bei Störungen/https_www_telekom_de_hilfe_hilfe_bei_stoerungen_verbunden_bleiben.txt
Successfully loaded document from: data/Hilfe bei Störungen/youtube_Router-Notfallhilfe： Schnelle Lösungen bei Ausfällen #shorts #telekom #hilfe #router #magenta.txt
Successfully loaded document

### Naive RAG Retriever
##### ONEMLI: Buradaki data cok büyük oldugu icin server gibi bir opsiyon olabilir datalari sabit orda tutmak icin

In [51]:
'''retriever_all = None

def initialize_vectorstore(directory):
    """
    Initializes a vector store from the documents found in the specified directory.
    This function performs the following steps:
    1. Loads text documents from the given directory using a DirectoryLoader.
    2. Creates embeddings for the loaded documents using a predefined embedding model.
    3. Initializes a Chroma vector store with these embeddings.
    
    Parameters:
        directory (str): The path to the directory containing text files to be processed.
    
        
    Returns:
        vectorstore (Chroma): A Chroma vector store object containing the embeddings of the documents.
        docs (List[Document]): A list of Document objects loaded from the specified directory.
        
    """
    
    # Load documents from the specified directory using DirectoryLoader
    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
    docs = loader.load()  # Load all text documents matching the pattern
    # Filter out any documents that are named '_summary.txt'
    filtered_docs = [doc for doc in docs if not os.path.basename(doc.metadata['source']).endswith('_summary.txt')]
    
    
    # Create a Chroma vector store from the loaded documents and embeddings
    vectorstore = Chroma.from_documents(documents=filtered_docs, embedding=embedding)
    
    return vectorstore

# Initialize the vector store and document list
full_vectorstore= initialize_vectorstore(data_directory)

# Set up the retriever using the vector store
retriever_all = full_vectorstore.as_retriever()'''

'retriever_all = None\n\ndef initialize_vectorstore(directory):\n    """\n    Initializes a vector store from the documents found in the specified directory.\n    This function performs the following steps:\n    1. Loads text documents from the given directory using a DirectoryLoader.\n    2. Creates embeddings for the loaded documents using a predefined embedding model.\n    3. Initializes a Chroma vector store with these embeddings.\n    \n    Parameters:\n        directory (str): The path to the directory containing text files to be processed.\n    \n        \n    Returns:\n        vectorstore (Chroma): A Chroma vector store object containing the embeddings of the documents.\n        docs (List[Document]): A list of Document objects loaded from the specified directory.\n        \n    """\n    \n    # Load documents from the specified directory using DirectoryLoader\n    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)\n    docs = loader.load()  # Load all 

## Retrieval

### Pre-retrieval

#### Multi-query

In [52]:
# Define the retrieval chain, which includes generating queries, retrieving documents, and removing duplicates
multi_query_retrieval_chain = generate_multi_queries | retriever.map() | get_unique_union

# Retrieve multiple documents based on the input question
multi_query_docs = multi_query_retrieval_chain.invoke({"question": question})

  return [loads(doc) for doc in unique_docs]


#### RAG-Fusion

In [53]:
# Create a retrieval chain that generates queries, retrieves documents, and applies RRF
retrieval_chain_rag_fusion = generate_fusion_queries | retriever.map() | reciprocal_rank_fusion
fusion_docs = retrieval_chain_rag_fusion.invoke({"question": question})

#### HyDE

In [54]:
# Retrieve Documents
# This section retrieves documents based on the generated content and prints them.

# Define a chain to retrieve documents using the generated content.
# The chain combines the document generation process with a retriever.
try:
    retrieval_chain = generate_docs_for_retrieval | retriever 
    retrieved_docs = retrieval_chain.invoke({"question": question})
    
    # Print retrieved documents and deduplicate them based on source information.
    seen_sources = set()
    print("Retrieved sources:")
    for doc in retrieved_docs:
        source = doc.metadata.get('source', 'Unknown Source')  # Get the source of the document
        if source not in seen_sources:
            seen_sources.add(source)
            print(f"\nSource file: {source}")
            print(f"Document Content:\n{doc.page_content.strip()}")
except Exception as e:
    logger.error(f"Error retrieving documents: {e}")
    raise

Retrieved sources:

Source file: data/Hilfe bei Störungen/youtube_Telekom hilft News： Internet-Abbrüche vermeiden - So geht's!.txt
Document Content:
Question:
Was ist aus Ihrer Sicht der häufigste Grund für eine Schwankung der Internetgeschwindigkeit?

Answer:
Thomas von Telekom erklärt, dass Internetgeschwindigkeitsschwankungen oft durch defekte oder ungeeignete Kabel, die den Hausanschluss verbinden, verursacht werden können. Als hauptsächlicher Grund wird die sogenannte "Entleitung" genannt. Es gibt drei Arten von Leitungen: Ein von der Telekom verwendetes Kabel, das immer geeignet ist, ein Netzwerkkabel, das als Alternative funktioniert und ein Elektrikerkabel, das nicht mehr geeignet ist. Es wird empfohlen, das Kabel ohne Unterbrechungen und nicht zu nah an elektrischen Leitungen zu verlegen. Ein Entleitungsflyer kann auf Wunsch zur Verfügung gestellt werden, um weitere Informationen und einen Link zum Bestellen des Kabels zu erhalten.

Source file: data/Hilfe bei Störungen/https_

#### Step-back

In [55]:
def get_retrieved_content(retrieved_documents):
    """Format retrieved documents as a string with source information."""
    seen_sources = set()
    content_list = []
    for doc in retrieved_documents:
        source = doc.metadata.get('source', 'Unknown')
        if source not in seen_sources:
            seen_sources.add(source)
            content = (
                f"Source: {source}\n"
                f"Content:\n{doc.page_content}\n"
                "------------------------------\n"
            )
            content_list.append(content)
    return "\n".join(content_list)

def format_retrieved_context(query):
    """Retrieve and format context for the given query."""
    # Retrieve documents using the 'invoke' method
    retrieved_docs = retriever.invoke(query)
    return get_retrieved_content(retrieved_docs)

### Post-retrieval

## Generation

#### Multi-query

In [56]:
# Define a retrieval and generation (RAG) chain for processing the question and context
multi_query_rag_chain = (
    {"context": multi_query_retrieval_chain, "question": itemgetter("question")} 
    | prompt_telekom
    | model
    | StrOutputParser()
)

def format_docs(docs):
    """
    Formats the retrieved documents with their source and cosine similarity score.

    This function takes a list of documents and formats them to include the source of each document
    and its cosine similarity to the query embedding.

    Args:
        docs (list): A list of documents retrieved from the database.
        query_embedding (numpy array): The embedding of the user's query.

    Returns:
        str: A formatted string containing the source, similarity score, and content of each document.
    """
    # Initialize a set to track unique sources
    unique_sources = set()
    formatted_docs = []
    question_embedding = embedding.embed_query(question)

    for doc in docs:
        # Retrieve the source of the document from its metadata
        source = doc.metadata.get("source")
        # Check if the source is unique
        if source and source not in unique_sources:
            unique_sources.add(source)
            # Compute the embedding of the document's content
            document_embedding = embedding.embed_query(doc.page_content)
            # Calculate cosine similarity between the query and document embeddings
            similarity = cosine_similarity(question_embedding, document_embedding)
            # Use a placeholder message if the document content is empty
            content = doc.page_content.strip() or "This document content is empty."
            # Format the document's source, similarity score, and content
            formatted_docs.append(
                f"Source document: {source}\n\nCosine Similarity: {similarity:.4f}\n\n{content}"
            )

    # Join the formatted documents into a single string
    return "\n\n".join(formatted_docs)

async def generate_and_format_docs(question):
    """
    Asynchronously retrieves and formats documents for the given question.

    This function retrieves documents relevant to the user's question and formats them with their
    source information and cosine similarity scores.

    Args:
        question (str): The user's question.

    Returns:
        tuple: A tuple containing the answer and formatted documents.
    """
    # Compute the embedding for the user's question
    
    # Format the retrieved documents with their cosine similarity scores
    formatted_docs = format_docs(multi_query_docs)
    
    try:
        # Attempt to retrieve an answer using the RAG chain asynchronously
        answer = await multi_query_rag_chain.invoke({"context": formatted_docs, "question": question})
    except TypeError:
        # Fallback in case of TypeError, invoke the RAG chain synchronously
        answer = multi_query_rag_chain.invoke({"context": formatted_docs, "question": question})
    
    # Return the answer and the formatted documents
    return answer, formatted_docs

async def main():
    """
    The main asynchronous function to run the complete flow.

    This function handles the process of generating alternative queries, retrieving and formatting
    documents, and printing the final answer along with the source documents.
    """
   
    # Retrieve and format documents, then get the answer
    multi_query_answer, source_docs = await generate_and_format_docs(question)

    # Print the final answer
    print("\nAnswer:", multi_query_answer)
     # Generate and print alternative queries
    await print_generated_queries(question, generate_multi_queries)
    # Print the source documents used for the answer
    print("\nSources:")
    print(source_docs)

# Execute the main function
await main()


Answer: Evinizde kullandığınız internet hızında problem yaşıyorsanız ve kesintiler oluyorsa, öncelikle modem ve kablolarınızı kontrol etmelisiniz. Tüm kabloların doğru ve sağlam bir şekilde bağlı olduğundan emin olun ve modemi yeniden başlatmayı deneyin. Eğer sorun devam ediyorsa, Telekom'un "Mein Magenta" uygulaması üzerinden bir arıza bildirimi yapabilirsiniz. Daha fazla yardım için lütfen www.telekom.de/hilfe adresini ziyaret edin.

Generated Questions:
1. Evimde internet hızımda sorun yaşıyorum ve sık sık kesintiler oluyor, bu durumu nasıl çözebilirim?
2. Evimde kullandığım internet bağlantısında hız sorunu ve kesintiler yaşıyorum, bu konuda ne gibi adımlar atabilirim?
3. İnternet hızımda ve bağlantımda sorunlar yaşıyorum, kesintiler meydana geliyor, bu durumu düzeltmek için ne yapabilirim?
4. Evimdeki internet bağlantısında hız problemleri ve kesintiler yaşıyorum, bu sorunu çözmek için hangi adımları izlemeliyim?
5. Evimde kullanmakta olduğum internet bağlantısında hız düşüklüğü 

#### RAG-Fusion

In [57]:
# Create a chain that uses context and question to generate an answer
fusion_rag_chain = (
    {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")} 
    | prompt_telekom
    | model
    | StrOutputParser()
)

# Function to format fusion_docs as a readable string with similarity scores
async def format_fusion_docs_with_similarity(fusion_docs):
    """
    Formats the fusion documents with their scores and cosine similarity to the question.
    
    Parameters:
    - fusion_docs (list[tuple]): A list of tuples containing documents and their scores.
    
    Returns:
    - str: A formatted string containing each document's source, fusion score, cosine similarity, and content.
    """
    formatted_docs = []
    question_embedding = embedding.embed_query(question)
    
    for doc, score in fusion_docs:
        doc_embedding = embedding.embed_query(doc.page_content)
        similarity = cosine_similarity(question_embedding, doc_embedding)
        source = doc.metadata.get("source", "No source")
        content = doc.page_content
        formatted_docs.append(f"Source: {source}\nFusion Score: {score:.4f}\nCosine Similarity: {similarity:.4f}\nContent: {content}\n")
    
    return "\n".join(formatted_docs)

# Asynchronous function to retrieve and format documents, then get an answer
async def retrieve_and_format_docs(question):
    """
    Retrieves and formats documents, then obtains an answer to the question.
    
    Parameters:
    - question (str): The query for which answers and document formats are required.
    
    Returns:
    - tuple: A tuple containing the answer and the formatted documents.
    """
    formatted_docs = await format_fusion_docs_with_similarity(fusion_docs)
    
    try:
        # Attempt to get the answer asynchronously
        answer = await fusion_rag_chain.invoke({"context": formatted_docs, "question": question})
    except TypeError:
        # Fallback to synchronous invocation if asynchronous fails
        answer = fusion_rag_chain.invoke({"context": formatted_docs, "question": question})
    
    return answer, formatted_docs


# Main function to run the sequence of operations
async def main():
    """
    Main function to execute the entire process: generating queries, retrieving and formatting documents, and getting answers.
    """
    
    fusion_answer, formatted_docs = await retrieve_and_format_docs(question)
    print("\nAnswer:", fusion_answer)
    await print_generated_queries(question, generate_fusion_queries)
    print("\nSources:")
    print(formatted_docs)  # Print the formatted version of fusion_docs with similarity scores

# Execute the main function
await main()


Answer: İnternet hızınızdan kaynaklanan problemler yaşıyorsanız, öncelikle evde kullanılan kabloları kontrol edin. Defolu veya uygun olmayan kablolar internet hızında dalgalanmalara ve kesintilere yol açabilir. Modeminizi yeniden başlatmayı veya modemi elektrikten 10 dakika boyunca çıkarıp tekrar takmayı deneyebilirsiniz. Eğer bunlar sorunu çözmezse, lütfen www.telekom.de/hilfe adresini ziyaret ederek bir arıza bildirimi yapın.

Generated Questions:
1. Ev internet hızı nasıl arttırılır?
2. İnternet kesintileri nasıl giderilir?
3. Evde internet hızını artırmak için ne yapabilirim?
4. İnternet bağlantı sorunları nasıl çözülür?

Sources:
Source: data/Hilfe bei Störungen/youtube_Telekom hilft News： Internet-Abbrüche vermeiden - So geht's!.txt
Fusion Score: 0.0656
Cosine Similarity: 0.7822
Content: Question:
Was ist aus Ihrer Sicht der häufigste Grund für eine Schwankung der Internetgeschwindigkeit?

Answer:
Thomas von Telekom erklärt, dass Internetgeschwindigkeitsschwankungen oft durch de

#### HyDE

In [58]:
# Define a chain to generate the final answer using the RAG process.
# The chain combines the prompt template, a language model, and an output parser.
final_rag_chain = (
    prompt_telekom
    | model
    | StrOutputParser()
)

# Generate the final answer using the RAG process.
# The try-except block handles potential errors during the final answer generation.
try:
    final_answer = final_rag_chain.invoke({"context": retrieved_docs, "question": question})
    print(f"\nFinal Answer:\n{final_answer.strip()}")
except Exception as e:
    logger.error(f"Error generating final RAG answer: {e}")
    raise


Final Answer:
İnternet hızında problem yaşıyorsanız veya kesintiler oluyorsa, öncelikle evdeki kablo bağlantılarını kontrol etmeniz önerilir. Defekte veya uygun olmayan kablolar internet hızında dalgalanmalara neden olabilir. Ayrıca, internet sorunlarınızı [buradan](https://www.telekom.de/hilfe/hilfe-bei-stoerungen) bildirerek yardım alabilirsiniz. Daha fazla destek için lütfen www.telekom.de/hilfe adresini ziyaret edin.


#### Step-back

In [59]:
step_back_chain = (
    {
        "normal_context": lambda x: format_retrieved_context(x["question"]),
        "step_back_context": lambda x: format_retrieved_context(x["step_back_question"]),
        "question": lambda x: x["question"],
    }
    | response_prompt
    | model
    | StrOutputParser()
)

# Execute the chain
step_back_answer = step_back_chain.invoke({"question": question, "step_back_question": step_back_question})

# Display the final response
print("\nAnswer:\n", step_back_answer)
print("\nNormal Context:\n", format_retrieved_context(question))
print("\nStep-Back Context:\n", format_retrieved_context(step_back_question))


Answer:
 Evinizde kullandığınız internetin hızında problemler yaşıyorsanız ve kesintiler oluyorsa, aşağıdaki adımları izleyerek sorunu çözmeye çalışabilirsiniz:

1. **Kabloları Kontrol Edin**:
   - İnternet hızındaki dalgalanmaların yaygın sebeplerinden biri, ev bağlantısını sağlayan kabloların hasarlı veya uygun olmamasıdır. Bu kabloları kontrol edin ve gerekirse değiştirin.
   - Telekom tarafından sağlanan uygun bir kablo kullanmaya özen gösterin ve kabloların elektrik hatlarına çok yakın olmamasına dikkat edin.

2. **Router'ı Yeniden Başlatın**:
   - Router'ınızı kapatıp, birkaç dakika bekledikten sonra tekrar açın. Bu, bazen küçük sorunları çözebilir.
   - Router'ın tüm kablolarının düzgün takılı olduğunu kontrol edin.

3. **WLAN Sinyal Gücünü Artırın**:
   - Router'ınızın sinyalini engelleyebilecek objelerden (akvaryum, kalorifer, bitkiler, taş yapılar) ve cihazlardan (DECT telefonlar, mikrodalga fırınlar, Bluetooth cihazları) uzaklaştırın.
   - Gerekirse, daha iyi bir kapsama al

#### Decomposition
##### Hala sanki 3. soruya göre cevap veriyor gibi, ama scratch'de de oyle

In [60]:
# Answers recursively

def format_qa_pair(question, answer):
    """Format Q and A pair"""
    
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()


q_a_pairs = ""
for q in decomposition_questions:
    
    decomposition_rag_chain = (
    {"context": itemgetter("question") | retriever, 
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")} 
    | decomposition_prompt
    | model
    | StrOutputParser())

    answer_decomposition = decomposition_rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer_decomposition)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

print("\nDecomposition questions: ", decomposition_questions)
print("\nDecomposition answer: ", answer_decomposition)


Decomposition questions:  ['1. "İnternet hız testi nasıl yapılır?"', '2. "İnternet bağlantı kesintileri neden olur?"', '3. "İnternet hızını artırmak için ne yapabilirim?"']

Decomposition answer:  İnternet hızını artırmak için aşağıdaki adımları izleyebilirsiniz:

1. **Modeminizi Yeniden Başlatın**: Modeminizi kapatıp birkaç dakika sonra tekrar açarak bağlantıyı yenileyin. Bu, modemin geçici sorunlarını çözebilir.

2. **Doğru Kabloları Kullanın**: Modem ile cihazınız arasındaki kabloların sağlam ve doğru şekilde bağlı olduğundan emin olun. Özellikle LAN kablosu (Ethernet kablosu) kullanmak, daha kararlı ve hızlı bir bağlantı sağlar. En az CAT 5E standartında bir kablo kullanmanız önerilir.

3. **Modeminizi Merkezi Bir Konuma Yerleştirin**: Modeminizi evinizin merkezi bir yerine yerleştirin. Modeminiz ile cihazlarınız arasında fiziksel engeller varsa, bu sinyal gücünü azaltabilir. 

4. **Wi-Fi Kanalını Değiştirin**: Modeminizin yönetim arayüzüne girerek Wi-Fi kanalını değiştirebilirsin

In [61]:
# Answer each sub-question individually 

def retrieve_and_rag(question,prompt_telekom,sub_question_generator_chain):
    """RAG on each sub-question"""
    
    # Use our decomposition / 
    sub_questions = sub_question_generator_chain.invoke({"question":question})
    
    # Initialize a list to hold RAG chain results
    rag_results = []
    
    for sub_question in sub_questions:
        
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        
        # Use retrieved documents and sub-question in RAG chain
        answer = (prompt_telekom | model | StrOutputParser()).invoke({"context": retrieved_docs, 
                                                                "question": sub_question})
        rag_results.append(answer)
    
    return rag_results,sub_questions

# Wrap the retrieval and RAG process in a RunnableLambda for integration into a chain
answers, decomposition_individual_questions = retrieve_and_rag(question, prompt_telekom, generate_queries_decomposition)

def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""
    
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

decomposition_individual_context = format_qa_pairs(decomposition_individual_questions, answers)

decomposition_individual_rag_chain = (
    decomposition_individual_prompt
    | model
    | StrOutputParser()
)

decomposition_individual_answer = decomposition_individual_rag_chain.invoke({"decomposition_individual_context":decomposition_individual_context,"question":question})
print("\nAnswer: ", decomposition_individual_answer)


Answer:  Evinizde kullandığınız internetin hızından problem yaşıyor ve kesintiler oluyorsa, aşağıdaki adımları izleyebilirsiniz:

1. **İnternet Hızını Ölçme**: İnternet hızınızı ölçmek için bir hız testi yapabilirsiniz. Ancak, bu konuda doğrudan yardımcı olamıyorum. Detaylı bilgi ve destek için [Telekom Yardım Sayfası](https://www.telekom.de/hilfe) adresini ziyaret edebilirsiniz.

2. **Modem ve Router Kontrolü**: Modem veya router’ınızda internet kesintileri yaşıyorsanız, öncelikle tüm kabloların doğru şekilde bağlı olup olmadığını kontrol edin. Modemi veya router’ı yeniden başlatmayı veya 10 dakika boyunca fişten çekmeyi deneyin. Eğer router üzerindeki ışıklar normal değilse, cihazı yeniden başlatın. 

3. **İnternet Sağlayıcınızla İletişime Geçme**: İnternet sağlayıcınızla hız ve kesinti sorunları hakkında iletişime geçmek için [Mein Magenta-App](https://www.telekom.de/hilfe/hilfeangebote-hilfe-bei-stoerungen)'i kullanabilirsiniz. Uygulamaya Telekom bilgilerinizi kullanarak giriş yap

### CRAG - Self-RAG
##### KENDI YAPIMI KURDUM TEZIN MERKEZI BURASI COK IYI ANLATILMALI
###### Eger retriever'da gerekli context bulamioyrsa web search yapiyor, ve web sonucuyla beraber context'i veriyor.


In [None]:
### Tavily web search tool
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)

# Executing question, 
# qna_search performs a search and returns a str containing an answer to the original query.
#web_search_tool = tavily_client.qna_search(question)
#print(web_search_tool)

### Retrieval Grader
# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

### Hallucination Grader
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

### Answer Grader
# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

# LLM with function call for retrieval
structured_llm_grader = model.with_structured_output(GradeDocuments)
# LLM with function call for hallucination
structured_llm_hallucination_grader = model.with_structured_output(GradeHallucinations)
# LLM with function call for answer
structured_llm_answer_grader = model.with_structured_output(GradeAnswer)

# Chain
srag_chain = prompt_telekom | model | StrOutputParser()

# Run
# DIKKAT!!! orjinal halinde docs ve retriever var, ben burda deniyorum docs_all ve retriever_all ile
generation = srag_chain.invoke({"context": docs, "question": question})
retrieval_grader = grade_prompt | structured_llm_grader
hallucination_grader = hallucination_prompt | structured_llm_hallucination_grader
hallucination_grader.invoke({"documents": docs, "generation": generation})
answer_grader = answer_prompt | structured_llm_answer_grader
answer_grader.invoke({"question": question, "generation": generation})


# Re-write question ve append new web search context
grader_docs = retriever.get_relevant_documents(question)
doc_txt = " ".join([doc.page_content for doc in grader_docs])
question_rewriter = re_write_prompt | model | StrOutputParser()
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))
question_rewriter.invoke({"question": question})


##### Graph logic 

In [None]:
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
    """
    question: str
    generation: str
    web_search: str
    documents: List[str]

def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.get_relevant_documents(question)
    print("\nROUTED DOCS: ",documents)
    return {"documents": documents, "question": question}

def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    rag_chain = prompt_telekom | model | StrOutputParser()
    #print(documents)

    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    print("\nDOCUMENTS:", documents)   
    print("\nAnswer:", generation)

    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each doc
    filtered_docs = []
    web_search = "No" # DIKKAT! SELF-RAG'ta burayi cikarmis
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        print("\nORJINAL SORU:", question)
        print(d.page_content)
        
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            web_search = "Yes"
            continue
    print("\nRELEVANT CONTEXT: ", filtered_docs)
    return {"documents": filtered_docs, "question": question, "web_search": web_search}


def transform_query(state):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---TRANSFORM QUERY---")
    question = state["question"]
    #documents = state["documents"]

    # Re-write question
    better_question = question_rewriter.invoke({"question": question})
    #return {"documents": documents, "question": better_question}
    return {"question": better_question}

def web_search(state):
    """
    Web search based on the re-phrased question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with appended web results
    """

    print("---WEB SEARCH---")
  
    question = state["question"]
    documents = state["documents"]
    #filtered_docs = state["filtered_docs"]
    print(question)
    # Web search
    web_search_tool = tavily_client.qna_search(question)
    web_results = Document(page_content=web_search_tool)

    #orjinalinde direkt burdaki gibi retriever'dan aliyor ama ben filtered_docs alicam sadece
    #documents = retriever.get_relevant_documents(question)
    #documents.append(filtered_docs)
    documents.append(web_results)
    #print(documents)
    return {"documents": documents, "question": question}

### Edges

def decide_to_generate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]
    web_search = state["web_search"]
    print(web_search)
    state["documents"]
    #state["filtered_docs"]

    if web_search == "Yes":
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        print(
            "---DECISION: SOME DOCUMENTS ARE NOT RELEVANT TO QUESTION, WEB SEARCH---"
        )
        return "web_search_node"
    else:
        # We have relevant documents, so generate answer
        print("---DECISION: GENERATE---")
        return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]
    print("\nQUESTION:", question)
    print("\nDOCUMENTS:", documents)
    print("\nGENERATION:", generation)

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score.binary_score
    print(grade)

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        grade = score.binary_score
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"



##### RUN graph 

In [None]:
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # retrieve
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)  # generatae
workflow.add_node("transform_query", transform_query)  # transform_query
workflow.add_node("web_search_node", web_search)  # web search

# Build graph
workflow.add_edge(START, "transform_query")
workflow.add_edge("transform_query", "retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "web_search_node": "web_search_node",
        "generate": "generate",
    },
)
workflow.add_edge("web_search_node", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "transform_query",
    },
)

# Compile
app = workflow.compile()

# Run
inputs = {"question": question}
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
    pprint("\n---\n")

# Final generation
answer = value["generation"]


##### ANSWER

In [None]:
answer