In [6]:
# Import necessary libraries
import os
import time  # Import time to measure response time
import streamlit as st
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_community.callbacks import get_openai_callback
from langgraph.graph import END, StateGraph, START
from indexing import get_vectorstore
from pprint import pprint
import routing

from tavily import TavilyClient
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.docstore.document import Document
from typing_extensions import TypedDict
from typing import List
import initials
import prompts


### Tavily web search tool
# Shared Tavily client; the API key is read from the project-local `initials` module.
tavily_client = TavilyClient(api_key=initials.TAVILY_API_KEY)

# Usage note: `qna_search` performs a search and returns a str containing an
# answer to the original query, e.g.
#   web_search_tool = tavily_client.qna_search(question)
#   print(web_search_tool)

### Retrieval Grader
# Data model
# Schema handed to `with_structured_output` to build the retrieval grader.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the schema, so treat them as prompt text
# when editing - confirm against the structured-output documentation.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # The grading code in this notebook compares this value against "yes".
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

### Hallucination Grader
# Data model
# Schema handed to `with_structured_output` to build the hallucination grader.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the schema, so treat them as prompt text
# when editing - confirm against the structured-output documentation.
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    # "yes" is treated by the grading code as "the generation is grounded".
    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

### Answer Grader
# Data model
# Schema handed to `with_structured_output` to build the answer grader.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the schema, so treat them as prompt text
# when editing - confirm against the structured-output documentation.
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    # "yes" is treated by the grading code as "the generation answers the question".
    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

In [7]:
# Alternative test query, currently disabled ("Which is Tesla's newest model?"):
#question = "Tesla'nin en yeni modeli hangisidir?"
# Query used by the rest of the notebook: vector store construction, the
# retrieval check and the graph input. Roughly: "I am having problems with the
# speed of my home internet, there are outages, what should I do?"
question = "Evimda kullandigim internetin hizinda problem yasiyorum, kesintiler oluyor, ne yapmaliyim?"

In [8]:
# Build the vector store for the current question and expose it as a retriever.
# NOTE(review): on a fresh kernel this call resolves to the `get_vectorstore`
# imported from `indexing`; the same-named function defined further down in
# this cell only takes effect for calls made after the cell has run.
vector_store = get_vectorstore(
    question,
    initials.model,
    initials.data_directory,
    initials.embedding,
)
retriever = vector_store.as_retriever()

# LLM wrappers constrained to each grader's structured-output schema.
structured_llm_grader = initials.model.with_structured_output(GradeDocuments)  # retrieval grading
structured_llm_hallucination_grader = initials.model.with_structured_output(GradeHallucinations)  # hallucination grading
structured_llm_answer_grader = initials.model.with_structured_output(GradeAnswer)  # answer grading

# Structured LLM used for question routing.
structured_llm_router = initials.model.with_structured_output(routing.RouteUserQuery)

# Chain each prompt to the LLM that consumes it.
retrieval_grader = initials.grade_prompt | structured_llm_grader
hallucination_grader = initials.hallucination_prompt | structured_llm_hallucination_grader
answer_grader = initials.answer_prompt | structured_llm_answer_grader
question_rewriter = prompts.re_write_prompt | initials.model | StrOutputParser()

# Fetch the documents for the question; an empty result is treated as an error.
grader_docs = retriever.get_relevant_documents(question)
if not grader_docs:
    raise ValueError("No relevant documents found for the question.")

# Space-joined text of every retrieved document.
doc_txt = " ".join(doc.page_content for doc in grader_docs)

# Chroma vector store entry point
def get_vectorstore(question, model, data_directory, embedding):
    """Build a Chroma vector store from the summaries selected for `question`.

    Once executed, this definition shadows the `get_vectorstore` imported from
    `indexing` at the top of the notebook.

    NOTE(review): `load_summaries` and `get_specific_directory` are neither
    defined nor imported in the visible notebook cells - presumably they live
    in `indexing`; confirm and import them before relying on this function.

    Args:
        question: Passed to `get_specific_directory` together with `model` and
            `data_directory`.
        model: See `question`.
        data_directory: See `question`.
        embedding: Forwarded to `create_chroma_vectorstore`.

    Returns:
        Whatever `create_chroma_vectorstore` returns for the loaded summaries.

    Raises:
        ValueError: If no summaries were loaded.
    """
    target_directory = get_specific_directory(question, model, data_directory)
    loaded_summaries = load_summaries(target_directory)

    # Fail early when there is nothing to index.
    if not loaded_summaries:
        raise ValueError("No summaries found for the given question.")

    print(f"Loaded summaries: {len(loaded_summaries)}")

    return create_chroma_vectorstore(loaded_summaries, embedding)

# Helper that builds the Chroma vector store
def create_chroma_vectorstore(summaries, embedding):
    """Create a Chroma vector store holding one document per summary.

    Args:
        summaries: Iterable of summary texts; each one becomes the
            `page_content` of a `Document`.
        embedding: Embedding object forwarded to `Chroma.from_documents`.

    Returns:
        The store returned by `Chroma.from_documents`; documents are stored
        under the ids "id_0", "id_1", ... in input order.

    Raises:
        ValueError: If `summaries` is empty (or None) or contains a None entry.
    """
    summary_list = list(summaries or [])

    # Validate the raw inputs before building Documents, so that an empty list
    # or a None entry is reported by this function's own error instead of
    # surfacing later from Document construction or from Chroma.
    if not summary_list or any(summary is None for summary in summary_list):
        raise ValueError("Documents must have valid content and IDs.")

    documents = [Document(page_content=summary) for summary in summary_list]

    # One deterministic id per document; non-empty because of the check above.
    ids = [f"id_{i}" for i in range(len(documents))]

    print(f"Documents created: {len(documents)}")
    print(f"Document IDs: {ids}")

    return Chroma.from_documents(documents=documents, embedding=embedding, ids=ids)

Total embeddings calculated: 0
Total documents created: 0


ValueError: Expected IDs to be a non-empty list, got 0 IDs

In [None]:
# NOTE(review): this cell repeats the statements of the previous cell verbatim.
# Build the vector store for the current question and expose it as a retriever.
# `get_vectorstore` resolves to whichever binding is current: the import from
# `indexing`, or a same-named definition executed by an earlier cell run.
vector_store = get_vectorstore(
    question,
    initials.model,
    initials.data_directory,
    initials.embedding,
)
retriever = vector_store.as_retriever()

# LLM wrappers constrained to each grader's structured-output schema.
structured_llm_grader = initials.model.with_structured_output(GradeDocuments)  # retrieval grading
structured_llm_hallucination_grader = initials.model.with_structured_output(GradeHallucinations)  # hallucination grading
structured_llm_answer_grader = initials.model.with_structured_output(GradeAnswer)  # answer grading

# Structured LLM used for question routing.
structured_llm_router = initials.model.with_structured_output(routing.RouteUserQuery)

# Chain each prompt to the LLM that consumes it.
retrieval_grader = initials.grade_prompt | structured_llm_grader
hallucination_grader = initials.hallucination_prompt | structured_llm_hallucination_grader
answer_grader = initials.answer_prompt | structured_llm_answer_grader
question_rewriter = prompts.re_write_prompt | initials.model | StrOutputParser()

# Fetch the documents for the question; an empty result is treated as an error.
grader_docs = retriever.get_relevant_documents(question)
if not grader_docs:
    raise ValueError("No relevant documents found for the question.")

# Space-joined text of every retrieved document.
doc_txt = " ".join(doc.page_content for doc in grader_docs)

# Chroma vector store entry point
# NOTE(review): same definition as in the previous cell; whichever copy runs
# last is the one that stays bound to the name.
def get_vectorstore(question, model, data_directory, embedding):
    """Build a Chroma vector store from the summaries selected for `question`.

    Once executed, this definition shadows the `get_vectorstore` imported from
    `indexing` at the top of the notebook.

    NOTE(review): `load_summaries` and `get_specific_directory` are neither
    defined nor imported in the visible notebook cells - presumably they live
    in `indexing`; confirm and import them before relying on this function.

    Args:
        question: Passed to `get_specific_directory` together with `model` and
            `data_directory`.
        model: See `question`.
        data_directory: See `question`.
        embedding: Forwarded to `create_chroma_vectorstore`.

    Returns:
        Whatever `create_chroma_vectorstore` returns for the loaded summaries.

    Raises:
        ValueError: If no summaries were loaded.
    """
    target_directory = get_specific_directory(question, model, data_directory)
    loaded_summaries = load_summaries(target_directory)

    # Fail early when there is nothing to index.
    if not loaded_summaries:
        raise ValueError("No summaries found for the given question.")

    print(f"Loaded summaries: {len(loaded_summaries)}")

    return create_chroma_vectorstore(loaded_summaries, embedding)

# Helper that builds the Chroma vector store
# NOTE(review): same function as in the previous cell; whichever copy runs last
# is the one that stays bound to the name.
def create_chroma_vectorstore(summaries, embedding):
    """Create a Chroma vector store holding one document per summary.

    Args:
        summaries: Iterable of summary texts; each one becomes the
            `page_content` of a `Document`.
        embedding: Embedding object forwarded to `Chroma.from_documents`.

    Returns:
        The store returned by `Chroma.from_documents`; documents are stored
        under the ids "id_0", "id_1", ... in input order.

    Raises:
        ValueError: If `summaries` is empty (or None) or contains a None entry.
    """
    summary_list = list(summaries or [])

    # Validate the raw inputs before building Documents, so that an empty list
    # or a None entry is reported by this function's own error instead of
    # surfacing later from Document construction or from Chroma.
    if not summary_list or any(summary is None for summary in summary_list):
        raise ValueError("Documents must have valid content and IDs.")

    documents = [Document(page_content=summary) for summary in summary_list]

    # One deterministic id per document; non-empty because of the check above.
    ids = [f"id_{i}" for i in range(len(documents))]

    print(f"Documents created: {len(documents)}")
    print(f"Document IDs: {ids}")

    return Chroma.from_documents(documents=documents, embedding=embedding, ids=ids)

Total embeddings calculated: 0
Total documents created: 0


ValueError: Expected IDs to be a non-empty list, got 0 IDs

In [None]:
# === GRAPH ===

class GraphState(TypedDict):
    """
    State dictionary passed between the nodes of the graph.

    Attributes:
        question: the user question; transform_query replaces it with the
            re-written question
        generation: answer text produced by the generate node
        web_search: "Yes"/"No" flag written by grade_documents and read by
            decide_to_generate
        documents: Document objects produced by retrieve, filtered by
            grade_documents and extended by web_search
    """
    question: str
    generation: str
    web_search: str
    # Nodes store Document objects here and read `.page_content` from them.
    documents: List[Document]

### Nodes


# `retriever` is optional: the graph invokes each node with the state only, so
# a required second positional argument could never be supplied by it.
def retrieve(state, retriever=None):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state
        retriever: Optional object exposing `get_relevant_documents(question)`.
            When omitted, the module-level `retriever` is used.

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents

    Raises:
        NameError: If no retriever is passed and no module-level `retriever` exists.
    """
    if retriever is None:
        # The parameter shadows the module-level name, so look the global up
        # explicitly; resolving it at call time picks up the current binding.
        retriever = globals().get("retriever")
        if retriever is None:
            raise NameError("name 'retriever' is not defined")

    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.get_relevant_documents(question)
    print("\nROUTED DOCS: ", documents)
    return {"documents": documents, "question": question}

def generate(state):
    """
    Produce an answer from the question and documents held in the state.

    Args:
        state (dict): The current graph state; "question" and "documents" are read

    Returns:
        state (dict): The same documents and question plus a "generation" key
        holding the chain's output
    """
    print("---GENERATE---")
    user_question, context_docs = state["question"], state["documents"]

    # Prompt -> model -> plain-string output; the chain is rebuilt on every call.
    answer_chain = prompts.prompt_telekom | initials.model | StrOutputParser()

    # RAG generation: the documents are passed to the prompt as "context".
    answer = answer_chain.invoke({"context": context_docs, "question": user_question})
    print("\nDOCUMENTS:", context_docs)
    print("\nAnswer:", answer)

    return {
        "documents": context_docs,
        "question": user_question,
        "generation": answer,
    }


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Each document is scored by `retrieval_grader`; documents graded "yes" are
    kept. The web-search flag is raised when any document is graded as not
    relevant, and also when there are no documents at all.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant
        documents and sets web_search to "Yes" or "No"
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"] or []

    # Score each doc
    filtered_docs = []
    # With nothing retrieved there is no context to answer from, so ask for a
    # web search up front instead of generating from an empty document list.
    web_search = "No" if documents else "Yes"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        print("\nORJINAL SORU:", question)
        print(d.page_content)

        # The grade is model output: tolerate "Yes", " yes" and similar variants.
        grade = str(score.binary_score).strip().lower()
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            web_search = "Yes"
    print("\nRELEVANT CONTEXT: ", filtered_docs)
    return {"documents": filtered_docs, "question": question, "web_search": web_search}


def transform_query(state):
    """
    Re-phrase the question held in the state using `question_rewriter`.

    Args:
        state (dict): The current graph state; only "question" is read

    Returns:
        state (dict): A single-key update replacing "question" with the
        re-written question
    """

    print("---TRANSFORM QUERY---")

    # Only the question is returned; the other state keys are not part of this update.
    rewritten = question_rewriter.invoke({"question": state["question"]})
    return {"question": rewritten}

def web_search(state):
    """
    Run a Tavily Q&A search for the question and add the result to the documents.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with appended web results
    """

    print("---WEB SEARCH---")

    question = state["question"]
    # Work on a copy so the list held by the incoming state is not mutated in
    # place, and tolerate a missing/None "documents" entry (for example when
    # this node is reached without a prior retrieval step).
    documents = list(state.get("documents") or [])
    print(question)

    # Web search: the search result text is wrapped in a Document so it sits
    # alongside the documents that were kept after grading.
    search_answer = tavily_client.qna_search(question)
    documents.append(Document(page_content=search_answer))
    return {"documents": documents, "question": question}

### Edges

def route_question(state):
    """
    Route question to web search or RAG

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call, "websearch" or "vectorstore"

    Raises:
        ValueError: If the router returns a datasource other than the two above.
    """

    print("---ROUTE QUESTION---")
    source = structured_llm_router.invoke(
        [
            SystemMessage(content=routing.query_router_instructions),
            HumanMessage(content=state["question"]),
        ]
    )
    datasource = source.datasource
    if datasource == 'websearch':
        print("---ROUTE QUESTION TO WEB SEARCH---")
        return "websearch"
    if datasource == 'vectorstore':
        print("---ROUTE QUESTION TO RAG---")
        return "vectorstore"

    # Falling off the end would return None and only fail later, far from the
    # cause; report the unexpected value here instead.
    raise ValueError(f"Unexpected datasource from router: {datasource!r}")


def decide_to_generate(state):
    """
    Determines whether to generate an answer directly or to run a web search first.

    Only the "web_search" flag is consulted.

    Args:
        state (dict): The current graph state

    Returns:
        str: Next node to call, "web_search_node" or "generate"
    """

    print("---ASSESS GRADED DOCUMENTS---")
    web_search = state["web_search"]
    print(web_search)

    if web_search == "Yes":
        # The grading step raised the flag, so add web results before generating.
        print(
            "---DECISION: SOME DOCUMENTS ARE NOT RELEVANT TO QUESTION, WEB SEARCH---"
        )
        return "web_search_node"

    # Flag not raised: generate from the documents already in the state.
    print("---DECISION: GENERATE---")
    return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    The hallucination grader runs first; the answer grader is consulted only
    when the generation is judged to be grounded.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call: "useful", "not useful" or
        "not supported"
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]
    print("\nQUESTION:", question)
    print("\nDOCUMENTS:", documents)
    print("\nGENERATION:", generation)

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    # Grades are model output: tolerate "Yes", " yes" and similar variants.
    grade = str(score.binary_score).strip().lower()
    print(grade)

    # Check hallucination
    if grade != "yes":
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    score = answer_grader.invoke({"question": question, "generation": generation})
    grade = str(score.binary_score).strip().lower()
    if grade == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # retrieve
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)  # generate
workflow.add_node("transform_query", transform_query)  # transform_query
workflow.add_node("web_search_node", web_search)  # web search

# Build graph
# (workflow.set_entry_point("transform_query") is an equivalent way to start the graph.)
workflow.add_edge(START, "transform_query")
workflow.add_edge("transform_query", "retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "web_search_node": "web_search_node",
        "generate": "generate",
    },
)
workflow.add_edge("web_search_node", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "transform_query",
    },
)

# Compile
app = workflow.compile()

# Run
inputs = {"question": question}
# Track the most recent generation explicitly instead of relying on the loop
# variable still being bound after the loop ends.
response = None
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        print(f"Node '{key}':")
        # Optional: print the node's full update
        # pprint(value, indent=2, width=80, depth=None)
        if isinstance(value, dict) and "generation" in value:
            response = value["generation"]
    print("\n---\n")

# Final generation
if response is None:
    raise RuntimeError("The graph finished without producing a generation.")