# Chatbot workflow

In [None]:
import os
from dotenv import load_dotenv
from langsmith import traceable

# Populate os.environ (python-dotenv; presumably from a local .env file) so the
# WX_PROJECT_ID / WX_API_KEY lookups further down can succeed.
load_dotenv()

In [None]:
from typing import Annotated

from typing_extensions import TypedDict

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages


class State(TypedDict):
    """State carried through the chatbot graph: a single `messages` list."""

    # Messages have the type "list". The `add_messages` function
    # in the annotation defines how this state key should be updated
    # (in this case, it appends messages to the list, rather than overwriting them)
    messages: Annotated[list, add_messages]


# Builder for the chatbot graph; the node and edges are registered on it in a
# later cell before it is compiled.
graph_builder = StateGraph(State)

In [None]:
from langchain_ibm import ChatWatsonx

# Generation parameters forwarded to the model through `params` below.
llm_params = {
    "decoding_method": "sample",
    "max_new_tokens": 500,
    "min_new_tokens": 1,
    "temperature": 0.5,
    "top_k": 50,
    "top_p": 1,
}

# Chat model called by the `chatbot` node. The project id and API key come from
# the environment; a missing variable raises KeyError right here.
# NOTE: the name `llm` is rebound to a WatsonxLLM further down in this file, and
# `chatbot` looks the name up at call time, so cell execution order matters.
llm = ChatWatsonx(
    model_id="meta-llama/llama-3-1-70b-instruct",
    url="https://us-south.ml.cloud.ibm.com",
    project_id=os.environ["WX_PROJECT_ID"],
    apikey=os.environ["WX_API_KEY"],
    params=llm_params
)

def chatbot(state: State):
    """Graph node: send the conversation so far to the LLM and return its reply.

    The reply is wrapped in a one-element list under "messages" so that the
    state's `add_messages` annotation can merge it into the existing history.
    """
    history = state["messages"]
    reply = llm.invoke(history)
    return {"messages": [reply]}



#### Compile graph

In [None]:
from IPython.display import Image, display

# Register the node: the first argument is the unique node name, the second is
# the function or object that will be called whenever the node is used.
graph_builder.add_node("chatbot", chatbot)

# Wire a linear flow: START -> chatbot -> END.
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

graph = graph_builder.compile()

# Show the compiled graph inline as a Mermaid-rendered PNG.
display(
    Image(
        graph.get_graph().draw_mermaid_png()
    )
)

In [None]:
def stream_graph_updates(user_input: str):
    """Run one user turn through the compiled graph and print every reply.

    Args:
        user_input: text of the user's message.
    """
    initial_state = {"messages": [("user", user_input)]}
    for update in graph.stream(initial_state):
        # Each streamed event is a mapping whose values hold a "messages" list;
        # the last entry of that list is what gets printed.
        for node_output in update.values():
            latest_message = node_output["messages"][-1]
            print("Assistant:", latest_message.content)


# Interactive chat loop: read a line, stop on a quit keyword, otherwise stream
# the graph's answer. Only the input() call is guarded, so Ctrl-C / kernel
# interrupts and genuine model errors propagate instead of silently triggering
# the canned fallback question (which would call the model a second time).
while True:
    try:
        user_input = input("User: ")
    except (EOFError, OSError, RuntimeError):
        # fallback if input() is not available (closed/missing stdin; a
        # front-end that raises a NotImplementedError subclass is covered by
        # RuntimeError)
        user_input = "What do you know about LangGraph?"
        print("User: " + user_input)
        stream_graph_updates(user_input)
        break

    if user_input.lower() in ["quit", "exit", "q"]:
        print("Goodbye!")
        break

    stream_graph_updates(user_input)

# Agentic Workflow

In [None]:
"""
- crawl a website [DONE]
- check if it's over 500 words, split it if needed and save it
- run the workflow
    - retrieve the md using the input name
    - generate multiple questions and answers
    - select the best 3 
    - save the qna json list

- create the YAML
- launch instructlab

"""

In [None]:
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field

# Parser
# Schema of one generated item: a question plus its answer.
# NOTE: documented with `#` comments on purpose. The model is handed to
# JsonOutputParser below, whose format instructions are inserted into the
# prompt; the Field descriptions (and presumably a class docstring, if one were
# added) become part of that prompt text.
class QuestionsandAnswers(BaseModel):
    question: str = Field(description="question related to the context")
    answer: str = Field(description="answer to the question using the context")

# Parses the model's JSON output; also the source of `format_instructions`.
parser = JsonOutputParser(pydantic_object=QuestionsandAnswers)    

In [None]:
import os
from langchain_ibm import WatsonxLLM
from langchain_core.prompts import PromptTemplate

# LLM
# Prompt text. `{context}` is supplied per invocation; `{format_instructions}`
# is pre-filled from the parser when the PromptTemplate is built below.
template = """
You are a robot that only outputs JSON.
You reply in JSON format, do not introduce your answer.
Using the provided context, create a relevant question and answer, that would help deepen someone's knowledge.

Context:
{context}

{format_instructions}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Generation parameters forwarded to the model through `params` below.
parameters = {
    "decoding_method": "sample",
    "max_new_tokens": 200,
    "min_new_tokens": 10,
    "temperature": 0.5,
    "top_k": 50,
    "top_p": 1,
}

# NOTE: this rebinds the module-level name `llm` that the earlier `chatbot`
# function also reads; after this cell runs, `chatbot` would call this model.
# The project id and API key come from the environment (KeyError if unset).
llm = WatsonxLLM(
    model_id="meta-llama/llama-3-1-70b-instruct",
    # model_id="meta-llama/llama-3-405b-instruct",
    url="https://us-south.ml.cloud.ibm.com",
    project_id=os.environ["WX_PROJECT_ID"],
    apikey=os.environ["WX_API_KEY"],
    params=parameters
)

# Pipeline used by generate_qna: fill the prompt, call the model, parse the JSON.
qna_chain = prompt | llm | parser

In [None]:
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from mypy_extensions import TypedDict
from typing import List, Annotated
import operator
from langgraph.types import Send
import json

# States
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        md_filename: markdown filename, resolved under yaml_construction/context
        iterations: list of iteration markers; continue_to_qna dispatches one
            generate_qna run per element
        context: content loaded from the markdown file by retrieve_markdown,
            used as the prompt context
        questions_and_answers: generated Q&A items; annotated with operator.add
            so the lists returned by the generate_qna runs are combined
    """

    md_filename: str
    iterations: list
    context: str
    questions_and_answers: Annotated[list, operator.add]
    

class QnAState(TypedDict):
    """Payload sent to a single generate_qna run by continue_to_qna."""

    # Text used as the prompt context for this run.
    context: str
    # The element of GraphState["iterations"] this run was spawned for.
    iteration: int


# Node function
def retrieve_markdown(state: GraphState):
    """
    Load the markdown file named in the state and expose its text as context.

    Args:
        state: graph state; "md_filename" and "iterations" are read.

    Returns:
        Partial state update with "md_filename" (unchanged) and "context",
        the plain text of the file as a single string.
    """
    print("---RETRIEVE---")
    md_filename = state["md_filename"]

    # Retrieval
    markdown_path = os.path.join("yaml_construction/context", md_filename)
    documents = UnstructuredMarkdownLoader(markdown_path).load()

    # load() returns a list of Document objects. Storing that list directly
    # would put its repr (metadata, escaped newlines) into the prompt, and
    # would contradict the `context: str` annotation, so join the page text.
    context = "\n\n".join(doc.page_content for doc in documents)

    print(state["iterations"])
    return {"md_filename": md_filename, "context": context}


def generate_qna(state: QnAState):
    """
    Produce one question/answer item for the given context.

    Args:
        state: payload for this run; only "context" is read.

    Returns:
        Partial state update whose "questions_and_answers" is a one-element
        list holding the parsed output of qna_chain.
    """
    source_text = state["context"]

    print("---GENERATE---")
    chain_input = {"context": source_text}
    qna_item = qna_chain.invoke(chain_input)
    return {"questions_and_answers": [qna_item]}


def select_qna(state):
    """
    Print the generated questions and save all Q&A items as JSON.

    NOTE: despite the name, no selection or ranking happens here; every item
    in state["questions_and_answers"] is written out.

    Args:
        state: graph state; "questions_and_answers" (list of dicts with a
            "question" key) and "md_filename" are read.

    Returns:
        None. The items are written to
        yaml_construction/questions_and_answers/<md_filename>.json.
    """
    print("---SELECT---")

    for qna in state["questions_and_answers"]:
        print(qna['question'])

    # Saving
    filename = state["md_filename"]
    qna_dict = {"questions_and_answers": state["questions_and_answers"]}
    out_dir = "yaml_construction/questions_and_answers"
    # Create the output directory on first use instead of failing with
    # FileNotFoundError when it does not exist yet.
    os.makedirs(out_dir, exist_ok=True)
    fp = f"{out_dir}/{filename}.json"
    with open(fp, "w", encoding="utf-8") as f:
        json.dump(qna_dict, f)


# Edge functions
def continue_to_qna(state: GraphState):
    """Fan out: build one Send to "generate_qna" per entry in state["iterations"].

    Every Send carries the shared context plus the entry it was created for.
    """
    return [
        Send(
            "generate_qna",
            {"context": state["context"], "iteration": iteration},
        )
        for iteration in state["iterations"]
    ]


In [None]:
from IPython.display import Image, display
from langgraph.graph import END, StateGraph, START

graph = StateGraph(GraphState)

# Nodes
graph.add_node("retrieve_markdown", retrieve_markdown)  # load the markdown file
graph.add_node("generate_qna", generate_qna)  # generate one Q&A item per run
graph.add_node("select_qna", select_qna)  # print the questions and save them

# Edges: START -> retrieve_markdown -> (one generate_qna per Send) -> select_qna -> END
graph.add_edge(START, "retrieve_markdown")
graph.add_conditional_edges(
    "retrieve_markdown",
    continue_to_qna,
    ["generate_qna"],
)
graph.add_edge("generate_qna", "select_qna")
graph.add_edge("select_qna", END)

workflow = graph.compile()

# Show the compiled workflow inline as a Mermaid-rendered PNG.
display(
    Image(
        workflow.get_graph().draw_mermaid_png()
    )
)

In [None]:
# Start workflow
# Run the workflow once per file in the context directory. A failing file is
# retried a bounded number of times and then skipped, so a deterministic error
# (unreadable file, unparsable model output, ...) cannot loop forever.
MAX_ATTEMPTS = 3

for markdown in os.listdir("yaml_construction/context"):
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            print(f"Converting {markdown}")
            for s in workflow.stream({"md_filename": markdown, "iterations": list(range(3))}):
                print(s)
        except Exception as exc:
            # Report the actual error (not the Exception class) before retrying.
            print(f"Attempt {attempt}/{MAX_ATTEMPTS} failed for {markdown}: {exc!r}")
        else:
            break
    else:
        print(f"Giving up on {markdown} after {MAX_ATTEMPTS} attempts")