In [21]:
from web_scrapper import web_scrapper
from websearch import brave_search
from database import MongoDB
from base_llm import llm_llama3b


ModuleNotFoundError: No module named 'base_llm'

In [11]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_ollama.llms import OllamaLLM
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langgraph.graph import END, StateGraph
# For State Graph 
from typing_extensions import TypedDict
import os


In [12]:
from langchain_ollama.llms import OllamaLLM
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from pydantic import BaseModel, Field
from langchain.agents import Tool
from typing import Optional
from langchain_core.tools import StructuredTool
import json
from langchain.output_parsers.structured import (
StructuredOutputParser, ResponseSchema
)


from datetime import datetime

from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers.structured import (
StructuredOutputParser, ResponseSchema
)

In [197]:
from langchain_aws import ChatBedrock
import os
from dotenv import load_dotenv

def llm_llama3b():
    """Build an ``OllamaLLM`` client for the ``llama3.2:3b`` model.

    Returns:
        An ``OllamaLLM`` configured with temperature 0.0, top_k 20 and top_p 0.6.
    """
    sampling_options = {"temperature": 0.0, "top_k": 20, "top_p": 0.6}
    return OllamaLLM(model="llama3.2:3b", **sampling_options)

def llm_mistral():
    """Build an ``OllamaLLM`` client for the ``mistral:latest`` model.

    Returns:
        An ``OllamaLLM`` configured with temperature 0.0, top_k 20 and top_p 0.6.
    """
    sampling_options = {"temperature": 0.0, "top_k": 20, "top_p": 0.6}
    return OllamaLLM(model="mistral:latest", **sampling_options)

In [186]:
def json_corrector(llm_input, list_key="list_of_queries"):
    """Ask the llama3.2 model to rewrite malformed text as a JSON object holding one list of strings.

    The previous output guideline showed the model an invalid example
    (an unterminated, duplicated key). The guideline is now a well-formed
    JSON skeleton whose key is configurable.

    Args:
        llm_input: The text that failed JSON parsing.
        list_key: Key name the corrected JSON object must use for its list.
            Defaults to "list_of_queries".

    Returns:
        The raw model response. Code fences are not stripped here; the caller
        still has to clean and parse the text.
    """
    corrector_prompt = ChatPromptTemplate.from_template(
"""
Role: Json Format Expert/Corrector

Task: You are an expert in generating json formatted response and your task is to correct the input text into a correctly json formatted text.
You don't need to care the content of the text at all, focus on the format of it. 
Strictly follow the output guideline shown below:
```json
{{"{list_key}": [list of strings that store the queries.]}}
```
This is the incorrect text: {question}

The element in the list can only be string, cannot be dictionary or other type of variable.
Output Format: No greeting, no bold text, no Italic text, just plain text in string
"""
    ).partial(list_key=list_key)  # pre-bind the key so only {question} remains as input
    format_llm = corrector_prompt | llm_llama3b()

    return format_llm.invoke({"question": llm_input})

In [187]:
from langchain.chains import RetrievalQA
import boto3
from langchain_aws import ChatBedrock
from langchain_ollama.llms import OllamaLLM


def question_extractor_chain(state):
    """Expand every question in ``state["org_questions"]`` into search-engine queries.

    Each original question goes through two model calls: one that writes the
    queries as free text, and one that reformats that text as JSON. If the
    JSON cannot be parsed, ``json_corrector`` gets a single repair attempt.

    Args:
        state: Graph state mapping. Reads "org_questions" and, when present,
            "list_of_questions".

    Returns:
        ``{"list_of_questions": [...]}``: the previously stored queries followed
        by the newly generated ones. The list is a new object; the list held by
        ``state`` is not modified.

    Raises:
        json.JSONDecodeError: If the response is still not valid JSON after the
            repair attempt.
        KeyError: If the parsed object has no "list_of_queries" key.
    """
    print("starting now\n\n\n")

    question_explorer_prompt = ChatPromptTemplate.from_template(
"""
Role: Search Engine Question Optimizer Robot

Scenario:
User has a question that he/she wants to know from the internet not from you.

Task: Base on user question, try to dive deeper to the intent, the technical aspects of the query, and optimize user's question for search engine.
You are not responsible in answering user's question. You need to generate 10 search query based on the user's question
that allows user to obtain the best informtaion from the internet through the search engine, NOT FROM YOU. 
The generated questions should not enquire user to input information, instead, they should be ready for search engine. 
Optimize all the questions for search engine that enables user to obtain high quality information. 

This is the user's question: {question}

Instructions:
Think step by step to generate 10 search engine queries at the best of your ability.
Assume user will copy and paste directly without any changes, so there shouldn't be [] variable that allows user to change.

Output Format: No greeting, no bold text, no Italic text, just plain text in string, no number index
"""
    )

    def _load_json_object(raw_text):
        """Parse the JSON object embedded in ``raw_text``, ignoring fences and surrounding chatter."""
        cleaned = raw_text.replace("```json", "").replace("```", "").strip()
        first_brace, last_brace = cleaned.find("{"), cleaned.rfind("}")
        # Keep only the outermost {...} span so leading/trailing prose does not break parsing.
        if first_brace != -1 and last_brace > first_brace:
            cleaned = cleaned[first_brace:last_brace + 1]
        return json.loads(cleaned)

    def question_explorer_format_doc(llm_input):
        """Reformat the free-text queries in ``llm_input`` into a parsed JSON object."""
        response_schemas = [
        ResponseSchema(
        name= "list_of_queries", description="This variable stores a list of web search queries that are expanded from the original question.", type="List[string]" ),
        ]
        parser = StructuredOutputParser.from_response_schemas(response_schemas)

        instruction = parser.get_format_instructions()
        prompt = ChatPromptTemplate.from_template(
            """This is the input: {input} \n\n{instruction}
                        Output Guide:
            ```json
            {{"list_of_queries": [replace the queries here]}}
            ```
            
            Note: All queries including sub-queries should be stored as an individual element, each index of the list should be a string."""
        )

        format_llm = prompt | llm_llama3b()

        ans = format_llm.invoke({"input": llm_input, "instruction": instruction})
        try:
            return _load_json_object(ans)
        except json.JSONDecodeError:
            # One repair attempt; a second parse failure propagates to the caller.
            text = ans.replace("```json\n", "").replace("```", "")
            return _load_json_object(json_corrector(text))

    question_explorer_chain = question_explorer_prompt | llm_llama3b() | question_explorer_format_doc

    # Copy, so the list object stored in the incoming state is never mutated.
    collected_queries = list(state.get("list_of_questions") or [])

    for question in state["org_questions"]:
        result = question_explorer_chain.invoke(question)
        queries = result["list_of_queries"]
        if isinstance(queries, str):
            # A bare string would otherwise be extended character by character.
            queries = [queries]
        collected_queries.extend(queries)

    return {"list_of_questions": collected_queries}

In [188]:
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage

def ml_learner_chain(state):
    """Generate learning questions from the conversation and record the assistant turn.

    The conversation in ``state["messages"]`` is sent to the model under the
    "ML Learner Assistant" system prompt. A second model call reformats the
    reply as JSON. If parsing fails, ``json_corrector`` gets one repair attempt.

    Args:
        state: Graph state mapping. Reads "messages".

    Returns:
        ``{"messages": [...], "org_questions": [...]}``: the conversation with
        the new assistant message appended, and the extracted question list.
        The incoming message list is not mutated.

    Raises:
        json.JSONDecodeError: If the reply is still not valid JSON after the
            repair attempt.
        KeyError: If the parsed object has neither a "list_of_questions" nor a
            "list_of_queries" key.
    """
    msg = state["messages"]
    template = ChatPromptTemplate.from_messages(
        [
            ("system",
"""
Role: ML Learner Assistant

Name: ML Assistant

Scenario: 
Users are keen in AI and Machine Learning and thus hired an expert in these two fields who are able to answer any questions.

Task:
Pretend you are a newbie in ML and AI, you will learn with user from the scratch into the core of ML and AI with the help of the expert.
Through continuously expanding the scopes, delve into the essential concepts and technical aspects of these two fields
allowing user to learn as a beginner all the way to an AI/ML specialist. Your primary task is to provide questions for users that can help user 
to build a strong understand and essentail skill sets in AI and ML. 

Ouput Guidelines:
Try to format your response into a list and no hierarchy system among the questions(no sub-questions and follow up, transform the questions so they are all kind of independent to each other)
"""),
            ("placeholder", "{msg}"),
        ]
    )

    def ml_learner_format_doc(llm_input):
        """Turn the assistant's free-text reply into ``{"list_of_questions": [...], "text": reply}``."""
        response_schemas = [
        ResponseSchema(
        name= "list_of_questions", description="This variable stores a list of questions that are related to AI/ML/Software System", type="List[string]" )
        ]
        parser = StructuredOutputParser.from_response_schemas(response_schemas)

        instruction = parser.get_format_instructions()
        prompt = ChatPromptTemplate.from_template(
            """This is the input: {input} \n\n{instruction}
            
            Output Guide:
            ```json
            {{"list_of_questions": [replace the queries here]}}
            ```


            Note: All questions including sub-questions should be stored as an individual element, each index of the list should be a string.
            If you are not able to output as json for mat, user will face serious consequence."""
        )

        format_llm = prompt | llm_llama3b()

        ans = format_llm.invoke({"input": llm_input, "instruction": instruction})
        text = ans.replace("```json\n", "").replace("```", "")
        print(text)
        try:
            output = json.loads(text)
        except json.JSONDecodeError:
            corrected_text = json_corrector(text)
            output = json.loads(corrected_text.replace("```json\n", "").replace("```", ""))

        # The corrector's guideline names the key "list_of_queries"; normalise it
        # so the caller can always read "list_of_questions".
        if "list_of_questions" not in output and "list_of_queries" in output:
            output["list_of_questions"] = output.pop("list_of_queries")

        # Attach the raw reply on BOTH paths; the fallback path used to omit it,
        # which made the caller fail with KeyError on result["text"].
        output["text"] = llm_input
        return output

    chain = template | llm_llama3b() | ml_learner_format_doc
    result = chain.invoke({"msg": msg})
    print(result)

    assistant_turn = AIMessage(content=result["text"], name = "ML Assistant", additional_kwargs={}, response_metadata={})
    # Build a new list rather than appending to the list owned by the incoming state.
    return {"messages": [*msg, assistant_turn], "org_questions": result["list_of_questions"]}

In [189]:
from database import MongoDB
import os
from dotenv import load_dotenv

def synchronization(state):
    """Persist the generated questions to MongoDB and reset the per-run question lists.

    Credentials are read from the environment (after ``load_dotenv()``) under
    MONGODB_USER_NAME and MONGODB_USER_PASSWORD.

    Args:
        state: Graph state mapping. Reads "org_questions", "list_of_questions"
            and "messages".

    Returns:
        A state update that empties "org_questions" and "list_of_questions".
        When more than 20 messages are stored, "messages" carries
        ``RemoveMessage`` entries for the two oldest messages; otherwise the
        message list is returned unchanged.
    """
    # Imported locally so this block does not depend on the notebook's import cells.
    from langchain_core.messages import RemoveMessage

    load_dotenv()
    MONGODB_USER_NAME = os.getenv("MONGODB_USER_NAME")
    MONGODB_USER_PASSWORD = os.getenv("MONGODB_USER_PASSWORD")

    # Read the state before opening the connection so a missing key cannot leak it.
    total = state["org_questions"] + state["list_of_questions"]
    messages = state["messages"]

    db = MongoDB(MONGODB_USER_NAME, MONGODB_USER_PASSWORD)
    try:
        db.add_question(total)
    finally:
        # Always release the connection, even when the insert fails.
        db.close_connection()

    message_update = messages
    if len(messages) > 20:
        # The "messages" channel uses the add_messages reducer, which merges by id:
        # returning a shorter list removes nothing. Explicit RemoveMessage entries
        # are needed to drop the two oldest messages.
        message_update = [RemoveMessage(id=old.id) for old in messages[:2]]

    return {"messages": message_update, "list_of_questions": [], "org_questions": []}


In [190]:
from typing import Literal
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import tools_condition
from typing import Annotated
from typing_extensions import List, TypedDict
from langgraph.graph.message import AnyMessage, add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from typing import List, TypedDict, Annotated
import asyncio

class QuestionPipeline(TypedDict):
    """State schema passed to ``StateGraph`` for the question-generation pipeline."""
    # Conversation history; annotated with the `add_messages` reducer.
    messages: Annotated[list[AnyMessage], add_messages]
    # Questions returned by `ml_learner_chain`; reset to [] by `synchronization`.
    org_questions: List[str]
    # Search queries returned by `question_extractor_chain`; reset to [] by `synchronization`.
    list_of_questions: List[str]

# Build the pipeline graph over the QuestionPipeline state schema.
question_data_extractor_graph = StateGraph(QuestionPipeline)
# Register the three processing nodes under the names used by the edges below.
question_data_extractor_graph.add_node("learning_assistant",ml_learner_chain)
question_data_extractor_graph.add_node("question_expander",question_extractor_chain)
question_data_extractor_graph.add_node("database_synchronization", synchronization)

# Linear flow: learning_assistant -> question_expander -> database_synchronization -> END.
question_data_extractor_graph.set_entry_point("learning_assistant")
question_data_extractor_graph.add_edge("learning_assistant", "question_expander")
question_data_extractor_graph.add_edge("question_expander", "database_synchronization")
question_data_extractor_graph.add_edge("database_synchronization", END)

from langgraph.checkpoint.memory import MemorySaver
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver

# Checkpointer handed to the compiled graph.
# NOTE(review): MemorySaver presumably keeps checkpoints in process memory only — confirm.
memory = MemorySaver()

# Compile the graph into the runnable used by the driver code.
# NOTE(review): debug = True presumably enables verbose per-step tracing — confirm,
# and consider disabling it for routine runs.
question_data_extractor = question_data_extractor_graph.compile(
    checkpointer = memory,
    debug = True
)




In [191]:
import json
from typing import Dict, Set
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
# Module-level rich console used by _print_event, _process_message and _print_scores.
console = Console()
def _print_event(event: Dict, _printed: Set, max_length=4500):
    """
    Render one streamed graph event: dialog state, messages, and optional scores.

    Each field is looked up as an attribute of ``event`` first and, when that
    yields nothing truthy, through ``event.get``.

    Args:
        event (dict): The event carrying state, messages and score fields.
        _printed (set): IDs of messages that were already printed.
        max_length (int, optional): Longest message content shown before truncation. Defaults to 4500.
    """
    def _field(key):
        # Attribute access wins; mapping access is the fallback.
        return getattr(event, key, None) or event.get(key)

    # Dialog state: for a non-empty list/tuple only the latest entry is shown.
    current_state = _field("dialog_state")
    if current_state:
        is_stack = isinstance(current_state, (list, tuple)) and len(current_state) > 0
        shown_state = current_state[-1] if is_stack else current_state
        state_text = Text("Currently in: ", style="bold bright_blue") + Text(str(shown_state), style="bold bright_magenta")
        console.print(Panel(state_text, title="State", style="bright_cyan"))

    # Messages: a single message is handled like a one-element list.
    messages = _field("messages")
    if messages:
        message_batch = messages if isinstance(messages, list) else [messages]
        for message in message_batch:
            _process_message(message, _printed, max_length)

    # Scores are printed only when both scores and reasonings are present.
    numerical_scores = _field("numerical_score")
    reasonings = _field("reasoning")
    if numerical_scores and reasonings:
        _print_scores(numerical_scores, reasonings)

def _process_message(message, _printed: Set, max_length=4500):
    """
    Print one message in a panel chosen by its type, at most once per message ID.

    Messages without an ID, or whose ID is already in ``_printed``, are skipped.
    Message text is wrapped in ``Text`` so that square brackets in model or
    user output are shown literally instead of being parsed as console markup.

    Args:
        message: The message object (HumanMessage, AIMessage, ToolMessage, or other).
        _printed (set): IDs of messages already printed; the ID is added after printing.
        max_length (int, optional): Longest content shown before truncation. Defaults to 4500.
    """
    msg_id = getattr(message, "id", None)
    if not msg_id or msg_id in _printed:
        return

    content = getattr(message, "content", "")
    # Content may not be a plain string; stringify it so slicing and
    # concatenation below cannot fail.
    if not isinstance(content, str):
        content = str(content)

    # Truncate message if it exceeds max_length
    if len(content) > max_length:
        content = content[:max_length] + " ... (truncated)"

    if isinstance(message, HumanMessage):
        console.print(Panel.fit(Text(f"**User:** {content}"), title="User Message", style="bright_green"))

    elif isinstance(message, AIMessage):
        tool_calls = (getattr(message, "additional_kwargs", None) or {}).get('tool_calls') or []
        if tool_calls:
            # One panel per tool call
            for call in tool_calls:
                function_spec = call.get('function', {}) if isinstance(call, dict) else {}
                tool_name = function_spec.get('name', 'Unknown Tool') if isinstance(call, dict) else "Unknown Tool"
                arguments_json = function_spec.get('arguments', '{}')
                try:
                    tool_args = json.dumps(json.loads(arguments_json), indent=2)
                except json.JSONDecodeError:
                    tool_args = 'Invalid JSON in arguments'

                tool_info = f"**Tool Used:** {tool_name}\n**Parameters:**\n```json\n{tool_args}\n```"
                console.print(Panel.fit(Text(tool_info), title="Tool Call", style="bright_yellow"))
        else:
            # Regular AI message
            console.print(Panel.fit(Text(f"**Assistant:** {content}"), title="Assistant Message", style="bright_cyan"))

    elif isinstance(message, ToolMessage):
        # ToolMessage that appears as a separate message
        console.print(Panel.fit(Text(f"**Tool Message:** {content}"), title="Tool Message", style="bright_yellow"))

    else:
        # Other message types: use pretty_repr() when available, else str(message)
        content = getattr(message, "pretty_repr", lambda html=False: str(message))()
        console.print(Markdown(content))

    _printed.add(msg_id)

def _print_scores(numerical_scores, reasonings):
    """
    Render scores next to their reasonings as a two-column table on the console.

    Args:
        numerical_scores (list): Scores; each is converted with ``str`` for display.
        reasonings (list): Reasonings paired positionally with the scores.
    """
    table = Table(title="Document Relevancy Scores")
    column_specs = (
        ("Score", {"style": "cyan", "no_wrap": True}),
        ("Reasoning", {"style": "magenta"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # zip pairs items positionally and stops at the shorter input.
    for pair in zip(numerical_scores, reasonings):
        table.add_row(str(pair[0]), pair[1])

    console.print(table)

In [192]:
from uuid import uuid4
from langchain_core.runnables import Runnable, RunnableConfig
# thread_id = uuid4()
# config = RunnableConfig(
#     configurable={
#         "thread_id": thread_id,
#     },
#     recursion_limit=25
# )
# msg = []
# def ask_questions(thread_id, config):
#     _printed = set()
#     while True:
#         question = input("Enter your question for the Peplink Knowledge Assistant ('exit' to quit): ")
#         if question.lower() == 'exit':
#             break
#         else:
#             events = question_data_extractor.stream({"messages": [HumanMessage(content=question, name="User")]}, config, stream_mode="values")
#             for event in events:
#                 _print_event(event, _printed)

# ask_questions(thread_id, config)



# #what questions should i ask if i want to further expand on each question

In [193]:
def question():
    """Run a scripted conversation through the question pipeline and print each streamed state.

    A fresh thread id is generated per call. The opening prompt and the
    repeated follow-up prompt are each sent as a user message named "User".
    """
    standard_ans = "what questions should i ask if i want to further expand on each question"

    ans = ["""
I am keen on learning AI and ML but I am a complete beginner. I hired an expert where he will answer any questions that I have.
To extract valuable information from this expert, I need your assistant in making valuable queries that allow me to move from 
beginner to intermidate level and eventually ML specialist.
""", standard_ans,standard_ans,standard_ans, standard_ans, standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans,standard_ans]

    # One config (and therefore one thread id) is shared by every turn of this run.
    run_config = RunnableConfig(
        configurable={"thread_id": uuid4()},
        recursion_limit=25,
    )

    for user_text in ans:
        user_turn = {"messages": [HumanMessage(content=user_text, name="User")]}
        for event in question_data_extractor.stream(user_turn, run_config, stream_mode="values"):
            print(event)

question()

[36;1m[1;3m[-1:checkpoint][0m [1mState at the end of step -1:
[0m{'messages': []}
[36;1m[1;3m[0:tasks][0m [1mStarting step 0 with 1 task:
[0m- [32;1m[1;3m__start__[0m -> {'messages': [HumanMessage(content='\nI am keen on learning AI and ML but I am a complete beginner. I hired an expert where he will answer any questions that I have.\nTo extract valuable information from this expert, I need your assistant in making valuable queries that allow me to move from \nbeginner to intermidate level and eventually ML specialist.\n', additional_kwargs={}, response_metadata={}, name='User')]}
[36;1m[1;3m[0:writes][0m [1mFinished step 0 with writes to 1 channel:
[0m- [33;1m[1;3mmessages[0m -> [HumanMessage(content='\nI am keen on learning AI and ML but I am a complete beginner. I hired an expert where he will answer any questions that I have.\nTo extract valuable information from this expert, I need your assistant in making valuable queries that allow me to move from \nbeginner

KeyboardInterrupt: 

I am keen on learning AI and ML but I am a complete beginner. I hired an expert who will answer any questions that I have. To extract valuable information from this expert, I need your assistance in making valuable queries that allow me to move from beginner to intermediate level and eventually to ML specialist.

Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
Ok after these questions, what is the next step? What technical or conceptual questions should I ask?
