In [1]:
from web_scrapper import web_scrapper
from websearch import brave_search
from database import MongoDB
from base_llm import llm_llama3b


[{'Title': 'World University Rankings 2024 | Times Higher Education (THE)', 'URL': 'https://www.timeshighereducation.com/world-university-rankings/2024/world-ranking', 'Description': 'The Times Higher Education World University <strong>Rankings</strong> <strong>2024</strong> include 1,907 universities across 108 countries and regions. The table is based on our new WUR 3.0 methodology, which includes 18 carefully calibrated performance indicators that measure an institution’s performance across five areas: ...'}, {'Title': 'University of Toronto | World University Rankings | THE', 'URL': 'https://www.timeshighereducation.com/world-university-rankings/university-toronto', 'Description': 'Find the latest world <strong>rank</strong> for University of Toronto and key information for prospective students..'}]


In [2]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_ollama.llms import OllamaLLM
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langgraph.graph import END, StateGraph
# For State Graph 
from typing_extensions import TypedDict
import os


In [9]:
from langchain_ollama.llms import OllamaLLM
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from pydantic import BaseModel, Field
from langchain.agents import Tool
from typing import Optional
from langchain_core.tools import StructuredTool
import json
from langchain.output_parsers.structured import (
StructuredOutputParser, ResponseSchema
)


from datetime import datetime

from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers.structured import (
StructuredOutputParser, ResponseSchema
)

In [None]:
from langchain.chains import RetrievalQA
import boto3
from langchain_aws import ChatBedrock
from langchain_ollama.llms import OllamaLLM

def llm_llama3b():
    return OllamaLLM(model="llama3.2:3b", temperature=0.0, top_k=20, top_p= 0.6)

def question_extractor_chain(state):

    question_explorer_prompt = ChatPromptTemplate.from_template(
"""
Role: Search Engine Question Optimizer Robot

Scenario:
User has a question that he/she wants to know from the internet not from you.

Task: Base on user question, try to dive deeper to the intent and optimize user's question for search engine.
You are not responsible in answering user's question. You need to generate 10 search query based on the user's question
that allows user to obtain the best informtaion from the internet through the search engine, NOT FROM YOU. 
The generated questions should not enquire user to input information, instead, they should be ready for search engine. 
Optimize all the questions for search engine that enables user to obtain high quality information. 

This is the user's question: {question}

Instructions:
Think step by step to generate 10 search engine queries at the best of your ability.

Output Format: No greeting, no bold text, no Italic text, just plain text in string
"""
    ).partial(time=datetime.now())

    def question_explorer_format_doc(llm_input):
        print(llm_input)
        response_schemas = [
        ResponseSchema(
        name= "list_of_question", description="This variable stores a list of questions that are expanded from the original question.", type="List[string]" ),
        # ResponseSchema(
        # name="original_question", description="This variable stores the original question. Do not add", type="string" ),
        ]
        parser = StructuredOutputParser.from_response_schemas(response_schemas)

        instruction = parser.get_format_instructions()
        prompt = ChatPromptTemplate.from_template(
            """This is the input: {input} \n\n{instruction}
            Note: All questions including sub-questions should be stored as an individual element, each index of the list should be a string."""
        )

        format_llm = prompt | llm_llama3b()

        ans =  format_llm.invoke({"input": llm_input, "instruction": instruction})
        try:
            text = ans.replace("```json\n", "")
            text = text.replace("```", "")
            output = json.loads(text)
            return output
        except json.JSONDecodeError:
            return ans

    question_explorer_chain = question_explorer_prompt |  llm_llama3b() | question_explorer_format_doc
    
    temp_list = state["list_of_question"]

    for question in state["org_questions"]:
        result = question_explorer_chain.invoke(question)
        temp_list.append(result["list_of_question"])

    return {"list_of_question": temp_list}

In [None]:
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage

def ml_learner_chain(state):
    msg = state["messages"]
    template = ChatPromptTemplate.from_messages(
        [
            ("system", 
"""
Role: ML Learner Assistant

Name: ML Assistant

Scenario: 
Users are keen in AI and Machine Learning and thus find an expert in these two fields to answer any of user's questions.

Task:
Pretend you are a newbie in ML and AI, you will learn with user from the scratch into the core of ML and AI with the help of the expert.
Through continuously expanding the scopes, continue delve into the essential concepts of these two fields
allowing user to learn as a beginner all the way to an AI/ML specialist. Your primary task is to provide questions for users that can help user 
to build a strong understand and essentail skill sets in AI and ML. 
"""), 
("placeholder", "{msg}")
        ]
    )
    def ml_learner_format_doc(llm_input):
        response_schemas = [
        ResponseSchema(
        name= "list_of_question", description="This variable stores a list of questions that are related to AI/ML/Software System", type="List[string]" )
        ]
        parser = StructuredOutputParser.from_response_schemas(response_schemas)

        instruction = parser.get_format_instructions()
        prompt = ChatPromptTemplate.from_template(
            """This is the input: {input} \n\n{instruction}
            Note: All questions including sub-questions should be stored as an individual element, each index of the list should be a string."""
        )

        format_llm = prompt | llm_llama3b()

        ans =  format_llm.invoke({"input": llm_input, "instruction": instruction})
        try:
            text = ans.replace("```json\n", "")
            text = text.replace("```", "")
            print(text)
            output = json.loads(text)
            output["text"] = llm_input
            return output
        
        except json.JSONDecodeError:
            return ans
    chain = template | llm_llama3b() | ml_learner_format_doc
    result = chain.invoke({"msg": msg})
    msg.append(AIMessage(content=result["text"], name = "ML Assistant", additional_kwargs={}, response_metadata={}))
    return {"messages":msg, "org_questions": result["list_of_question"]}

In [25]:
from database import MongoDB
import os
from dotenv import load_dotenv

def synchronization(state):
    load_dotenv()
    MONGODB_USER_NAME = os.getenv("MONGODB_USER_NAME")
    MONGODB_USER_PASSWORD= os.getenv("MONGODB_USER_PASSWORD")

    db = MongoDB(MONGODB_USER_NAME, MONGODB_USER_PASSWORD)
    org_questions = state["org_questions"]
    expanded_questions = state["list_of_question"]

    total = org_questions + expanded_questions
    db.add_question(total)
    db.close_connection()
    

In [None]:
from typing import Literal
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import tools_condition
from typing import Annotated
from typing_extensions import List, TypedDict
from langgraph.graph.message import AnyMessage, add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from typing import List, TypedDict, Annotated
import asyncio

class QuestionPipeline(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    org_question: List[str]
    list_of_questions: List[str]



In [17]:
import uuid
from langchain_core.runnables import Runnable, RunnableConfig
thread_id = (uuid.uuid4())
config = RunnableConfig(
    configurable={
        "thread_id": thread_id,
    },
    recursion_limit=25
)
msg = []
def ask_questions(thread_id, config):
    _printed = set()
    while True:
        question = input("Enter your question for the Peplink Knowledge Assistant ('exit' to quit): ")
        if question.lower() == 'exit':
            break
        else:
            global msg
            msg.append(HumanMessage(content=question, name="User"))
            msg = ml_learner_chain({"messages": msg})
            print(msg[-1])
                       
ask_questions(thread_id, config)
#what questions should i ask if i want to further expand on each question

content='What an exciting journey you\'re about to embark on! As a newbie, it\'s great that you want to learn from the best (your AI/ML master) and become an all-rounded specialist. Here are some questions you could ask your master to get started:\n\n**Foundational Questions**\n\n1. What are the fundamental differences between Machine Learning (ML) and Artificial Intelligence (AI)? How do they overlap, and when would you use each term?\n2. Can you explain the concept of a "black box" model in ML? How does it relate to interpretability and transparency in AI systems?\n3. What is the difference between supervised, unsupervised, and reinforcement learning? Provide examples for each type.\n\n**Mathematics and Statistics**\n\n1. Master, can you explain the concepts of probability theory, linear algebra, and calculus as they apply to ML/AI? Are there any specific resources or books that I should read?\n2. How do we handle missing data in datasets? What are some common techniques for imputing

In [None]:
import json
from typing import Dict, Set
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
console = Console()
def _process_message(message, _printed: Set, max_length=4500):
    """
    Processes and prints individual messages based on their type.

    Args:
        message: The message object (HumanMessage, AIMessage, ToolMessage).
        _printed (set): A set to track already printed message IDs.
        max_length (int, optional): Maximum length of the message to display. Defaults to 1500.
    """
    # Access attributes directly
    msg_id = getattr(message, "id", None)
    if msg_id and msg_id not in _printed:
        sender = getattr(message, "name", "Unknown")
        content = getattr(message, "content", "")

        # Truncate message if it exceeds max_length
        if len(content) > max_length:
            content = content[:max_length] + " ... (truncated)"

        # Determine the type of message and format accordingly
        if isinstance(message, HumanMessage):
            label = "[bold blue]User[/bold blue]"
            panel = Panel.fit(f"**User:** {content}", title="User Message", style="bright_green")
            console.print(panel)

        elif isinstance(message, AIMessage):
            tool_calls = getattr(message, "additional_kwargs", {}).get('tool_calls', []) if hasattr(message, "additional_kwargs") else []
            if tool_calls:
                # Handle each tool call
                for call in tool_calls:
                    tool_name = call.get('function', {}).get('name', 'Unknown Tool') if isinstance(call, dict) else "Unknown Tool"
                    arguments_json = call.get('function', {}).get('arguments', '{}') if isinstance(call, dict) else '{}'
                    try:
                        arguments = json.loads(arguments_json)
                        tool_args = json.dumps(arguments, indent=2)
                    except json.JSONDecodeError:
                        tool_args = 'Invalid JSON in arguments'

                    tool_info = f"**Tool Used:** {tool_name}\n**Parameters:**\n```json\n{tool_args}\n```"
                    panel = Panel.fit(tool_info, title="Tool Call", style="bright_yellow")
                    console.print(panel)
            else:
                # Regular AI message
                label = "[bold green]Assistant[/bold green]"
                panel = Panel.fit(f"**Assistant:** {content}", title="Assistant Message", style="bright_cyan")
                console.print(panel)

        elif isinstance(message, ToolMessage):
            # Directly handle ToolMessage if it appears as a separate message
            label = f"[bold magenta]{sender} (Tool)[/bold magenta]"
            panel = Panel.fit(f"**Tool Message:** {content}", title="Tool Message", style="bright_yellow")
            console.print(panel)

        else:
            # Other message types
            content = getattr(message, "pretty_repr", lambda html=False: str(message))()
            md = Markdown(content)
            console.print(md)

        _printed.add(msg_id)

for message in messages:
    _process_message(message, set(), 4500)

In [1]:
from typing import Literal
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import tools_condition
from typing import Annotated
from typing_extensions import List, TypedDict
from langgraph.graph.message import AnyMessage, add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from typing import List, TypedDict, Annotated
import asyncio


class DataPipelineState(TypedDict):
    org_question: str
    list_of_questions: List[str]
    question_answer_pair: dict
    num_of_iteration: int

def explore_question(state):
    question = state["org_question"]
    result = question_explorer_chain.invoke({"question": question})
    try:
        text = result.replace("```json\n", "")
        text = text.replace("```", "")
        output = json.loads(text)
        list_of_questions = output.get('list_of_question', '')    

    except json.JSONDecodeError:
        print("Failed to parse JSON. Here's the raw response:")
        print(result)
    return {"list_of_questions": list_of_questions}


def web_search(state):
    list_of_tasks = []
    for question in state["list_of_questions"]:
        task = asyncio.create_task(brave_search(question))
    brave_search()
    brave_search(query, num_results=15, safesearch="moderate", freshness="2020-10-09to2024-10-09",summary=False):
    web_scrapper()


data_pipieline_graph = StateGraph(DataPipelineState)
data_pipieline_graph.add_node("question_explorer", explore_question)
data_pipieline_graph.add_node("")




SyntaxError: invalid syntax (2474097729.py, line 19)

In [34]:
from typing import Literal
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import tools_condition
from typing import Annotated
from typing_extensions import List, TypedDict
from langgraph.graph.message import AnyMessage, add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from typing import List, TypedDict, Annotated




class CSAssistantState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    plan: List[str]
    request: List[str]
    trivial: bool
    question_state: bool



ticket_system_assistant_graph = StateGraph(CSAssistantState)
ticket_system_assistant_graph.add_node("intent_check",check_response_intent)
ticket_system_assistant_graph.add_node("filter", trivial)
ticket_system_assistant_graph.add_node("planner", plan_generator)
ticket_system_assistant_graph.add_node("agent", ComplexAssistant(runnable))
ticket_system_assistant_graph.add_node("tools", create_tool_node_with_fallback(assistant_tools))
ticket_system_assistant_graph.add_node("final_check", check_question_status)

ticket_system_assistant_graph.set_entry_point("intent_check")
ticket_system_assistant_graph.add_edge("intent_check", "filter")
ticket_system_assistant_graph.add_conditional_edges("filter", check_scope)
ticket_system_assistant_graph.add_edge("planner", "agent")
ticket_system_assistant_graph.add_conditional_edges("agent", check_tool_call)
ticket_system_assistant_graph.add_edge("final_check", END)
ticket_system_assistant_graph.add_edge("tools", "agent")

memory = SqliteSaver.from_conn_string(":memory:")
ticket_system_assistant = ticket_system_assistant_graph.compile(
    checkpointer = memory,
    debug = True
)

web_scrapper 
    return {"title": title,
            "description": description,
            "author": author,
            "keywords": keywords,
            "url": url}

print(brave_search("What is the ranking of uoft in 2024",num_results=2))
            temp = {"Title": result['title'],
                    "URL": result['url'],
                    "Description": result['description']}

    load_dotenv()
    # Initialize the ApifyClient with your API token
    USER = os.getenv("MONGODB_USER_NAME")
    PASSWORD = os.getenv("MONGODB_USER_PASSWORD")

IndentationError: unexpected indent (3015165442.py, line 2)