In [1]:
import sys
if 'google.colab' in sys.modules:
    !pip install langchain google-cloud-aiplatform rich
    !pip install wikipedia
    !pip install google_auth langchain google-cloud-aiplatform ric

# Imports

In [2]:
import os
import requests
from google.cloud import aiplatform
from langchain.agents import (AgentExecutor, AgentType, BaseSingleActionAgent,
                              Tool, initialize_agent)
from langchain.llms import VertexAI
from langchain.memory import ConversationBufferMemory
from langchain.tools import tool
from pydantic import BaseModel, Field
from rich import print
from rich.console import Console
from rich.progress import Progress, track
from rich.table import Table

# Google Authentication

In [3]:
tool_url = None #!TODO: set this to the tool url if using Colab
try:
    from google.colab import auth as google_auth
    google_auth.authenticate_user()
except:
    import dotenv
    dotenv.load_dotenv()
    tool_url = str(os.getenv('TOOL_URL'))
    pass

In [4]:
#! Change the following to your own project and location
aiplatform.init(project="aerobic-gantry-387923", location="us-central1")
llm = VertexAI(
    max_output_tokens = 512,
    top_k = 40,
    top_p = 0.9,
    temperature = 0.2,
    model_name = "chat-bison" #["text-bison" "chat-bison"]
)

ValidationError: 1 validation error for VertexAI
__root__
  Unknown model publishers/google/models/chat-bison; {'gs://google-cloud-aiplatform/schema/predict/instance/text_generation_1.0.0.yaml': <class 'vertexai.language_models._language_models._PreviewTextGenerationModel'>} (type=value_error)

# Prompt Template

In [None]:
from langchain.agents import (
    Tool,
    AgentExecutor,
    LLMSingleActionAgent,
    AgentOutputParser,
)
from langchain.prompts import StringPromptTemplate
# Set up the base template
template = """Act as a University of Texas at Dallas(sometime refered as UT Dallas or UTD) Guidance Counselor, answering the following questions if they are related to UT Dallas as best you can. You should not use the same action and input twice when using tools. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a compasstionate University of Texas Guidance Counselor when giving your final answer. Use lots of "Arg"s

Question: {input}
{agent_scratchpad}"""

## Custome Prompt Tool Retreiver

In [None]:
from typing import Callable
from langchain.prompts import StringPromptTemplate


# Set up a prompt template
class CustomPromptTemplateToolRetreiver(StringPromptTemplate):
    # The template to use
    template: str
    ############## NEW ######################
    # The list of tools available
    tools_getter: Callable

    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        ############## NEW ######################
        tools = self.tools_getter(kwargs["input"])
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(
            [f"{tool.name}: {tool.description}" for tool in tools]
        )
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in tools])
        return self.template.format(**kwargs)

## Custome Prompt Tool

In [None]:
from typing import List, Union
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        return self.template.format(**kwargs)

# Output Parser

In [None]:
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
import re
from langchain.schema import AgentAction, AgentFinish, OutputParserException
class CustomOutputParser(AgentOutputParser):

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise OutputParserException(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
output_parser = CustomOutputParser()

# Vector Stores
1. Websites  using The Webase Loader

In [None]:
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
'''
urls = [
    "https://www.utdallas.edu/galaxy/",
    "https://www.utdallas.edu/costs-scholarships-aid/"
    "https://www.utdallas.edu/academics/"
]
loader = WebBaseLoader(urls)


# fixes a bug with asyncio and jupyter
import nest_asyncio

nest_asyncio.apply()
docs = loader.aload()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings, collection_name="state-of-union")
'''

'\nurls = [\n    "https://www.utdallas.edu/galaxy/",\n    "https://www.utdallas.edu/costs-scholarships-aid/"\n    "https://www.utdallas.edu/academics/"\n]\nloader = WebBaseLoader(urls)\n\n\n# fixes a bug with asyncio and jupyter\nimport nest_asyncio\n\nnest_asyncio.apply()\ndocs = loader.aload()\n\ntext_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\ntexts = text_splitter.split_documents(docs)\n\nembeddings = OpenAIEmbeddings()\ndocsearch = Chroma.from_documents(texts, embeddings, collection_name="state-of-union")\n'

In [None]:
'''
state_of_union = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=docsearch.as_retriever()
)
'''

'\nstate_of_union = RetrievalQA.from_chain_type(\n    llm=llm, chain_type="stuff", retriever=docsearch.as_retriever()\n)\n'

# Tools Function

In [None]:
class SearchInput(BaseModel):
    query: str = Field(description="should be a search query")

def request_error_handler(url: str, params={}) -> str:
    try:
        response = requests.get(url, params)
        response.raise_for_status()  # Raise an HTTPError if the status code is not in the 200 range
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        return e

## Get a Tool Function

In [None]:
def get_tools(query):
    docs = retriever.get_relevant_documents(query)
    return [ALL_TOOLS[d.metadata["index"]] for d in docs]

## Courses

In [None]:
def get_courses_from_query(query: str) -> str:
    url = f"https://{tool_url}/get-possible-courses/{query}"
    return request_error_handler(url)


## Degree

In [None]:
def get_degrees_from_query(query: str) -> str:
    url = f"https://{tool_url}/get-possible-degrees/{query}"
    return request_error_handler(url)


## Utdallas Search

In [None]:
def utdallas_search(query: str) -> str:
    url = f"https://{tool_url}/search/{query}"
    return request_error_handler(url)

def access_utdallas_site(url_to_search: str) -> str:
    url = f"https://{tool_url}/access/"
    params = {
        "url": url_to_search
    }
    return request_error_handler(url, params)



## Wikipedia

In [None]:
from langchain.utilities import WikipediaAPIWrapper
wikipedia = WikipediaAPIWrapper()
def run_wikepedia(query: str) -> str:
    return wikipedia.run(query)

## Dictionary

In [None]:
def dictionary_search(query: str) -> str:
    url = f"https://{tool_url}/dictionary/{query}"
    return request_error_handler(url)

# Tools NAME and description

In [None]:
tools = [
    Tool(
        name = "UTD Degree API",
        func = get_degrees_from_query,
        description = "Suppose you have a keyword that sound like a UT Dallas' Degrees, Majors, Minors, and Certificates. This tool will return possible url, title and snippet from the query from search engine.",
    ),
    Tool(
        name = "UTD Course API",
        func = get_courses_from_query,
        description = "Suppose you have a keyword that sound like a UT Dallas' courses. This tool will return course numbers/codes where that keyword exists in the course title.",
    ),

    Tool(
        name = "UTD Search",
        func = utdallas_search,
        description = "This tool should not be used and should be used only as a last resort; it will search every UT Dallas page. This function require a query string and return possible links and snippet contents. If you want to find classes or degrees, there are better tools."
    ),
    Tool(
        name = "Extract text from UTD Search url links",
        func = access_utdallas_site,
        description = "If you have a url string contain utdallas.edu, use this function to extract text content"
    ),
    Tool(
        name = "Wikipedia API",
        func = run_wikepedia,
        description  = "This tool will return the first paragraph of the wikipedia page of the query."
    ),
        Tool(
        name = "Dictionary Search",
        func = dictionary_search,
        description = "If you need the definition of a word or phrase unrelated to UT Dallas, use this function to retrieve the definition. Do not use for anything related to UT Dallas' courses.",
    ),
]

## Set up Tools for Retreiver

In [None]:
#from langchain.prompts import CustomPromptTemplateToolRetreiver
prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"]
)

# Initialize Agents

## Zero Shot ReAcT Agents


In [None]:
'''
agent = initialize_agent(
    tools, llm, agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION, verbose=True, memory = memory ,
    max_execution_time = 10 , return_intermediate_steps=True,
)
'''
memory = ConversationBufferMemory(memory_key="chat_history")
zero_shot_react_agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    return_intermediate_steps=True,
    max_execution_time = 20 ,
)

## Plan and execute

In [None]:
from langchain.experimental import plan_and_execute
from langchain.chat_models import ChatOpenAI
from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.llms import OpenAI
planner = load_chat_planner(llm)
executor = load_agent_executor(llm, tools = tools , verbose=True)
plan_and_execute = PlanAndExecute (
    planner=planner,
    executor=executor,
    verbose=True,
    max_execution_time = 20 ,
    return_intermediate_steps=True,
)




## Custome Agents

In [None]:
# LLM chain consisting of the LLM and a prompt
from langchain import  LLMChain
llm_chain = LLMChain(llm=llm, prompt=prompt)

tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    stop=["\nObservation:"],
    allowed_tools=tool_names , 
    max_execution_time = 20 ,
)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

# Testing Agents

## Tell me the Difference between a courses


In [None]:
response = zero_shot_react_agent(
        {
            "input": "What is the difference between Math 2417 and Math 2419"
        }
)
print(
    response["output"]
)



[1m> Entering new  chain...[0m
[32;1m[1;3mI don't know what Math 2417 and Math 2419 are. I will try to find them.
Action: UTD Course API
Action Input: Math 2417[0m
Observation: [33;1m[1;3m{'items': [{'title': 'Calculus I - UT Dallas 2022 Undergraduate Catalog - The University ...', 'snippet': 'MATH2417 - Calculus IMATH 2417 Calculus I (4 semester credit hours) Functions, limits, continuity, differentiation; integration of function of one variable; logarithmic, exponential, and inverse trigonometric functions; techniques of integration, and applications. Three lecture hours and two discussion hours a week; problem section required with MATH 2417, and will also be registered for exam section. Not all MATH/STAT courses may be counted toward various degree plans. Please consult your degree plan to determine the appropriate MATH/STAT course requirements. Prerequisite: ALEKS score required or a grade of at least a C- in MATH 2306 or MATH 2312. (3-2) S'}, {'title': 'Calculus I - UT D

In [None]:
try:
  plan_execute_response = plan_and_execute(
    "What is the difference between Math 2417 and Math 2419"
  )
  print(
    plan_execute_response["output"]
  )
except Exception as e:
  print(e)

## Contacting People at my University

In [None]:
zero_shot_react_agent_response = zero_shot_react_agent(
    {
        "input": "What is the email to contact for Accounting advising?"
    }
)
print(
    zero_shot_react_agent_response["output"]
)


## How do I apply at UT Dallas

In [None]:
zero_shot_react_agent_response = zero_shot_react_agent(
    {
        "input": "How do I applying to UTD?"
    }
)

## Who are the professors who research NLP at UTD

In [None]:
zero_shot_react_agent_response = zero_shot_react_agent(
    {
        "input": "Who are the professors who research NLP at UTD other than Vincent Ng"
    }
)

## Can you teel me the Difference Cs and SW at UTD

In [None]:
zero_shot_react_agent_response = zero_shot_react_agent(
    {
        "input": "Can you tell me the difference between Computer Science and  Software Engineering at UTD?"
    }
)

## Build Me a Degree Plan