# Enter your question

In [13]:
# Question that is sent to the Google Drive tool and to the agent further down
user_query = (
    "What do we know about the energy sector "
    "from March of 2023?."
)

# Working Notebook

### Set OpenAI API Key

In [14]:
# Import os to get the environment variables
import os

# Gives access to OpenAI API key as a variable without exposing it to the public
from config import OPENAI_API_KEY

### Import LangChain classes

In [15]:
# Import the ChatOpenAI class, which will be our LLM/chatbot interface
from langchain.chat_models import ChatOpenAI

# Import the message types that we will use to interact with the chatbot
from langchain.schema import HumanMessage, SystemMessage, AIMessage

# Import the GoogleDriveLoader class that will allow us to load documents from Google Drive
from langchain.document_loaders import GoogleDriveLoader

# Import text splitter class that takes a string and returns chunks of text
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Import the Document class that will allow us to create a Document object for each chunk
from langchain.schema import Document

# Import the class that will perform QA on the documents
from langchain.chains.question_answering import load_qa_chain

# Import the class that will respond to queries that are not answered by the QA chain
from langchain.chains import LLMChain

# Import the class for prompt templates
from langchain import PromptTemplate

# Import class for LLMChain
from langchain import LLMChain

# Import the classes for the ChatOpenAI-style prompts
from langchain.prompts import SystemMessagePromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate

# Import the class to create custom tools
from langchain.agents import Tool

# Import the class that loops over actions and observations until the agent is finished
from langchain.agents.agent import AgentExecutor

# Installs the PyPDF2 library
%pip install PyPDF2

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### My basic prompt template

In [16]:
# Basic single-variable prompt: the instruction text is fixed and only
# {user_input} is substituted when the template is formatted.
my_prompt_template = PromptTemplate(
    template="""
        You are an helpful AI chatbot that is able to answer questions. Answer all questions fully and honestly.
    
        INSTRUCTIONS = USER will ask you a question or otherwise engage with you. Before you respond, think about this: chatbots are known to get answers wrong, and even make up answers. Did you make any mistakes? Double check yourself, line by line, and step by step. Then respond to the USER the best that you can. If you are requested for information that you can't provide, admit that you don't know.
    
        (USER): {user_input}
        """,
    input_variables=["user_input"],
)

### Basic Instance Creations

#### LLM Instances

In [17]:
# Deterministic chat model (temperature 0); used by the Google Drive QA chain.
# NOTE(review): no model name is passed, so this presumably uses the library's
# default gpt-3.5 chat model — confirm.
chat_llm_0 = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)

# More creative chat model (temperature 0.7); used by the general GPT chain
# and by the agent created later in the notebook.
chat_llm_07 = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0.7,
)

# A second deterministic chat model (temperature 0), named for agent use
agent_llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)

#### Define GDrive QA Chain

In [18]:
# Question-answering chain for the GDrive documents: a "refine"-type chain
# driven by the temperature-0 chat model, with verbose logging switched on.
gdrive_qa_chain = load_qa_chain(
    llm=chat_llm_0,
    chain_type="refine",
    verbose=True,
)

#### GDrive Loader Instance

In [19]:
# Create a GoogleDriveLoader instance that will load documents from a Google Drive folder
gdrive_loader = GoogleDriveLoader(
    # Server-to-server authentication (currently disabled):
    # service_account_key=pathlib.Path("../../credentials.json"),
    # User authentication: path to the credentials file
    credentials_path="../../credentials.json",
    # Path of the token file
    token_path="../../.credentials/token.json",
    # ID of the Drive folder that contains the documents to load.
    # QUESTION: Does folder_id only pull GDrive docs? (Ex. ignore .docx files)
    folder_id="1Al0fkVnBh3_S_eKE-qaO4Akrbxvfh7pg",
    # A folder cannot be combined with explicit documents/files.
    # A "document" typically refers to a file that was created and edited using one of Google's online document editors:
    # document_ids=["document_id_1", "document_id_2", ...],
    # A "file", on the other hand, can refer to any type of file that can be stored on Google Drive, such as a PDF, an image, or a video. When loaded by the GoogleDriveLoader, these files are converted to Document objects that contain the text content of the file (if applicable) and metadata about the file, such as its title and source URL:
    # file_ids=["file_id_1", "file_id_2", ...],
)

### Prepare variables and documents

#### Format GDrive docs into readable chunks stored in working memory

In [20]:
# Fetch whatever the loader finds in the configured Google Drive folder.
# NOTE(review): the result is handed to split_documents below, so it is
# presumably a list of Document objects rather than one raw string — confirm.
gdrive_text = gdrive_loader.load()

# Splitter configured for 1024-character chunks with a 64-character overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)

# Break the loaded documents into chunks; the GDrive QA function runs over these
split_gdrive_text = text_splitter.split_documents(gdrive_text)

### Create Tools

#### Create QA Tool for GDrive Documents

In [21]:
# Define the function that will be wrapped in a tool for the agent
def gdrive_function(user_query):
    """Run the GDrive QA chain for ``user_query`` over the pre-split document chunks.

    Returns whatever ``gdrive_qa_chain.run`` returns for the question.
    """
    answer = gdrive_qa_chain.run(
        question=user_query,
        input_documents=split_gdrive_text,
    )
    return answer

# Create a custom tool with the function
gdrive_tool = Tool(
    # The callable executed when the tool is run
    func=gdrive_function,
    # Name under which the tool is presented
    name="Google Drive Search Tool",
    # Free-text guidance describing when and how to use the tool
    description="""
        Use this tool to sample and query preformatted documents that hold information that other LLMs may not have access to. You may need multiple queries to get a complete picture. This tool leverages the gpt-3.5-turbo model to answer questions about the documents. This tool will take a message as an input, and return a message as an output.
        """,
)


#### Test GDrive Tool

In [22]:

# Test the tool: send the user's question straight to the Google Drive tool
# (bypassing the agent) and print whatever it returns.
# NOTE: the QA chain behind this tool was created with verbose=True, so the
# formatted prompts are echoed to the cell output while it runs.
output_string = gdrive_tool.run(user_query)
print(output_string)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Context information is below. 
---------------------
﻿B2oom0and2Bust3Coal


Global Energy Monitor, CREA, E3G, Reclaim Finance, Sierra Club, SFOC,  
Kiko Network, CAN Europe, Bangladesh Groups, ACJCE, Chile Sustentable


TRACKING THE GLOBAL COAL PLANT PIPELINE




BOOM AND BUST COAL 2023


GLOBAL ENERGY MONITOR, CREA, E3G, RECLAIM FINANCE, SIERRA CLUB, SFOC, REPORT | APRIL 2023 | 2
KIKO NETWORK, CAN EUROPE, BANGLADESH GROUPS, ACJCE, CHILE SUSTENTABLE


ABOUT THE COVER
The cover photo shows activists staring down a bucket wheel excavator at 
the Garzweiler open-pit coal mine in Lützerath, Germany, where the mine’s 
planned expansion would raze nearby villages and evict local residents. 
Photo © Bernd Lauter / Greenpeace, January 2023.
GLOBAL ENERGY MONITOR
Global Energy Monitor (GEM) develops and 
shares information on energy projects in 
support

KeyboardInterrupt: 

#### Create GPT Tool for all other queries

##### Define prompt for GPT Tool

In [19]:
# Wrap the basic prompt template as a human (user) chat message template
gpt_tool_prompt = HumanMessagePromptTemplate(
    prompt=my_prompt_template,
)

# Build a ChatPromptTemplate from a list of message templates (here just the
# single human message). This puts the prompt into the message-based form
# that a chat model such as ChatOpenAI works with.
chat_prompt_template = ChatPromptTemplate.from_messages(
    [gpt_tool_prompt],
)



##### Create an instance of the LLMChain class that will be wrapped in a tool

In [20]:
# General-purpose chain (chat prompt + temperature-0.7 model, quiet logging);
# it is wrapped in a tool further down.
gpt_chain = LLMChain(
    prompt=chat_prompt_template,
    llm=chat_llm_07,
    verbose=False,
)

##### Wrap gpt chain in a tool

In [44]:
# Define the function that will be wrapped in a tool for the agent
def gpt_function(user_input):
    """Send ``user_input`` through the general GPT chain and return the chain's result."""
    reply = gpt_chain.run(user_input=user_input)
    return reply

# Create a custom tool with the function
gpt_tool = Tool(
    # The callable executed when the tool is run
    func=gpt_function,
    # Name under which the tool is presented
    name="ChatGPT-powered Question and Answer Tool",
    # Free-text guidance describing when and how to use the tool
    description="Use this tool to send to a modern gpt-3.5-turbo Large Language Model. The tool will then then return a message back to you. This tool is good for most general questions, and is able to take complex inputs.",
)

from langchain.tools.base import BaseTool

# from langchain.tools.base import register_tool
# langchain.register_tool(tool_instance)



#### Test the Tool

In [45]:

# Test the tool
# output_string = gpt_tool.run(user_query)
# print(output_string)

### Create Agent Structures

#### Imports

In [23]:
import re
from typing import List, Union
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, LLMChain
from langchain.schema import AgentAction, AgentFinish



#### Agent Prompt Template

In [None]:
# Base template for the custom agent prompt.
#
# Placeholders (all supplied by CustomPromptTemplate.format):
#   {tools}            - one "name: description" line per available tool
#   {tool_names}       - comma-separated tool names the model may pick from
#   {input}            - the user's question
#   {agent_scratchpad} - the log of earlier actions and their observations
#
# The tool names must be injected rather than hard-coded: the output parser
# returns the text after "Action:" as the tool to run, so the model has to be
# shown the tools' real names. {agent_scratchpad} must be present as well,
# otherwise the model never sees the results of its earlier actions.
agent_prompt_template = """
    Answer the USER's message as best you can. You have access to the following tools:

    {tools}

    Use the following format:

    Question: the input message you must respond to
    Thought: you should always think about what to do
    Action: the action to take, should be one of [{tool_names}]
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Thought/Action/Action Input/Observation can repeat 5 times)
    Thought: I now know the final answer
    Final Answer: the final answer to the original input question

    Begin!

    Question: {input}
    {agent_scratchpad}
"""



#### Custom Prompt Template

In [25]:
# Set up a custom prompt template
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that fills in tool details and the agent's step history."""

    # Raw template text that is filled in by format()
    template: str
    # Tools whose names and descriptions are made available to the template
    tools: List[Tool]

    def format(self, **kwargs) -> str:
        """Render the template.

        ``kwargs`` must contain ``intermediate_steps``, an iterable of
        (action, observation) pairs; it is removed from ``kwargs`` and turned
        into the ``agent_scratchpad`` value. ``tools``, ``tool_names`` and one
        ``<tool name>_description`` entry per tool are added before the
        template string is formatted with the resulting keyword arguments.
        """
        steps = kwargs.pop("intermediate_steps")

        # Replay each earlier step: the action's log, then its observation,
        # ending with a fresh "Thought: " cue.
        kwargs["agent_scratchpad"] = "".join(
            action.log + f"\nObservation: {observation}\nThought: "
            for action, observation in steps
        )

        # One "name: description" line per tool
        kwargs["tools"] = "\n".join(
            f"{tool.name}: {tool.description}" for tool in self.tools
        )
        # Comma-separated list of the tool names
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        # Each tool's description under its own "<name>_description" key
        kwargs.update(
            {f"{tool.name}_description": tool.description for tool in self.tools}
        )

        return self.template.format(**kwargs)

# Create a CustomPromptTemplate instance
agent_prompt = CustomPromptTemplate(
    # Variables the caller must provide when formatting the prompt
    input_variables=["input", "intermediate_steps"],
    # Tools whose names/descriptions are exposed to the template
    tools=[gdrive_tool, gpt_tool],
    # Base template text defined above
    template=agent_prompt_template,
)


#### Create Custom Output Parser

In [26]:
class CustomOutputParser(AgentOutputParser):
    """Turns raw LLM text into either a finishing result or the next action."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Interpret one LLM response.

        Returns an ``AgentFinish`` when the text contains "Final Answer:",
        using the stripped text after the last such marker as the ``output``
        value. Otherwise extracts the "Action:" / "Action Input:" pair and
        returns an ``AgentAction``.

        Raises:
            ValueError: if the text contains neither a final answer nor a
                parsable action.
        """
        final_marker = "Final Answer:"
        if final_marker in llm_output:
            # Return values is generally always a dictionary with a single `output` key
            answer = llm_output.split(final_marker)[-1].strip()
            return AgentFinish(return_values={"output": answer}, log=llm_output)

        # Capture the tool name and everything that follows "Action Input:"
        pattern = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        found = re.search(pattern, llm_output, re.DOTALL)
        if found is None:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")

        tool_name = found.group(1).strip()
        # Trim surrounding spaces, then surrounding double quotes, from the input
        tool_argument = found.group(2).strip(" ").strip('"')
        return AgentAction(tool=tool_name, tool_input=tool_argument, log=llm_output)

# Create the CustomOutputParser instance that converts raw LLM text into
# AgentAction / AgentFinish objects
output_parser = CustomOutputParser()


##### Experiment from https://github.com/hwchase17/langchain/issues/1559#issuecomment-1502902553

In [27]:
# Disabled experiment: the code below sits inside a string literal, so it is
# not executed — the cell only evaluates to (and echoes) the string itself.
# NOTE(review): `tools=tool_names` passes a list of tool-name strings;
# initialize_agent presumably expects the Tool objects themselves — confirm
# before re-enabling.
'''
tool_names = [tool.name for tool in [gdrive_tool, gpt_tool]]

from langchain.agents import initialize_agent

agent = initialize_agent(
            tools=tool_names,
            llm=chat_llm_07,
            agent="chat-zero-shot-react-description",
            verbose=True
        )
'''

'\ntool_names = [tool.name for tool in [gdrive_tool, gpt_tool]]\n\nfrom langchain.agents import initialize_agent\n\nagent = initialize_agent(\n            tools=tool_names,\n            llm=chat_llm_07,\n            agent="chat-zero-shot-react-description",\n            verbose=True\n        )\n'

#### Create Agent

##### Experiment

In [28]:
from langchain.agents import initialize_agent
from langchain.agents import Tool

# Tools offered to the agent. Each Tool wraps the plain Python function
# (gdrive_function / gpt_function) rather than an already-built Tool object:
# `func` is the callable that is executed with the tool input, and wrapping a
# Tool inside another Tool would only add a redundant second tool run.
tools = [
    Tool(
        name="Google Drive Search Tool",
        func=gdrive_function,
        description="""
        Use this tool to access recent information about the global energy sector. This tool will take a message as an input, and return a message as an output.
        """,
    ),
    Tool(
        name="ChatGPT-powered Question and Answer Tool",
        func=gpt_function,
        description="""
         Use this tool to send to a modern gpt-3.5-turbo Large Language Model. The tool will then then return a message back to you. This tool is good for most general questions, and is able to take complex inputs.""",
    ),
]

# Build the chat-style zero-shot ReAct agent over those tools.
# NOTE(review): this uses the temperature-0.7 model although a temperature-0
# `agent_llm` is defined earlier — confirm which one is intended.
# NOTE(review): `max_length` is forwarded as an extra keyword argument; verify
# that the installed LangChain version actually honours it.
agent = initialize_agent(
    tools=tools,
    llm=chat_llm_07,
    agent="chat-zero-shot-react-description",
    verbose=True,
    max_length=3600
)

# TEST IT!!

In [None]:
# Run the agent on the question defined at the top of the notebook
agent(user_query)