agent using retriever

In [13]:
import os
import pandas as pd
from dataclasses import dataclass
from dotenv import load_dotenv
from tqdm import tqdm

from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

In [14]:
load_dotenv()
embedding = OpenAIEmbeddings()
import chromadb
chroma_client = chromadb.HttpClient(host="localhost", port="8000")

chromadb.__version__
from langchain.vectorstores import Chroma

collection_name = "geo_test"

lc_client = Chroma(client=chroma_client,
                   collection_name=collection_name,
                   embedding_function=embedding)

create the retriever

In [15]:
from langchain.agents.agent_toolkits import create_retriever_tool

retriever = lc_client.as_retriever(search_kwargs={"k": 8})

tool = create_retriever_tool(
    retriever, 
    "search_geologic_data",
    "Searches and returns documents regarding geological features, entities and concepts."
)
tools = [tool]

prebuilt components:

In [16]:
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent


from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(temperature = 0)

In [None]:

agent_executor = create_conversational_retrieval_agent(llm, tools, verbose=True)
result = agent_executor({"input": "hi, im bob"})
result

In [5]:
result = agent_executor({"input": "whats my name"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Bob.[0m

[1m> Finished chain.[0m


{'input': 'whats my name',
 'chat_history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False)],
 'output': 'Your name is Bob.',
 'intermediate_steps': []}

In [6]:
result = agent_executor({"input": "I would like to know about geologic features"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mSure, I can help you with that. Geologic features refer to various physical formations and structures found on the Earth's surface that are a result of geological processes. These features can include mountains, valleys, plateaus, canyons, caves, rivers, lakes, glaciers, deserts, and more.

Is there a specific geologic feature you would like to know more about?[0m

[1m> Finished chain.[0m


{'input': 'I would like to know about geologic features',
 'chat_history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False),
  HumanMessage(content='I would like to know about geologic features', additional_kwargs={}, example=False),
  AIMessage(content="Sure, I can help you with that. Geologic features refer to various physical formations and structures found on the Earth's surface that are a result of geological processes. These features can include mountains, valleys, plateaus, canyons, caves, rivers, lakes, glaciers, deserts, and more.\n\nIs there a specific geologic feature you would like to know more about?", additional_kwargs={}, example=False)],
 'output': "Sure, I can help you with that. Geologic feat

In [7]:
result = agent_executor({"input": "What types of rock do you know about?"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThere are three main types of rocks: igneous, sedimentary, and metamorphic. Here's a brief description of each type:

1. Igneous Rocks: These rocks are formed from the cooling and solidification of molten magma or lava. Igneous rocks can be further classified into two categories:
   - Intrusive Igneous Rocks: These rocks form when magma cools and solidifies beneath the Earth's surface. Examples include granite and diorite.
   - Extrusive Igneous Rocks: These rocks form when lava cools and solidifies on the Earth's surface. Examples include basalt and obsidian.

2. Sedimentary Rocks: These rocks are formed from the accumulation and compaction of sediments over time. Sediments can include fragments of other rocks, minerals, and organic matter. Sedimentary rocks can be further classified into three categories:
   - Clastic Sedimentary Rocks: These rocks are formed from the accumulation of fragments of other rocks. Examples inclu

{'input': 'What types of rock do you know about?',
 'chat_history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False),
  HumanMessage(content='I would like to know about geologic features', additional_kwargs={}, example=False),
  AIMessage(content="Sure, I can help you with that. Geologic features refer to various physical formations and structures found on the Earth's surface that are a result of geological processes. These features can include mountains, valleys, plateaus, canyons, caves, rivers, lakes, glaciers, deserts, and more.\n\nIs there a specific geologic feature you would like to know more about?", additional_kwargs={}, example=False),
  HumanMessage(content='What types of rock do you know about?', a

from scratch:

In [17]:
# This is needed for both the memory and the prompt
memory_key = "history"

from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory

memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder
system_message = SystemMessage(
        content=(
            "Do your best to answer the questions. "
            "You must use the tools available to look up "
            "relevant information, if neccessary"
        )
)
prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)]
    )
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, 
                               verbose=True, return_intermediate_steps=True)

In [19]:
"intermediate_steps" in agent_executor.output_keys

True

In [9]:
result = agent_executor({"input": "hi, im bob"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello Bob! How can I assist you today?[0m

[1m> Finished chain.[0m


{'input': 'hi, im bob',
 'history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False)],
 'output': 'Hello Bob! How can I assist you today?',
 'intermediate_steps': []}

In [10]:
result = agent_executor({"input": "whats my name"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYour name is Bob.[0m

[1m> Finished chain.[0m


{'input': 'whats my name',
 'history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False)],
 'output': 'Your name is Bob.',
 'intermediate_steps': []}

In [11]:
result = agent_executor({"input": "tell me about the Skalica Formation"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_geologic_data` with `{'arg1': 'Skalica Formation'}`


[0m[36;1m[1;3m[Document(page_content='\nSkalica-Formation  (de) / Skalica Formation  (en)\nGBA Status:\nofficial use\nDie Skalica-Formation wurde im Wiener Becken hangend der Studienka-Fm bzw. der Holíč-Fm. (sandige mergelige Tone und Silte) und liegend der Záhorie-Fm. abgelagert. Sie verzahnt lateral proximal mit den grobklastischeren Gajary- und Karlova Ves Mb. Alter: Spätes Serravall, Sarmat (Piller et al., 2004). Environment: Offshore marin. Hauptlithologie: Monotone, graugrüne, mergelige Tone mit Zwischenlagen von Sanden und Konglomeraten. (Kováč et al., 2004)\n[PDF]\xa0-\n[Catalog]\n[PDF]\xa0-\n[Catalog]\nConcept relations\nbroader: [Vienna Basin, Eisenstadt Basin]\ncloseMatch: Skalica Formation (DBpedia)\n', metadata={'source': 'http://resource.geolba.ac.at/GeologicUnit/853'}), Document(page_content="\nKollerkogel Formation  (en)\nKollerkogel-F

{'input': 'tell me about the Skalica Formation',
 'history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False),
  HumanMessage(content='tell me about the Skalica Formation', additional_kwargs={}, example=False),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_geologic_data', 'arguments': '{\n  "arg1": "Skalica Formation"\n}'}}, example=False),
  FunctionMessage(content='[Document(page_content=\'\\nSkalica-Formation  (de) / Skalica Formation  (en)\\nGBA Status:\\nofficial use\\nDie Skalica-Formation wurde im Wiener Becken hangend der Studienka-Fm bzw. der Holíč-Fm. (sandige mergelige Tone und Silte) und liegend der Záhorie-Fm. abgelagert. Sie verzahnt lateral proximal mit den grobkla

In [12]:
result = agent_executor({"input": "please provide the source link from your last message"})
result



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mCertainly! Here is the source link for more information about the Skalica Formation: [Skalica Formation](http://resource.geolba.ac.at/GeologicUnit/853)[0m

[1m> Finished chain.[0m


{'input': 'please provide the source link from your last message',
 'history': [HumanMessage(content='hi, im bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='whats my name', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False),
  HumanMessage(content='tell me about the Skalica Formation', additional_kwargs={}, example=False),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_geologic_data', 'arguments': '{\n  "arg1": "Skalica Formation"\n}'}}, example=False),
  FunctionMessage(content='[Document(page_content=\'\\nSkalica-Formation  (de) / Skalica Formation  (en)\\nGBA Status:\\nofficial use\\nDie Skalica-Formation wurde im Wiener Becken hangend der Studienka-Fm bzw. der Holíč-Fm. (sandige mergelige Tone und Silte) und liegend der Záhorie-Fm. abgelagert. Sie verzahnt lateral proxim

In [9]:
from langchain.schema.messages import HumanMessage, AIMessage, FunctionMessage
d = {'input': 'tell me about the Skalica Formation. Make sure to include the link to the resource,', 'history': [HumanMessage(content='my name is bob', additional_kwargs={}, example=False), AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False), HumanMessage(content='what is my name?', additional_kwargs={}, example=False), AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False), HumanMessage(content='tell me about the Skalica Formation', additional_kwargs={}, example=False), AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_geologic_data', 'arguments': '{\n  "arg1": "Skalica Formation"\n}'}}, example=False), FunctionMessage(content='[Document(page_content=\'\\nSkalica-Formation  (de) / Skalica Formation  (en)\\nGBA Status:\\nofficial use\\nDie Skalica-Formation wurde im Wiener Becken hangend der Studienka-Fm bzw. der Holíč-Fm. (sandige mergelige Tone und Silte) und liegend der Záhorie-Fm. abgelagert. Sie verzahnt lateral proximal mit den grobklastischeren Gajary- und Karlova Ves Mb. Alter: Spätes Serravall, Sarmat (Piller et al., 2004). Environment: Offshore marin. Hauptlithologie: Monotone, graugrüne, mergelige Tone mit Zwischenlagen von Sanden und Konglomeraten. (Kováč et al., 2004)\\n[PDF]\\xa0-\\n[Catalog]\\n[PDF]\\xa0-\\n[Catalog]\\nConcept relations\\nbroader: [Vienna Basin, Eisenstadt Basin]\\ncloseMatch: Skalica Formation (DBpedia)\\n\', metadata={\'source\': \'http://resource.geolba.ac.at/GeologicUnit/853\'}), Document(page_content="\\nKollerkogel Formation  (en)\\nKollerkogel-Formation  (de) / Notation:\\n566\\nValidity: Valid; first entry by Suess (1868: \'ungeschichteter, … lichtgrauer Kalkstein, welcher … an den Westhängen des Kollerberges … entblößt ist\'); formalized by Flügel (2000: p. 25-26; Kollerkogel-Formation).; Chronostratigraphic age: Givetian - Frasnian.; Lithology: Grey dolomites with biolaminations, light bluish limestones (mostly mudstones), locally bioclastic limestones with chert nodules.; (Piller et al., 2014)\\n[PDF]\\xa0-\\n[Catalog]\\n[PDF]\\n[PDF]\\xa0-\\n[Catalog]\\nConcept relations\\nbroader: [Graz Palaeozoic]\\n", metadata={\'source\': \'http://resource.geolba.ac.at/GeologicUnit/651\'}), Document(page_content=\'\\nKellerwand Formation  (en)\\nKellerwand-Formation  (de) / Pal Grande Calcarenite [partim]  (de) / Kellerwand-Kalk [partim]  (de) / Main outcrop areas: The Kellerwand Formation crops out in the Rauchkofel (2,460 m) area, in the south side of the Valentintal and between Mt. Cellon/Creta di Collinetta (2,238 m) and Creta di Timau (2,218 m). Lithology: The Kellerwand Formation (thickness: about 95 m) consists of four well-bedded facies listed in order of decreasing abundance. (Facies A): medium dark gray, thin to medium-bedded, wackestone to packstone with moderate yellow thin laminae of silt; (Facies B): medium dark gray, thin to thick bedded, often fining upward, moderately to well sorted and locally laminated grainstone (from very coarse up to granules to fine-grained) to packstone; (Facies C): medium light gray, medium to thick bedded, moderately to well sorted crinoidal-bearing grainstone (biosparite); Facies D: medium dark gray, thick bedded, poorly sorted coral-bearing rudstone with clasts up to about 10 cm of diameter and poorly sorted very coarse to fine grainstone matrix. Chronostratigraphic age: Devonian: Pragian to Emsian (Kreutzer, 1990, 1992a, b). (Pondrelli et al., 2015a)\\n[PDF]\\xa0-\\n[Catalog]\\nConcept relations\\nbroader: [Carnic Alps]\\nrelated: Kellerwand Limestone\\n\', metadata={\'source\': \'http://resource.geolba.ac.at/GeologicUnit/237\'}), Document(page_content=\'\\nDobranberg Formation  (en)\\nDobranberg-Formation  (de) / GBA Status:\\nofficial use\\nDie Dobranberg-Formation bildet den obersten Abschnitt der eozänen Guttaring-Gruppe und setzt sich aus sehr fossilreichen Plattformkalken zusammen. Von der Sittenberg-Fm. im Liegenden kann sie durch den reduzierten klastischen Eintrag unterschieden werden. Alter: mittleres - spätes Lutet (Piller et al., 2004). Environment: Karbonatplattform. Hauptlithologie: Fossilreiche Kalke, Fossilschuttkalke. (Van Hinte, 1963; Wilkens, 1991; Hillebrandt, 1993; Krainer, 2003)\\n[PDF]\\xa0-\\n[Catalog]\\nConcept relations\\nbroader: [Central Alps]\\n\', metadata={\'source\': \'http://resource.geolba.ac.at/GeologicUnit/485\'})]', additional_kwargs={}, name='search_geologic_data'), AIMessage(content='The Skalica Formation is a geological formation that was deposited in the Vienna Basin. It is located above the Studienka Formation and the Holíč Formation, and below the Záhorie Formation. The formation is characterized by monotone, gray-green, marly clays with interbeds of sands and conglomerates. It was formed during the Late Serravallian to Sarmatian age and represents an offshore marine environment. The Skalica Formation is part of the broader Vienna Basin and Eisenstadt Basin.', additional_kwargs={}, example=False), HumanMessage(content='please provide the source link from your last message', additional_kwargs={}, example=False), AIMessage(content='Certainly! Here is the source link for the information about the Skalica Formation: [Skalica Formation Source](http://resource.geolba.ac.at/GeologicUnit/853)', additional_kwargs={}, example=False), HumanMessage(content='tell me about the Skalica Formation. Make sure to include the link to the resource,', additional_kwargs={}, example=False), AIMessage(content='The Skalica Formation is a geological formation that was deposited in the Vienna Basin. It is located above the Studienka Formation and the Holíč Formation, and below the Záhorie Formation. The formation is characterized by monotone, gray-green, marly clays with interbeds of sands and conglomerates. It was formed during the Late Serravallian to Sarmatian age and represents an offshore marine environment. The Skalica Formation is part of the broader Vienna Basin and Eisenstadt Basin.\n\nYou can find more information about the Skalica Formation [here](http://resource.geolba.ac.at/GeologicUnit/853).', additional_kwargs={}, example=False)], 'output': 'The Skalica Formation is a geological formation that was deposited in the Vienna Basin. It is located above the Studienka Formation and the Holíč Formation, and below the Záhorie Formation. The formation is characterized by monotone, gray-green, marly clays with interbeds of sands and conglomerates. It was formed during the Late Serravallian to Sarmatian age and represents an offshore marine environment. The Skalica Formation is part of the broader Vienna Basin and Eisenstadt Basin.\n\nYou can find more information about the Skalica Formation [here](http://resource.geolba.ac.at/GeologicUnit/853).', 'intermediate_steps': []}

In [20]:
d

{'input': 'tell me about the Skalica Formation. Make sure to include the link to the resource,',
 'history': [HumanMessage(content='my name is bob', additional_kwargs={}, example=False),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, example=False),
  HumanMessage(content='what is my name?', additional_kwargs={}, example=False),
  AIMessage(content='Your name is Bob.', additional_kwargs={}, example=False),
  HumanMessage(content='tell me about the Skalica Formation', additional_kwargs={}, example=False),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_geologic_data', 'arguments': '{\n  "arg1": "Skalica Formation"\n}'}}, example=False),
  FunctionMessage(content='[Document(page_content=\'\\nSkalica-Formation  (de) / Skalica Formation  (en)\\nGBA Status:\\nofficial use\\nDie Skalica-Formation wurde im Wiener Becken hangend der Studienka-Fm bzw. der Holíč-Fm. (sandige mergelige Tone und Silte) und liegend der Záhorie-Fm. ab