In [9]:
import os
import langchain
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chat_models import google_palm
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent
from langchain.tools import StructuredTool
from langchain.prompts import MessagesPlaceholder
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from IPython.display import display, HTML
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter
import json
from dotenv import load_dotenv
load_dotenv()

True

In [6]:
from langchain.callbacks.manager import tracing_v2_enabled
from langsmith import Client
client = Client()

LANGCHAIN_TRACING_V2=os.getenv("LANGCHAIN_TRACING_V2")
LANGCHAIN_API_KEY=os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT="palm-ragfusion-shadow-99"  # if not specified, defaults to "default"
LANGCHAIN_ENDPOINT=os.getenv("LANGCHAIN_ENDPOINT")
OPENAI_API_KEY=OPENAI_API_KEY

In [10]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")

In [11]:
langchain.debug = True
data_dir_path = os.path.join(os.getcwd(), "data")

In [12]:
pandas_agent_model_kwargs = {
    "model": "gemini-pro",
    "temperature": 0,
    "google_api_key": "google_api_key"
}
pandas_agent_model = GoogleGenerativeAI(**pandas_agent_model_kwargs)

entries_a_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_a.csv'))
entries_b_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_b.csv'))
template_df = pd.read_csv(os.path.join(data_dir_path, 'legal_template.csv'))

dataframes = [entries_a_df, entries_b_df, template_df]

# Default naming convention defined in langchain.agents.agent_toolkits.pandas.base._get_multi_functions_input
langchain_df_name_to_df_map: dict[str, pd.DataFrame] = {
    f'df{i + 1}': df
    for i, df in enumerate(dataframes)
}

pandas_agent = create_pandas_dataframe_agent(
    pandas_agent_model,
    dataframes,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)

In [34]:
# question = "What are the differences between the table structure of all of the dataframes?"
# res = pandas_agent.run(input=question)

In [16]:
llm = GoogleGenerativeAI(temperature=0.3, model="gpt-3.5-turbo-16k-0613",google_api_key=google_api_key)

def validate_function_input(function_input: str):
    '''Always use this before calling any other function. It will raise an exception if the input is not valid JSON.'''
    try:
        json.loads(function_input)
        return None
    except Exception as e:
        return 'Input is not valid JSON: ' + str(e)

def read_from_database(question: str):
    '''Use this to ask natural language questions about tables in the database. The input must be a string that can be parsed into JSON. So escape necessary characters.'''
    if res := validate_function_input(question):
        return res
    return pandas_agent.run(question)

def write_to_database(valid_python_code: str):
    '''Use this to write to the database. The input must be a string must be valid python code that can be parsed into JSON. So escape necessary characters.'''
    if res := validate_function_input(valid_python_code):
        return res
    return pandas_agent.run("Execute this code: " + valid_python_code)

tools = [
    StructuredTool.from_function(read_from_database),
    StructuredTool.from_function(write_to_database)
]
agent_kwargs = {
    "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
}
memory = ConversationBufferMemory(memory_key="memory", return_messages=True)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    agent_kwargs=agent_kwargs,
    memory=memory,
    handle_parsing_errors=True
)

In [17]:
questions = ["What are the differences between the table structure of all of the dataframes?",
             "Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work.",
             "Excellent! Now, please write the code to transform df1 to df3. Your output should be 100% valid python code. The only explanations should be in the comments. Go!",
             "This is a good start, but you included text that wasn't in python code. Do not do this. Only return valid python code. Please try again.",
             "Good. Now copy the the original df1 and execute this code on the copy. Do not modify df1. Then compare this new dataframe to df1. Is all of the data there? Is it mapping properly to the new columns? If not, please fix it.",
             "No, I actually want you to run this code. Generate a new csv. This will be called df1_transformed.csv."
]
formatter = HtmlFormatter()
for question in questions:
    res = agent.run(question)
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(res)
    highlighted_code = highlight(res, PythonLexer(), formatter)
    display(HTML(highlighted_code))

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "What are the differences between the table structure of all of the dataframes?",
  "memory": []
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] [943ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "",
        "generation_info": {
          "finish_reason": "function_call",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwar

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work."
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] [4.04s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "To transform df1 to df3, you would need to perfor

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work.\nAI: To transform df1 to df3, you would need to perform the following steps:\n\n1. Drop any columns in df1 that are not present in df3.\n2. Rename any columns in df1 that have different names in df3.\n3. Add any columns in 

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work.\nAI: To transform df1 to df3, you would need to perform the following steps:\n\n1. Drop any columns in df1 that are not present in df3.\n2. Rename any columns in df1 that have different names in df3.\n3. Add any columns in 

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work.\nAI: To transform df1 to df3, you would need to perform the following steps:\n\n1. Drop any columns in df1 that are not present in df3.\n2. Rename any columns in df1 that have different names in df3.\n3. Add any columns in 

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: You are a helpful AI assistant.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: What are the differences between the table structure of all of the dataframes?\nAI: Agent stopped due to iteration limit or time limit.\nHuman: Given these differences, please write an outline for a python script that would transform df1 to df3. Do not write code, just return an outline of the step you would take. Be precise and validate your work.\nAI: To transform df1 to df3, you would need to perform the following steps:\n\n1. Drop any columns in df1 that are not present in df3.\n2. Rename any columns in df1 that have different names in df3.\n3. Add any columns in 

In [None]:
formatter = HtmlFormatter()
highlighted_code = highlight(res, PythonLexer(), formatter)
display(HTML(highlighted_code))

In [None]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor
db = SQLDatabase.from_uri("sqlite:///../../../../../notebooks/data/test.db")
toolkit = SQLDatabaseToolkit(db=db, llm=OpenAI(temperature=0))
agent_executor = create_sql_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS
)

In [None]:
# have a tool for each individual function.
# When you do a full query of the table, bypass the standard return and add directly to the gradio chat. Then add a message to the chat with the head of the query. And return a short message to the user that it has been added to the chat. Maybe it should generate a file download, too.