In [1]:
import autogen

# Build the model configuration list for gpt-3.5-turbo; the OpenAI API key is
# read from the local file "key_openai.txt".
config_list = autogen.config_list_from_models(
    model_list=["gpt-3.5-turbo"],
    exclude="aoai",
    openai_api_key_file="key_openai.txt",
)

In [2]:
# Print the configuration with the API key masked, so the secret is never
# written into the notebook's saved output.
print([
    {key: ("<redacted>" if key == "api_key" else value) for key, value in cfg.items()}
    for cfg in config_list
])

[{'api_key': '<redacted>', 'model': 'gpt-3.5-turbo'}]


In [3]:
# Import things that are needed generically
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool
from typing import Optional, Type
import math

class CircumferenceToolInput(BaseModel):
    """Argument schema for CircumferenceTool: a single required float, ``radius``."""

    # The description is carried into the field's schema entry, so the model is
    # told what value it is expected to supply.
    radius: float = Field(description="Radius of the circle")

class CircumferenceTool(BaseTool):
    """LangChain tool that computes a circle's circumference from its radius."""

    name = "circumference_calculator"
    description = "Use this tool when you need to calculate a circumference using the radius of a circle"
    args_schema: Type[BaseModel] = CircumferenceToolInput

    def _run(self, radius: float):
        """Return the circumference for ``radius`` (coerced with ``float()``)."""
        diameter = float(radius) * 2.0
        return diameter * math.pi

In [4]:
from langchain.tools.file_management.read import ReadFileTool

# Define a function to generate llm_config from a LangChain tool
def generate_llm_config(tool):
    """Build a function-calling schema dict from a LangChain tool.

    Args:
        tool: Object exposing ``name``, ``description`` and ``args``.

    Returns:
        A dict with the tool's name (lower-cased, spaces replaced by
        underscores), its description, and a ``parameters`` object schema.
        ``properties`` is ``tool.args`` unless that is ``None`` (then an empty
        dict); ``required`` is always an empty list.
    """
    schema_name = tool.name.lower().replace(" ", "_")
    schema_description = tool.description
    tool_args = tool.args
    return {
        "name": schema_name,
        "description": schema_description,
        "parameters": {
            "type": "object",
            "properties": {} if tool_args is None else tool_args,
            "required": [],
        },
    }

# Create one instance of each tool that will be exposed to the assistant
read_file_tool = ReadFileTool()
custom_tool = CircumferenceTool()
exposed_tools = (custom_tool, read_file_tool)

# Assemble the llm_config: one function schema per tool plus the model endpoints
llm_config = {
    "functions": [generate_llm_config(exposed) for exposed in exposed_tools],
    "config_list": config_list,  # Assuming you have this defined elsewhere
    "request_timeout": 120,
}


def _ends_with_terminate(msg):
    """Return a truthy value when msg["content"] is non-empty and ends with TERMINATE."""
    content = msg.get("content", "")
    return content and content.rstrip().endswith("TERMINATE")


user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=_ends_with_terminate,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={"work_dir": "coding"},
)

# Map each tool's name to its _run method so user_proxy can execute the calls
user_proxy.register_function(
    function_map={exposed.name: exposed._run for exposed in exposed_tools}
)

chatbot = autogen.AssistantAgent(
    name="chatbot",
    system_message="For coding tasks, only use the functions you have been provided with. Reply TERMINATE when the task is done.",
    llm_config=llm_config,
)

# Start the conversation; per the author's note, Test.txt contains " 7.81mm"
user_proxy.initiate_chat(
    chatbot,
    message="Read the file with the path 'Test.txt', then calculate the circumference of a circle that has a radius of that files contents.",
    llm_config=llm_config,
)

[33muser_proxy[0m (to chatbot):

Read the file with the path 'Test.txt', then calculate the circumference of a circle that has a radius of that files contents.

--------------------------------------------------------------------------------


[33mchatbot[0m (to user_proxy):

[32m***** Suggested function Call: read_file *****[0m
Arguments: 
{
  "file_path": "Test.txt"
}
[32m**********************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION read_file...[0m
[33muser_proxy[0m (to chatbot):

[32m***** Response from calling function "read_file" *****[0m
Error: no such file or directory: Test.txt
[32m******************************************************[0m

--------------------------------------------------------------------------------
[33mchatbot[0m (to user_proxy):

I'm sorry, but I couldn't find the file with the path 'Test.txt'. Please make sure the file exists and try again.

--------------------------------------------------------------------------------
[33muser_proxy[0m (to chatbot):



--------------------------------------------------------------------------------
[33mchatbot[0m (to user_proxy):

[32m*

In [5]:
from langchain.tools.file_management.read import ReadFileTool

# Define a function to generate llm_config from a LangChain tool
def generate_llm_config(tool):
    """Translate a LangChain tool into a function schema for ``llm_config["functions"]``.

    The schema name is ``tool.name`` lower-cased with spaces turned into
    underscores. ``tool.args`` (when not ``None``) becomes the ``properties``
    of the parameter object; the ``required`` list is left empty.
    """
    function_schema = dict(
        name=tool.name.lower().replace(" ", "_"),
        description=tool.description,
        parameters=dict(type="object", properties={}, required=[]),
    )

    declared_args = tool.args
    if declared_args is not None:
        function_schema["parameters"]["properties"] = declared_args

    return function_schema

# Tool instances: the stock file reader and the custom circumference tool
read_file_tool = ReadFileTool()
custom_tool = CircumferenceTool()

# Function schemas advertised to the model, in the order custom tool -> file reader
function_schemas = list(map(generate_llm_config, (custom_tool, read_file_tool)))

# Construct the llm_config
llm_config = dict(
    functions=function_schemas,
    config_list=config_list,  # Assuming you have this defined elsewhere
    request_timeout=120,
)

# Settings for the proxy agent that executes the function calls without human input
proxy_settings = dict(
    name="user_proxy",
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={"work_dir": "coding"},
)
user_proxy = autogen.UserProxyAgent(**proxy_settings)

# Register each tool's _run method under the tool's name
user_proxy.register_function(
    function_map=dict(
        [
            (custom_tool.name, custom_tool._run),
            (read_file_tool.name, read_file_tool._run),
        ]
    )
)

chatbot = autogen.AssistantAgent(
    name="chatbot",
    system_message="For coding tasks, only use the functions you have been provided with. Reply TERMINATE when the task is done.",
    llm_config=llm_config,
)

# Task for the assistant; per the author's note, Test.txt contains " 7.81mm"
task_message = "Read the file with the path 'Test.txt', then calculate the circumference of a circle that has a radius of that files contents."
user_proxy.initiate_chat(chatbot, message=task_message, llm_config=llm_config)

[33muser_proxy[0m (to chatbot):

Read the file with the path 'Test.txt', then calculate the circumference of a circle that has a radius of that files contents.

--------------------------------------------------------------------------------
[33mchatbot[0m (to user_proxy):

[32m***** Suggested function Call: read_file *****[0m
Arguments: 
{
  "file_path": "Test.txt"
}
[32m**********************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION read_file...[0m
[33muser_proxy[0m (to chatbot):

[32m***** Response from calling function "read_file" *****[0m
Error: no such file or directory: Test.txt
[32m******************************************************[0m

--------------------------------------------------------------------------------
[33mchatbot[0m (to user_proxy):

I'm sorry, but I couldn't find the file with the path 'Test.txt'. Please make sure the file exists and try agai

In [30]:
from autogen import oai

# Send a single completion request to the endpoint at http://localhost:1234/v1.
# Each dict entry needs a trailing comma: the previous version omitted the
# commas after "temperature" and "max_tokens", which is a SyntaxError.
response = oai.Completion.create(
    config_list=[
        {
            "model": "llama-6b",
            "api_base": "http://localhost:1234/v1",
            "api_type": "open_ai",
            "api_key": "NULL",  # just a placeholder
            "temperature": 0.7,
            "max_tokens": -1,
        }
    ],
    prompt="Tell me about Large Language Models",
)

print(response)

{
  "id": "cmpl-644665eb-de48-4f97-bf4a-9f843af96636",
  "object": "text_completion",
  "created": 1697450042,
  "model": "llama-6b",
  "choices": [
    {
      "text": " (LLMs).\nLarge Language Models are trained on large corpor",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 9,
    "completion_tokens": 16,
    "total_tokens": 25
  },
  "cost": 0,
  "config_id": 0,
  "pass_filter": true
}


In [28]:
# Standard LangChain example
from langchain.agents import create_spark_sql_agent
from langchain.agents.agent_toolkits import SparkSQLToolkit
from langchain.chat_models import ChatOpenAI
from langchain.utilities.spark_sql import SparkSQL
from pyspark.sql import SparkSession

# Get (or create) the Spark session and switch to the example database
spark = SparkSession.builder.getOrCreate()
schema = "langchain_example"
spark.sql(f"CREATE DATABASE IF NOT EXISTS {schema}")
spark.sql(f"USE {schema}")

# Load the CSV (header row, inferred column types) and save it as a table,
# overwriting any previous copy at the configured path
csv_file_path = "california_housing_train.csv"
table = "california_housing_train"
housing_csv = spark.read.csv(csv_file_path, header=True, inferSchema=True)
table_writer = housing_csv.write.option(
    "path", "content/spark-warehouse/langchain_example.db/california_housing_train"
).mode("overwrite")
table_writer.saveAsTable(table)

# Display the first rows of the saved table
spark.table(table).show()

ConnectionRefusedError: [Errno 61] Connection refused

In [26]:
# Note, you can also connect to Spark via Spark connect. For example:
# db = SparkSQL.from_uri("sc://localhost:15002", schema=schema)
import os
from pathlib import Path

spark_sql = SparkSQL(schema=schema)

# Never hardcode the API key in the notebook. Take it from the OPENAI_API_KEY
# environment variable, falling back to the local key file "key_openai.txt"
# that the autogen configuration also reads.
# NOTE(review): the key that was previously embedded here should be revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY") or Path("key_openai.txt").read_text().strip()

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo", openai_api_key=openai_api_key)
toolkit = SparkSQLToolkit(db=spark_sql, llm=llm)
agent_executor = create_spark_sql_agent(llm=llm, toolkit=toolkit, verbose=True)

ConnectionRefusedError: [Errno 61] Connection refused

In [8]:
# Standard LangChain example: ask the Spark SQL agent to describe the table.
# The call is the cell's last expression, so its return value is displayed.
agent_executor.run("Describe the california_housing_train table")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: list_tables_sql_db
Action Input: ""[0m
Observation: [38;5;200m[1;3mcalifornia_housing_train[0m
Thought:[32;1m[1;3mI see that there is a table called "california_housing_train" in the database. I can use the "schema_sql_db" tool to get more information about this table.
Action: schema_sql_db
Action Input: "california_housing_train"[0m
Observation: [33;1m[1;3mCREATE TABLE spark_catalog.langchain_example.california_housing_train (
  longitude DOUBLE,
  latitude DOUBLE,
  housing_median_age DOUBLE,
  total_rooms DOUBLE,
  total_bedrooms DOUBLE,
  population DOUBLE,
  households DOUBLE,
  median_income DOUBLE,
  median_house_value DOUBLE)
;

/*
3 rows from california_housing_train table:
longitude	latitude	housing_median_age	total_rooms	total_bedrooms	population	households	median_income	median_house_value
-114.31	34.19	15.0	5612.0	1283.0	1015.0	472.0	1.4936	66900.0
-114.47	34.4	19.0	7650.0	1901.0	1129.0	463.0	1.82

'The "california_housing_train" table has the following columns: longitude, latitude, housing_median_age, total_rooms, total_bedrooms, population, households, median_income, and median_house_value.'

In [12]:
import autogen

# Model configuration for gpt-3.5-turbo, with the API key loaded from the
# local file "key_openai.txt".
config_list = autogen.config_list_from_models(
    model_list=["gpt-3.5-turbo"],
    exclude="aoai",
    openai_api_key_file="key_openai.txt",
)


In [13]:
# Define a function to generate llm_config from a LangChain tool
def generate_llm_config(tool):
    """Return the function schema describing ``tool`` to the model.

    Args:
        tool: Object with ``name``, ``description`` and ``args`` attributes.

    Returns:
        dict with keys ``name`` (lower-cased, every space replaced by an
        underscore), ``description`` and ``parameters``. The parameters
        object uses ``tool.args`` as its ``properties`` when that value is
        not ``None``; ``required`` is always ``[]``.
    """
    # Splitting on a literal space and re-joining with "_" replaces every
    # space one-for-one, consecutive spaces included.
    normalized_name = "_".join(tool.name.lower().split(" "))
    summary = tool.description

    parameters = {"type": "object", "properties": {}, "required": []}
    if tool.args is not None:
        parameters["properties"] = tool.args

    return {"name": normalized_name, "description": summary, "parameters": parameters}

In [19]:
# Now use AutoGen with the LangChain tool bridge
tools = []
function_map = {}

for tool in toolkit.get_tools():  # debug_toolkit if you want to use tools directly
    tool_schema = generate_llm_config(tool)
    print(tool_schema)
    tools.append(tool_schema)
    # Key the callable by the name advertised in the schema, not the raw
    # tool.name: the schema name is normalized (lower-cased, spaces replaced
    # by underscores), and that normalized name is what the model is given.
    function_map[tool_schema["name"]] = tool._run

# Construct the llm_config
llm_config = {
    "functions": tools,
    "config_list": config_list,  # Assuming you have this defined elsewhere
    "request_timeout": 120,
}

# Proxy agent that executes function calls without human input; it replies
# automatically at most once in a row.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=1,
    code_execution_config={"work_dir": "coding"},
)

print(function_map)

# Register the tools and start the conversation
user_proxy.register_function(
    function_map=function_map
)

chatbot = autogen.AssistantAgent(
    name="chatbot",
    system_message="For coding tasks, only use the functions you have been provided with. Reply TERMINATE when the task is done.",
    llm_config=llm_config,
)

user_proxy.initiate_chat(
    chatbot,
    message="give me the best housing price from california_housing_train table",
    llm_config=llm_config,
)

{'name': 'query_sql_db', 'description': '\n    Input to this tool is a detailed and correct SQL query, output is a result from the Spark SQL.\n    If the query is not correct, an error message will be returned.\n    If an error is returned, rewrite the query, check the query, and try again.\n    ', 'parameters': {'type': 'object', 'properties': {'query': {'title': 'Query', 'type': 'string'}}, 'required': []}}
{'name': 'schema_sql_db', 'description': '\n    Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.\n    Be sure that the tables actually exist by calling list_tables_sql_db first!\n\n    Example Input: "table1, table2, table3"\n    ', 'parameters': {'type': 'object', 'properties': {'table_names': {'title': 'Table Names', 'type': 'string'}}, 'required': []}}
{'name': 'list_tables_sql_db', 'description': 'Input is an empty string, output is a comma separated list of tables in the Spark SQL.', 'parameters': {'type': 'object'

23/10/31 14:12:45 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 389564 ms exceeds timeout 120000 ms
23/10/31 14:12:45 WARN SparkContext: Killing executors is not supported by current scheduler.
23/10/31 14:12:53 ERROR Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:56)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:310)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:124)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$