# Environment


In [1]:
# import libraries
import os
import pandas as pd  # for storing text and embeddings data
from openai import OpenAI  # for calling the OpenAI API
from openai.types.chat import ChatCompletionMessageParam
from tenacity import (
    retry,
    wait_random_exponential,
    stop_after_attempt,
)  # for retrying API calls
import tiktoken  # for counting tokens
from utils.embeddings_utils import cosine_similarity
from utils.data_utils import remove_newlines

# models
# Default embedding model for get_embedding(); used for both the case texts
# and the search queries so their vectors are comparable.
EMBEDDING_MODEL = "text-embedding-3-large"
# Default chat model for ask(); also the default model for token counting in
# num_tokens().
GPT_MODEL = "gpt-3.5-turbo"

In [2]:
# Shared OpenAI client used by the embedding and chat calls below. The API key
# is read from the OPENAI_API_KEY environment variable; indexing os.environ
# raises KeyError when that variable is not set.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Data Cleaning


In [3]:
import re


def parse_directory(base_dir):
    """
    Collect one record per case directory found anywhere under ``base_dir``.

    A case directory is any directory, at any depth, whose name looks like
    ``<network>_<case>``: alphanumerics, a single underscore, then
    alphanumerics or hyphens. Other directories (e.g. ``___prebuilt-tx``) are
    not turned into records, but they are still walked.

    For every case directory the optional files ``index.ts`` and
    ``index.details.ts`` are read; a missing file contributes an empty string.

    Args:
        base_dir: Root directory to walk.

    Returns:
        A list of dicts with the keys:
            ``case``    - case name with hyphens replaced by underscores,
            ``network`` - the part of the directory name before the underscore,
            ``code``    - contents of ``index.ts``,
            ``text``    - ``"<case>. <network>. <index.ts>. <index.details.ts>"``
                          (the case name keeps its hyphens here).
    """
    pattern = re.compile(r"[a-zA-Z0-9]+_[a-zA-Z0-9\-]+$")

    def read_if_present(path):
        # Return the file contents, or "" when the file does not exist.
        if not os.path.exists(path):
            return ""
        # The sources contain non-ASCII characters (emoji in string literals),
        # so decode explicitly as UTF-8 instead of relying on the platform
        # default encoding.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    data = []
    for root, dirs, _files in os.walk(base_dir):
        for dir_name in dirs:
            if not pattern.match(dir_name):
                continue  # Skip directories not matching the pattern. e.g. ___prebuilt-tx
            dir_path = os.path.join(root, dir_name)
            # The pattern admits exactly one underscore, so this always yields two parts.
            network, case = dir_name.split("_")

            index_data = read_if_present(os.path.join(dir_path, "index.ts"))
            details_data = read_if_present(os.path.join(dir_path, "index.details.ts"))

            data.append(
                {
                    "case": case.replace("-", "_"),
                    "network": network,
                    "code": index_data,
                    "text": f"{case}. {network}. {index_data}. {details_data}",
                }
            )

    return data


# Build the case table from the case directories on disk and snapshot it as CSV.
directory_path = "case/"
data = parse_directory(directory_path)
df = pd.DataFrame(data)
# The output folder has to exist before the first write; without this a fresh
# checkout fails here because "processed/" is otherwise only created in a
# later cell.
os.makedirs("processed", exist_ok=True)
df.to_csv("processed/case.csv")
print(f"Data shape: {df.shape}")

Data shape: (44, 4)


In [4]:
# Flatten the combined text column before it is embedded.
df["text"] = remove_newlines(df["text"])  # remove newlines for better embeddings
# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
os.makedirs("processed/", exist_ok=True)
df.to_csv("processed/data.csv")
df.head()

Unnamed: 0,case,network,code,text
0,multi_sender,polygon,import type { Abi } from 'abitype'\nimport {\n...,multi-sender. polygon. import type { Abi } fro...
1,zircuit_eigenpie_swell,ethereum,import type { Abi } from 'abitype'\nimport {\n...,zircuit-eigenpie-swell. ethereum. import type ...
2,genesis_liquidity,ethereum,import type { Abi } from 'abitype'\nimport {\n...,genesis-liquidity. ethereum. import type { Abi...
3,yearn_v3_usdt,polygon,"import type { Abi, Address } from ""abitype"";\n...","yearn-v3-usdt. polygon. import type { Abi, Add..."
4,pendle_points,arbitrum,"import {\n BatchCase,\n Context,\n Tx,\n I...","pendle-points. arbitrum. import { BatchCase, C..."


# Create embeddings


In [5]:
# At most 6 attempts per call, with randomized exponential backoff between
# attempts (starting at 1 second, capped at 20 seconds).
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))
def get_embedding(text: str, model=EMBEDDING_MODEL) -> list:
    """
    Return the embedding vector of ``text`` as produced by ``model``.

    The text is sent as a one-element batch, and the embedding of that single
    item is returned.
    """
    embedding_response = client.embeddings.create(model=model, input=[text])
    first_item = embedding_response.data[0]
    return first_item.embedding

In [6]:
# Embed every case text (one get_embedding call per row), then persist the
# table together with the vectors.
df["text_embedding"] = df["text"].apply(get_embedding)
df.to_csv("processed/embeddings.csv")

In [7]:
def search_case(df, query, top_n=3, n_lines=10):
    """
    Rank the cases in ``df`` by similarity to ``query`` and return the best ones.

    Args:
        df: DataFrame with the columns ``case``, ``network``, ``code`` and
            ``text_embedding`` (the embedding vector of each row).
        query: Free-text request; it is embedded with ``get_embedding``.
        top_n: Maximum number of matches to return.
        n_lines: Currently unused; kept so existing callers keep working.

    Returns:
        A list of ``(score, case, network, code)`` tuples ordered by descending
        cosine similarity. The list is empty when ``df`` has no rows or
        ``top_n`` is 0.
    """
    query_embedding = get_embedding(query)

    # Build the tuples directly in the order they are returned.
    scored_cases = [
        (
            cosine_similarity(query_embedding, row["text_embedding"]),
            row["case"],
            row["network"],
            row["code"],
        )
        for _, row in df.iterrows()
    ]
    # list.sort is stable, so rows with equal scores keep their DataFrame order.
    scored_cases.sort(key=lambda item: item[0], reverse=True)
    # Slicing, unlike unpacking zip(*...), is well-defined for an empty list.
    return scored_cases[:top_n]

In [8]:
# Example: print the three closest cases for a sample request.
result = search_case(df, "Deposit USDC to borrow ETH on AAVE", top_n=3)
separator = "-" * 70  # drawn after every match
for score, case, network, code in result:
    print(
        f"Score: {score.round(4)}",
        f"Case: {case}",
        f"Network: {network}",
        f"Code: {code[:20]}...",
        separator,
        sep="\n",
    )

Score: 0.5352
Case: aave_usdc_eth
Network: ethereum
Code: import type { Abi } ...
----------------------------------------------------------------------
Score: 0.5054
Case: aave_eth_eth
Network: ethereum
Code: import type { Abi } ...
----------------------------------------------------------------------
Score: 0.4912
Case: aave_usdt_eth
Network: ethereum
Code: import type { Abi } ...
----------------------------------------------------------------------


# Ask


In [9]:
from utils.data_utils import get_example_qa, read_constants

# Reference data that query_message() interpolates into every prompt after "Data:".
constants = read_constants()
# One worked question/answer pair that ask() sends as a user/assistant
# exchange ahead of the real question.
example_question, example_answer = get_example_qa()


def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """
    Return the number of tokens in a string.

    Args:
        text: The string to tokenize.
        model: Model name used to select the tokenizer.

    Returns:
        The token count of ``text``. When tiktoken has no tokenizer mapping
        for ``model``, the ``cl100k_base`` encoding is used instead, so the
        count is then an approximation for that model.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # encoding_for_model raises KeyError for model names it does not know
        # (e.g. a model newer than the installed tiktoken). An approximate
        # count is enough for budgeting and better than failing the request.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


def query_message(query: str, df: pd.DataFrame, model: str, token_budget: int) -> str:
    """
    Return a message for GPT, with relevant source code pulled from a dataframe.

    The message consists of a fixed instruction that embeds ``constants``,
    then zero or more ``Code:`` sections taken from the best matches of
    ``search_case``, then the question itself.

    Args:
        query: The user's question.
        df: DataFrame passed through to ``search_case``.
        model: Model name used for token counting.
        token_budget: Upper bound, in tokens, for the whole message. Code
            sections are appended in ranking order, and appending stops at the
            first section that would push the message over the budget. The
            instruction and the question are always included, even if they
            exceed the budget on their own.

    Returns:
        The assembled prompt string.
    """
    introduction = f"Generate the code based on the question. Modify the code as needed. `inputs` can be omitted. Use the following data as reference: \n\nData: {constants}\n"
    question = f"\n\nQuestion: {query}"
    message = introduction
    # search_case yields (score, case, network, code) tuples; only the code is
    # used. Iterating them directly also copes with an empty result, where a
    # four-way zip(*...) unpack would raise ValueError.
    for _score, _case, _network, code in search_case(df, query):
        data = f'\n\nCode:\n"""\n{code}\n"""'
        if num_tokens(message + data + question, model=model) > token_budget:
            break
        message += data
    return message + question


def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
):
    """
    Generate code for ``query`` with a chat model, grounded on retrieved cases.

    The prompt is assembled by ``query_message`` from ``df`` within
    ``token_budget`` tokens. It is sent after a system instruction and one
    worked example (``example_question`` / ``example_answer``).

    Args:
        query: The user's request.
        df: DataFrame of cases and embeddings to retrieve from.
        model: Chat model name; also used for token counting.
        token_budget: Token limit handed to ``query_message``.
        print_message: When true, print the assembled user prompt before sending.

    Returns:
        The content of the first completion choice.
    """
    system_prompt = "As a BentoBatch code generator, you will be provided with a question and a piece of typescript code. Your task is to generate the code for the question by updating the provided code."
    user_prompt = query_message(query, df, model=model, token_budget=token_budget)
    if print_message:
        print(user_prompt)

    # Order matters: instruction, worked example exchange, then the real request.
    conversation: list[ChatCompletionMessageParam] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": example_question},
        {"role": "assistant", "content": example_answer},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model=model,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [10]:
# Example: one USDT transfer request with three recipients on Polygon. The bare
# call leaves the generated answer (a fenced TypeScript string) as the cell output.
ask(
    "I want to send 10 USDT to my friends on Polygon. Their addresses are '0x09Fb377F01229892a0F19364A08d997509e69Ba1', '0x69F1CDa80d4b9De5fC084938037a34caA480e2d0' and '0x1230b1F9B5eDBd8dD082aEC42188686D546b9d5c'"
)

"```typescript\nimport type { Abi } from 'abitype'\nimport {\n  Context,\n  BatchCase,\n  InputType,\n  Tag,\n  TagTitle,\n  Tx,\n  WalletType,\n} from '@/models/cases/v3/types'\nimport {\n  encodeFunctionData,\n  erc20Abi,\n  formatUnits,\n  isAddress,\n  parseUnits,\n} from 'viem'\nimport { balanceOf } from '../prebuilt-tx/ERC20'\nimport { AddressNotFoundError } from '../error'\n\nconst tokenConfig = {\n  addr: '0xc2132D05D31c914a87C6611C10748AEb04B58e8F' as `0x${string}`,\n  symbol: 'USDT',\n  decimals: 6,\n}\n\nconst multiSender: BatchCase = {\n  id: 'multi_sender',\n  name: 'Multi-Send of USDT on Polygon',\n  description: 'One-Click to send USDT to multiple addresses.',\n  details: BATCH_DETAILS,\n  website: {\n    title: 'Bento Batch 🍱',\n    url: 'https://x.com/bentobatch',\n  },\n  tags: [TagTitle.Asset].map((name) => ({ title: name }) as Tag),\n  curatorTwitter: {\n    name: 'Bento Batch 🍱',\n    url: 'https://x.com/bentobatch',\n  },\n  protocols: CASE_PROTOCOLS,\n  attribute

In [11]:
# Example: a single-recipient USDC transfer on Arbitrum.
# NOTE(review): the recorded output for this cell is built on a Morpho vault
# deposit template (it imports "./morpho.details") rather than a plain
# transfer - retrieval may have picked an unrelated case; verify.
ask("Send 1 USDC to address '0x09Fb377F01229892a0F19364A08d997509e69Ba1' on Arbitrum")

'```typescript\nimport { Context, BatchCase, TagTitle, Tx, WalletType } from "@/cases/types";\nimport { balance, decimalValidator, getDecimals, max } from "@/models/cases/v3/utils";\nimport { parseUnits } from "viem";\nimport { BATCH_DETAILS, ATTRIBUTES, CASE_PROTOCOLS } from "./morpho.details";\nimport { mainnet } from "viem/chains";\nimport { AddressNotFoundError } from "../error";\nimport { depositPreviewTx, ethereumBundlerV2, erc4626DepositCallData, wrapToMulticallTx } from "../prebuilt-tx/morpho";\nimport { transferPreviewTx, transferTx } from "../prebuilt-tx/ERC20";\n\nconst inputTokenSymbol = "USDC";\nconst inputTokenAddr = "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48";\nconst morphoVault = "0x4cA0E178c94f039d7F202E09d8d1a655Ed3fb6b6";\nconst vaultName = "LeadBlock USDC RWA";\nconst morphoPageLink = "https://app.morpho.org/vault?vault=0x4cA0E178c94f039d7F202E09d8d1a655Ed3fb6b6";\nconst txCount = 2;\n\nconst tags = [{ title: TagTitle.Defi }, { title: TagTitle.RWA }];\n\nconst lead

In [12]:
# Example: a USDC transfer on Base with two recipients, phrased as an equal split.
ask(
    "I want to send 1 USDC equally on Base to 2 addresses: '0x69F1CDa80d4b9De5fC084938037a34caA480e2d0' and '0x1230b1F9B5eDBd8dD082aEC42188686D546b9d5c'"
)

"```typescript\nimport type { Abi } from 'abitype'\nimport {\n  Context,\n  BatchCase,\n  InputType,\n  Tag,\n  TagTitle,\n  Tx,\n  WalletType,\n} from '@/models/cases/v3/types'\nimport { balance, decimalValidator, max } from '@/models/cases/v3/utils'\nimport {\n  encodeFunctionData,\n  erc20Abi,\n  formatUnits,\n  isAddress,\n  parseUnits,\n} from 'viem'\nimport { balanceOf } from '../prebuilt-tx/ERC20'\nimport { AddressNotFoundError } from '../error'\nimport BATCH_DETAILS, { ATTRIBUTES, CASE_PROTOCOLS } from './index.details'\n\nconst tokenConfig = {\n  addr: '0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913' as `0x${string}`,\n  symbol: 'USDC',\n  decimals: 6,\n}\n\nconst multiSender: BatchCase = {\n  id: 'multi_sender',\n  name: 'Multi-Send of USDC on Base',\n  description: 'One-Click to send USDC to multiple addresses on Base.',\n  details: BATCH_DETAILS,\n  website: {\n    title: 'Bento Batch 🍱',\n    url: 'https://x.com/bentobatch',\n  },\n  tags: [TagTitle.Asset].map((name) => ({ tit