In [None]:
# got this from internet
import pathlib
import subprocess
import tempfile
from langchain.docstore.document import Document
import requests
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

def get_github_docs(repo_owner, repo_name):
    with tempfile.TemporaryDirectory() as d:
        subprocess.check_call(
            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",
            cwd=d,
            shell=True,
        )
        git_sha = (
            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        doc_files = []
        for extension in ['.md', '.mdx', '.ipynb']:
            
            doc_files.extend(list(repo_path.glob(f"**/*{extension}")))

        for doc_file in doc_files:
            with open(doc_file, "r") as f:
                relative_path = doc_file.relative_to(repo_path)
                github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"
                yield Document(page_content=f.read(), metadata={"source": github_url})
                
sources = get_github_docs("hwchase17", "langchain")
# list(sources)

source_chunks = []
splitter = CharacterTextSplitter(separator=" ", chunk_size=1024, chunk_overlap=0)
for source in sources:
    for chunk in splitter.split_text(source.page_content):
        source_chunks.append(Document(page_content=chunk, metadata=source.metadata))
        
search_index = FAISS.from_documents(source_chunks, OpenAIEmbeddings())
chain = load_qa_with_sources_chain(OpenAI(temperature=0, model_name='gpt-4'))

question = "I want to create a langchain tool that parses and validates blocks of python code. Will you write one for me"
output = chain(
        {
            "input_documents": search_index.similarity_search(question, k=4),
            "question": question,
        },
        return_only_outputs=True,
    )["output_text"]

In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI, VectorDBQA
from bootstrap import langchain_vectorstore_path
from bootstrap.interface_setup import load_environment_variables

load_environment_variables()

In [2]:
langchain_vectorstore = FAISS.load_local(langchain_vectorstore_path, embeddings=OpenAIEmbeddings())

In [7]:
llm = OpenAI(temperature=0., model_name='gpt-3.5-turbo')
langchain_docs_chain = VectorDBQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    vectorstore=langchain_vectorstore,
    verbose=True,
    )

In [21]:
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
from bootstrap import repo_root, package_root

In [22]:
from llama_index import download_loader

GPTRepoReader = download_loader("GPTRepoReader")

loader = GPTRepoReader()


In [23]:

documents = loader.load_data(repo_path=repo_root)

In [None]:

index = GPTSimpleVectorIndex(documents)

In [None]:
index.query('what libraries are imported in interface_setup.py?')

In [13]:
# Import things that are needed generically
from langchain.agents import initialize_agent, Tool, load_tools
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper

tools = [
    Tool(
        name = "Langchain docs search",
        func=langchain_docs_chain.run,
        description="Useful for when you need to lookup information about `langchain` - a library for composing LLMs. Input should be a fully formed question."
    ),
    
]
tools.extend(load_tools(tool_names=['requests']))

# Construct the agent. We will use the default agent type here.
# See documentation for a full list of options.
agent = initialize_agent(
    tools=tools, 
    llm=OpenAI(temperature=0., model_name='gpt-3.5-turbo'), 
    agent="zero-shot-react-description", 
    verbose=True)

In [None]:
agent.run('List all the agents available in langchain')

In [18]:
agent.run('Show me the agent documentation')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to locate the documentation for agents using the available tools
Action: Langchain docs search
Action Input: "How to use langchain agents"[0m

[1m> Entering new VectorDBQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3m LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents. For more information on how to use them, please see the [Agent Documentation](../modules/agents.rst).[0m
Thought:[32;1m[1;3mI have found the agent documentation
Final Answer: The agent documentation can be found at ../modules/agents.rst in the Langchain docs.[0m

[1m> Finished chain.[0m


'The agent documentation can be found at ../modules/agents.rst in the Langchain docs.'

In [11]:
import inspect
from langchain.agents import tool



@tool("Get module source code")
def get_module_source_code(module_name):
    """Returns source code for an importable module."""
    try:
        module = __import__(module_name)
        source = inspect.getsource(module)
        return source
    except ImportError:
        print(f"Could not import module '{module_name}'")
        return None

In [20]:
get_module_source_code("bootstrap.interface_setup")

'from pathlib import Path\n\nrepo_root = Path(__file__).parent.parent\npackage_root = Path(__file__).parent\nvectorstores_root = package_root / "vectorstores"\nlangchain_vectorstore_path = vectorstores_root / "langchain.faiss"\n'

In [1]:

from langchain.agents import initialize_agent, Tool
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

from bootstrap.introspection import (
    get_current_repo_functions_and_classes, 
    get_current_repo_definitions_summary,
    get_source_code,
    edit_source_code,
    )

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

tools = [
    Tool(
        name="Get current repo functions and classes",
        func=get_current_repo_definitions_summary,
        description=get_current_repo_definitions_summary.__doc__,
    ),
    Tool(
        name="Get source code",
        func=get_source_code,
        description=get_source_code.__doc__,
    ),
    Tool(
        name="Edit source code",
        func=edit_source_code,
        description=edit_source_code.__doc__,
    )
]


# Construct the agent. We will use the default agent type here.
# See documentation for a full list of options.
agent = initialize_agent(
    tools=tools, 
    # llm=ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo', ), 
    llm=ChatOpenAI(temperature=0.7, model_name='gpt-4', ), 
    agent="conversational-react-description", 
    verbose=True,
    memory=memory,
    
    )

In [2]:
agent.run('give me the names of functions in bootstrap.repo2docstore')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Get current repo functions and classes
Action Input: bootstrap.repo2docstore[0m
Observation: [36;1m[1;3m[{'qualname': 'bootstrap.interface_setup.load_environment_variables', 'type': 'function', 'signature': 'load_environment_variables()', 'docstring': None}, {'qualname': 'bootstrap.interface_setup.create_human_message_template', 'type': 'function', 'signature': 'create_human_message_template()', 'docstring': None}, {'qualname': 'bootstrap.interface_setup.create_chat_prompt', 'type': 'function', 'signature': 'create_chat_prompt(human_message)', 'docstring': None}, {'qualname': 'bootstrap.interface_setup.create_conversation_chain', 'type': 'function', 'signature': 'create_conversation_chain(chat_prompt)', 'docstring': None}, {'qualname': 'bootstrap.dummy_module.dummy_function', 'type': 'function', 'signature': 'dummy_function(a, b)', 'docstring': None}, {'qualname': 'bootstrap.du

'Here are the function names in the `bootstrap.repo2docstore` module:\n\n1. create_faiss_index\n2. get_github_docs'

In [2]:
agent.run('get the source code for `bootstrap.repo2docstore.get_github_docs`')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Get source code
Action Input: bootstrap.repo2docstore.get_github_docs[0m
Observation: [33;1m[1;3mdef get_github_docs(repo_owner, repo_name):
    with tempfile.TemporaryDirectory() as d:
        subprocess.check_call(
            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",
            cwd=d,
            shell=True,
        )
        git_sha = (
            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        doc_files = []
        for extension in ['.md', '.mdx', '.ipynb', '.py', '.rst']:
            doc_files.extend(list(repo_path.glob(f"**/*{extension}")))
        for doc_file in doc_files:
            with open(doc_file, "r") as f:
                relative_path = doc_file.relative_to(repo_path)
                github_url = f"https:/

'Here is the source code for `bootstrap.repo2docstore.get_github_docs`:\n\n```python\ndef get_github_docs(repo_owner, repo_name):\n    with tempfile.TemporaryDirectory() as d:\n        subprocess.check_call(\n            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",\n            cwd=d,\n            shell=True,\n        )\n        git_sha = (\n            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)\n            .decode("utf-8")\n            .strip()\n        )\n        repo_path = pathlib.Path(d)\n        doc_files = []\n        for extension in [\'.md\', \'.mdx\', \'.ipynb\', \'.py\', \'.rst\']:\n            doc_files.extend(list(repo_path.glob(f"**/*{extension}")))\n        for doc_file in doc_files:\n            with open(doc_file, "r") as f:\n                relative_path = doc_file.relative_to(repo_path)\n                github_url = f"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}"\n                yield 

In [4]:
agent.run('rewrite the source code for `bootstrap.repo2docstore.get_github_docs` so that it has type-hinting, then edit the source code')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Edit source code
Action Input: bootstrap.repo2docstore.get_github_docs[0m
Observation: [38;5;200m[1;3m
Error parsing inputs. Please ensure that the inputs are formatted as follows:
{'qualname': '<qualname of the function or class>', 'new_code': '<new code>'}
[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Before proceeding to edit the source code, I'll first provide the rewritten source code for `bootstrap.repo2docstore.get_github_docs` with type-hinting. Here's the updated code:

```python
from pathlib import Path
from typing import Iterator

def get_github_docs(repo_owner: str, repo_name: str) -> Iterator[Document]:
    with tempfile.TemporaryDirectory() as d:
        subprocess.check_call(
            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",
            cwd=d,
            shell=True,
        )
        git_sha = (
            subproc

'Before proceeding to edit the source code, I\'ll first provide the rewritten source code for `bootstrap.repo2docstore.get_github_docs` with type-hinting. Here\'s the updated code:\n\n```python\nfrom pathlib import Path\nfrom typing import Iterator\n\ndef get_github_docs(repo_owner: str, repo_name: str) -> Iterator[Document]:\n    with tempfile.TemporaryDirectory() as d:\n        subprocess.check_call(\n            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",\n            cwd=d,\n            shell=True,\n        )\n        git_sha = (\n            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)\n            .decode("utf-8")\n            .strip()\n        )\n        repo_path = pathlib.Path(d)\n        doc_files = []\n        for extension in [\'.md\', \'.mdx\', \'.ipynb\', \'.py\', \'.rst\']:\n            doc_files.extend(list(repo_path.glob(f"**/*{extension}")))\n        for doc_file in doc_files:\n            with open(doc_file, "r") as f

In [5]:
agent.run('that looks good - edit the source code and overwrite the definition with your new one. make sure to respect the formatting of the Edit Source Code tool')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Edit source code
Action Input: 
from pathlib import Path
from typing import Iterator

def get_github_docs(repo_owner: str, repo_name: str) -> Iterator[Document]:
    with tempfile.TemporaryDirectory() as d:
        subprocess.check_call(
            f"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .",
            cwd=d,
            shell=True,
        )
        git_sha = (
            subprocess.check_output("git rev-parse HEAD", shell=True, cwd=d)
            .decode("utf-8")
            .strip()
        )
        repo_path = pathlib.Path(d)
        doc_files = []
        for extension in ['.md', '.mdx', '.ipynb', '.py', '.rst']:
            doc_files.extend(list(repo_path.glob(f"**/*{extension}")))
        for doc_file in doc_files:
            with open(doc_file, "r") as f:
                relative_path = doc_file.relative_to(repo_path)
                gith

ValueError: Could not parse LLM output: `I apologize, but I am unable to modify the source code of the function using the tools available to me. However, you can apply the changes to the source code manually using the provided rewritten code with type-hinting.`