In [38]:
import os
from dotenv import load_dotenv
from langchain.document_loaders import PythonLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain_experimental.tools import PythonREPLTool
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain.chains import RetrievalQA
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    Language,
)

# Read environment variables (API keys etc.) via python-dotenv
load_dotenv()

# Connect the Pinecone client; credentials are taken from the environment, never hardcoded
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=os.getenv("PINECONE_ENV"))

# Index configuration.
# The dimension has to match the embeddings model in use
# (text-embedding-ada-002 -> 1536), see
# https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
index_name = "ai-developer"
index_dim = 1536
index_metric = "euclidean"

index_exists = index_name in pinecone.list_indexes()
index_has_vectors = False

if index_exists:
    index = pinecone.Index(index_name)
    index_has_vectors = index.describe_index_stats()["total_vector_count"] > 0

# A populated index is dropped so it can be rebuilt from scratch
# (no better way to clean it up on the free tier)
if index_has_vectors:
    pinecone.delete_index(name=index_name)

# Create the index when it was just dropped or never existed;
# an existing empty index is reused untouched
if index_has_vectors or not index_exists:
    pinecone.create_index(name=index_name, dimension=index_dim, metric=index_metric)

In [39]:
# Load the Python source file as LangChain documents
loader = PythonLoader("load_py_file.py")
document = loader.load()

# Chunk the file with the Python-aware recursive splitter
# (CharacterTextSplitter was used before, but it does not take the language into account)
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=300,
    chunk_overlap=10,
)
python_docs = python_splitter.split_documents(document)

print(f"Number of documets after the split on chunks: {len(python_docs)}")

# Embed the chunks with OpenAI and store them in the Pinecone index
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
docsearch = Pinecone.from_documents(
    documents=python_docs,
    embedding=embeddings,
    index_name=index_name,
)

Number of documets after the split on chunks: 9


In [66]:
# Retrieval QA chain over the Pinecone vector store
# (RetrievalQA is used because the previously used VectorDBQA got deprecated).
# return_source_documents=True makes the chain output include the retrieved chunks.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo-16k"),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    verbose=True,
    return_source_documents=True,
)

# Python agent that writes and executes code through PythonREPLTool.
# NOTE(review): the tool runs model-generated code in the local interpreter -
# only run this notebook in an environment where that is acceptable.
python_agent_executor = create_python_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-4"),
    tool=PythonREPLTool(),
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # handle_parsing_errors is an AgentExecutor option, so it must be routed through
    # agent_executor_kwargs; as a bare keyword it is forwarded to the agent class
    # instead of the executor and the retry on failed parsing never takes effect
    agent_executor_kwargs={"handle_parsing_errors": True},
)

In [72]:
### running agents separately

# define the error and the query
error = "TypeError: can only concatenate str (not 'list') to str"

# Adjacent string literals are joined at compile time, so sentences are separated by a
# single space. A backslash continuation inside the literal would embed the indentation
# of the following source line into the prompt.
query = (
    f"When running the code base script I get the error {error}. "
    "Code base is in the vector database, you can look it up there. "
    "Adjust the code to resolve this issue."
)

# the chain returns a dict (query, result, source_documents)
result = qa(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [73]:
# Show the dict returned by the RetrievalQA call in the previous cell;
# the displayed output has the keys 'query', 'result' and 'source_documents'
result

{'query': "When running the code base script I get the error TypeError: can only concatenate str (not 'list') to str.    Code base is in the vector database, you can look it up there.    Adjust the code to resolve this issue.",
 'result': 'To resolve the "TypeError: can only concatenate str (not \'list\') to str" issue, you need to modify the `sum_function` to handle the case where one of the inputs is a list. Here\'s an adjusted version of the code:\n\n```python\ndef sum_function(a, b):\n    if isinstance(a, list) or isinstance(b, list):\n        raise TypeError("Both inputs must be of the same type.")\n    return a + b\n\nresult = sum_function("a", ["b", "b", "b"])\nprint(result)\n```\n\nThis code will raise an error if either `a` or `b` is a list, ensuring that the function can only concatenate strings.',
 'source_documents': [Document(page_content='# This is a Python code snippet with an intentional error', metadata={'source': 'load_py_file.py'}),
  Document(page_content='def sum_f

In [76]:
# Prompt for the Python agent: hand over the retrieved documents together with the
# fix proposed by the QA chain and ask for the corrected code to be written to disk.
# Built from adjacent literals so that no source indentation leaks into the prompt
# (backslash continuations inside the string embedded long whitespace runs).
query_python = (
    f"You are given these documents: {result['source_documents']}. "
    "You need to change the code for the relevant document from all documents provided "
    f"given these instructions: {result['result']}. "
    "You need to make sure to process the document and instructions from text format into the python code. "
    "Action: Save this new processed code in current working directory, "
    "name the file solution and use the extension .py"
)
python_agent_executor.run(query_python)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe task requires me to find the document that contains the `sum_function` and replace it with the provided code snippet. After that, I need to save the updated code in a file named `solution.py` in the current working directory. 

To accomplish this, I will first create a list of the documents. Then, I will iterate over the documents to find the one that contains the `sum_function`. Once I find the document, I will replace the old `sum_function` with the new one provided in the instructions. Finally, I will write the updated code to a file named `solution.py`.

Let's start by creating a list of the documents. 

Action: Python_REPL
Action Input: 
```python
documents = [
    Document(page_content='# This is a Python code snippet with an intentional error', metadata={'source': 'load_py_file.py'}),
    Document(page_content='def sum_function(a, b):\n    return a + b\n\nsum_function("a", ["b", "b", "b"])', metadata={'source': 'lo

'The updated code has been successfully written to the file `solution.py` in the current working directory.'

# Useful links:
- [Python Code Split](https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/code_splitter)
- [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)
- Possibly useful later on for tool routing: [Agent & Retrieval QA](https://stackoverflow.com/questions/77178370/how-to-retrieve-source-documents-via-langchains-get-relevant-documents-method-o)
- Note that the `{context}` placeholder needs to be included in the prompt template: [Prompt Template & Retrieval QA](https://stackoverflow.com/questions/76554411/unable-to-pass-prompt-template-to-retrievalqa-in-langchain)