In [None]:
import os

import pinecone
from dotenv import load_dotenv
from langchain.agents import AgentType
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PythonLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain_experimental.agents.agent_toolkits import create_python_agent
from langchain_experimental.tools import PythonREPLTool

# Load settings (API keys etc.) with python-dotenv before anything reads os.environ.
load_dotenv()


# Connect the Pinecone client with the credentials found in the environment.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

# Index parameters.
# The index dimension has to match the embedding model's vector size
# (model: text-embedding-ada-002), see
# https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
index_name = "ai-developer"
index_dim = 1536
index_metric = "euclidean"

# Make sure an empty index called `index_name` exists:
#   * index missing            -> create it
#   * index present with data  -> delete and recreate it (no better way on the free tier)
#   * index present and empty  -> keep it untouched
create_needed = True
if index_name in pinecone.list_indexes():
    index = pinecone.Index(index_name)
    vector_count = index.describe_index_stats()["total_vector_count"]
    if vector_count > 0:
        pinecone.delete_index(name=index_name)
    else:
        create_needed = False

if create_needed:
    pinecone.create_index(name=index_name, dimension=index_dim, metric=index_metric)

In [None]:
# Read the Python source file into LangChain documents.
loader = PythonLoader("load_py_file.py")
document = loader.load()

# Chunk the file with a Python-aware splitter
# (CharacterTextSplitter was used previously, but it does not take the language into account).
splitter_options = {
    "language": Language.PYTHON,
    "chunk_size": 300,
    "chunk_overlap": 10,
}
python_splitter = RecursiveCharacterTextSplitter.from_language(**splitter_options)
python_docs = python_splitter.split_documents(document)

chunk_count = len(python_docs)
print(f"Number of documets after the split on chunks: {chunk_count}")

# Embed the chunks with OpenAI and store them in the Pinecone index `index_name`.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))
docsearch = Pinecone.from_documents(python_docs, embeddings, index_name=index_name)

In [None]:
# Retrieval QA chain over the vector store (VectorDBQA was used previously, but got deprecated).
# return_source_documents=True keeps the retrieved chunks in the output next to the
# answer, so a later cell can hand both to the Python agent.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo-16k"),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    verbose=True,
    return_source_documents=True,
)

# Python agent equipped with a Python REPL tool.
python_agent_executor = create_python_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-4"),
    tool=PythonREPLTool(),
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # handle_parsing_errors is an AgentExecutor option. Loose keyword arguments of
    # create_python_agent are forwarded to the agent class instead, where the flag has
    # no effect, so it must go through agent_executor_kwargs to actually enable the
    # retry when the LLM output cannot be parsed.
    agent_executor_kwargs={"handle_parsing_errors": True},
)

In [None]:
### running agents separately


# The error raised by the code base script and the request sent to the retrieval chain.
error = "TypeError: can only concatenate str (not 'list') to str"
query = (
    f"When running the code base script I get the error {error}."
    "    Code base is in the vector database, you can look it up there."
    "    Adjust the code to resolve this issue."
)

# Ask the RetrievalQA chain.
result = qa(query)

In [None]:
# View the output of the RetrievalQA call above; a later cell reads its
# 'result' and 'source_documents' entries.
result

In [None]:
# Build the agent prompt from the retrieved documents and the QA answer.
# The sentences are separated by a fixed run of 16 spaces.
# NOTE(review): "expension.py" presumably means "extension .py" - prompt text left unchanged.
sentence_gap = " " * 16
query_python = sentence_gap.join(
    [
        f"You are given these document: {result['source_documents']}.",
        f"You need to change the code for the relevant document from all documents provided given these instructions: {result['result']}.",
        "You need to make sure to process the document and instructions from text format into the python code.",
        "Action: Save this new processed code in current working directory, name the file solution and use expension.py",
    ]
)
python_agent_executor.run(query_python)

# Useful links:
- [Python Code Split](https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/code_splitter)
- [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings)
- Might be interesting later on for tool routing: [Agent & Retrieval QA](https://stackoverflow.com/questions/77178370/how-to-retrieve-source-documents-via-langchains-get-relevant-documents-method-o)
- The {context} needs to be added to the comment though [Prompt Template & Retrieval QA](https://stackoverflow.com/questions/76554411/unable-to-pass-prompt-template-to-retrievalqa-in-langchain)