In [1]:
import pickle
repo_id = 'bf16a3a0-75de-4ac5-9ff2-e2f25edf4ba4'

# Load the pickled graph state stored under the repository's directory.
# The context manager closes the file handle as soon as loading finishes,
# instead of leaving it open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code during unpickling;
# only load state files that come from a trusted source.
with open(repo_id+'/state_0.pkl', 'rb') as state_file:
    G = pickle.load(state_file)
print(G)

DiGraph with 143 nodes and 186 edges


In [3]:
# Show the attribute dict stored on the 'get_embedding' node.
G.nodes['get_embedding']

{'name': 'get_embedding',
 'type': 'function',
 'index': '0',
 'elementname': 'embed!!function!!0!!get_embedding.py',
 'explanation': 'The function get_embedding takes in three arguments: chunks (a list), embeddings_model (a pre-trained model), and batch_size (an integer). It returns two values: chunks (the input list) and embeddings (a list of embeddings).\n\nThe function loops through the chunks list in batches of size batch_size. It calls the embeddings_model to embed each batch of chunks and appends the embeddings to the embeddings list. It also prints a progress message indicating the range of chunks that have been embedded.\n\nFinally, it returns the original chunks list and the embeddings list.',
 'info': {'generated': True,
  'date_modified': '2023-12-24 19:13:42',
  'n_info_edges': 5}}

In [14]:
import os
from chromadb import PersistentClient, HttpClient
from chromadb.utils import embedding_functions
from dotenv import load_dotenv
# Load environment variables from a .env file (python-dotenv) before anything
# reads them; return_collection below looks up OPENAI_API_KEY via os.getenv.
load_dotenv()

def return_collection(path=None, collection_name=None, model_name="text-embedding-ada-002"):
    """Open (or create) a collection in an on-disk Chroma store.

    Args:
        path: Location of the persistent Chroma store. Required.
        collection_name: Name of the collection to fetch or create. Required.
        model_name: OpenAI embedding model handed to the collection's
            embedding function. Defaults to "text-embedding-ada-002", the
            value that used to be hard-coded, so existing calls are unaffected.

    Returns:
        The object returned by ``get_or_create_collection`` for
        ``collection_name``, requested with ``{"hnsw:space": "cosine"}``
        metadata and an OpenAI embedding function.

    Raises:
        AssertionError: If ``path`` or ``collection_name`` is None.
    """
    assert path is not None, "Path isn't specified"
    assert collection_name is not None, "Collection name isn't specified"
    chroma_client = PersistentClient(path=path)
    # The API key is read from the environment at call time; os.getenv yields
    # None when OPENAI_API_KEY is not set.
    emb_fn = embedding_functions.OpenAIEmbeddingFunction(
        api_key=os.getenv('OPENAI_API_KEY'),
        model_name=model_name,
    )
    return chroma_client.get_or_create_collection(
        name=collection_name,
        embedding_function=emb_fn,
        metadata={"hnsw:space": "cosine"},
    )

# Location of the on-disk Chroma store, relative to the repository directory.
path_ = repo_id + "/meta/storage"
explanations = return_collection(collection_name="explanations", path=path_)
# Number of entries in the "explanations" collection.
explanations.count()

23

In [15]:
# Open the "code" collection from the same store and show its size.
code = return_collection(collection_name="code", path=path_)
code.count()

25

In [16]:
# Open the "triplets" collection from the same store and show its size.
triplets = return_collection(collection_name="triplets", path=path_)
triplets.count()

186

In [42]:
# Query the triplets collection with a natural-language question, asking for
# 10 results, and display the documents that came back.
query = "What is happening with the template upload pipeline?"
res = triplets.query(n_results=10, query_texts=[query])
list(res['documents'])

[['template_upload_pipeline function-call chunking_pipeline',
  'template_upload_pipeline function-call embed_pipeline',
  'template_upload_pipeline EXTERNAL_DEPENDENCY chunking_pipeline_0',
  'template_upload_pipeline EXTERNAL_DEPENDENCY embed_pipeline_0',
  'update_database.py function template_upload_pipeline',
  'template_upload_pipeline function-call cot_pipeline',
  'template_upload_pipeline EXTERNAL_DEPENDENCY cot_pipeline_0',
  'chunking_pipeline EXTERNAL_DEPENDENCY open_files_0',
  'embed.py function embed_pipeline',
  'chunking_pipeline EXTERNAL_DEPENDENCY split_files_1']]

In [46]:
# Same question against the code collection; display the metadata of the
# 10 results rather than the documents themselves.
query = "What is happening with the template upload pipeline?"
res = code.query(n_results=10, query_texts=[query])
list(res['metadatas'])

[[{'containedin': 'update_database.py',
   'index': '0',
   'name': 'template_upload_pipeline',
   'type': 'function'},
  {'containedin': 'dfs.py',
   'index': '2',
   'name': 'cot_pipeline',
   'type': 'function'},
  {'containedin': 'embed.py',
   'index': '3',
   'name': 'embed_pipeline',
   'type': 'function'},
  {'containedin': 'chunker.py',
   'index': '5',
   'name': 'chunking_pipeline',
   'type': 'function'},
  {'containedin': 'embed.py',
   'index': '0',
   'name': 'get_embedding',
   'type': 'function'},
  {'containedin': 'agents.py',
   'index': '1',
   'name': 'create_assistant',
   'type': 'function'},
  {'containedin': 'embed.py',
   'index': '1',
   'name': 'open_data_json',
   'type': 'function'},
  {'containedin': 'agents.py',
   'index': '2',
   'name': 'create_thread',
   'type': 'function'},
  {'containedin': 'search.py',
   'index': '1',
   'name': 'open_embeddings_json',
   'type': 'function'},
  {'containedin': 'search.py',
   'index': '0',
   'name': 'open_data_

In [48]:
# Same question against the explanations collection; display the metadata
# and the documents of the 10 results together.
query = "What is happening with the template upload pipeline?"
res = explanations.query(n_results=10, query_texts=[query])
[res['metadatas'], res['documents']]

[[[{'name': 'template_upload_pipeline', 'type': 'function'},
   {'name': 'embed_pipeline', 'type': 'function'},
   {'name': 'chunking_pipeline', 'type': 'function'},
   {'name': 'cot_pipeline', 'type': 'function'},
   {'name': 'add_files', 'type': 'function'},
   {'name': 'open_data_json', 'type': 'function'},
   {'name': 'extract_education_and_experience', 'type': 'function'},
   {'name': 'split_files', 'type': 'function'},
   {'name': 'open_embeddings_json', 'type': 'function'},
   {'name': 'store_data', 'type': 'function'}]],
 [['The code snippet defines a function called template_upload_pipeline. This function calls three other functions: chunking_pipeline, embed_pipeline, and cot_pipeline. It is likely that these functions are part of a larger pipeline for uploading and processing templates.',
   'The function embed_pipeline does two things. First, it calls the open_data_json function to retrieve some data. Then, it calls the save_embeddings function to save the embeddings of that

In [49]:
# Print every edge that has 'template_upload_pipeline' as one of its endpoints.
node_name = 'template_upload_pipeline'
for source, target, attrs in G.edges(data=True):
    if node_name in (source, target):
        print((source, target, attrs))

('update_database.py', 'template_upload_pipeline', {'type': 'function'})
('template_upload_pipeline', 'chunking_pipeline', {'type': 'function-call'})
('template_upload_pipeline', 'embed_pipeline', {'type': 'function-call'})
('template_upload_pipeline', 'cot_pipeline', {'type': 'function-call'})
('template_upload_pipeline', 'chunking_pipeline_0', {'type': 'EXTERNAL_DEPENDENCY'})
('template_upload_pipeline', 'embed_pipeline_0', {'type': 'EXTERNAL_DEPENDENCY'})
('template_upload_pipeline', 'cot_pipeline_0', {'type': 'EXTERNAL_DEPENDENCY'})


In [33]:
# Store the output connections: what comes out of each function and where it goes in (a train of events), i.e. how the functions are being called.

In [50]:
# Print every edge that has 'chunking_pipeline' as one of its endpoints.
node_name = 'chunking_pipeline'
for source, target, attrs in G.edges(data=True):
    if node_name in (source, target):
        print((source, target, attrs))

('chunker.py', 'chunking_pipeline', {'type': 'function'})
('chunking_pipeline', 'open_files', {'type': 'function-call'})
('chunking_pipeline', 'extract_education_and_experience', {'type': 'function-call'})
('chunking_pipeline', 'store_data', {'type': 'function-call'})
('chunking_pipeline', 'open_files_0', {'type': 'EXTERNAL_DEPENDENCY'})
('chunking_pipeline', 'split_files_1', {'type': 'EXTERNAL_DEPENDENCY'})
('chunking_pipeline', 'extract_education_and_experience_0', {'type': 'EXTERNAL_DEPENDENCY'})
('chunking_pipeline', 'store_data_0', {'type': 'EXTERNAL_DEPENDENCY'})
('template_upload_pipeline', 'chunking_pipeline', {'type': 'function-call'})
