In [4]:
from openai import OpenAI
import os
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Read the API key from the environment instead of embedding it in the
# notebook, so the secret never ends up in a saved or shared copy of the file.
# Set it before starting the kernel, e.g. `export OPENAI_API_KEY=...`.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


# Shared OpenAI client used by get_embedding() below.
# NOTE(review): when OPENAI_API_KEY is None the client is expected to fail at
# construction with a "missing api_key" error rather than later with a 401.
client = OpenAI(
    api_key=OPENAI_API_KEY
)

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Args:
        text: String to embed. Newlines are replaced by spaces before sending.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding


In [1]:
from sentence_transformers import SentenceTransformer, util

# The question to answer, and the candidate passages to rank against it
query = "How many people live in London?"
docs = [
    "Around 9 Million people live in London",
    "London is known for its financial district",
]

# Load the sentence-transformers model used to embed both query and passages
model = SentenceTransformer('sentence-transformers/msmarco-distilbert-dot-v5')


In [2]:
# Embed the query and every candidate passage with the same model
query_emb = model.encode(query)
doc_emb = model.encode(docs)

# Dot-product score of the query against each passage; row 0 holds the
# scores for our single query
scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

# Pair each passage with its score and order the pairs best-first
doc_score_pairs = sorted(
    zip(docs, scores),
    key=lambda pair: pair[1],
    reverse=True,
)

# Report the ranked passages
print("Query:", query)
for doc, score in doc_score_pairs:
    print(score, doc)

Query: How many people live in London?
86.77784729003906 Around 9 Million people live in London
71.38754272460938 London is known for its financial district


In [6]:
# One row per passage; get_embedding is applied to each text with its default model
df = pd.DataFrame({'text': docs})
df['ada_embedding'] = df['text'].apply(get_embedding)
df

Unnamed: 0,text,ada_embedding
0,Around 9 Million people live in London,"[0.023224100470542908, 0.005612279288470745, -..."
1,London is known for its financial district,"[0.011360542848706245, 2.3030845113680698e-05,..."


In [7]:
def search_reviews(df, product_description, n=3, pprint=True):
    """Return the ``n`` rows of ``df`` most similar to ``product_description``.

    The description is embedded with ``get_embedding`` (ada-002) and compared,
    by cosine similarity, with the vectors stored in ``df.ada_embedding``.

    Args:
        df: DataFrame with an ``ada_embedding`` column of embedding vectors.
            It is not modified; scoring happens on a copy.
        product_description: Free-text query to embed and search for.
        n: Maximum number of rows to return.
        pprint: Unused; kept so existing calls that pass it keep working.

    Returns:
        A new DataFrame holding the top ``n`` rows plus a ``similarities``
        column, sorted by descending similarity.
    """
    # Nothing to rank: skip the embedding request and return an empty result
    # that still carries the `similarities` column callers expect.
    if len(df) == 0:
        return df.assign(similarities=pd.Series(dtype=float)).head(n)

    embedding = get_embedding(product_description, model='text-embedding-ada-002')

    # Score every stored vector against the query in a single call; the result
    # has one column (one query), so take column 0 as the per-row scores.
    scores = cosine_similarity(list(df['ada_embedding']), [embedding])[:, 0]

    # assign() returns a copy, so the caller's frame is left untouched and
    # re-running a search never leaves a stale `similarities` column behind.
    ranked = df.assign(similarities=scores)
    return ranked.sort_values('similarities', ascending=False).head(n)


In [8]:
# Rank the London passages against the question defined earlier
ans = search_reviews(df, product_description=query)

In [9]:
# Bare last expression: renders the frame returned by search_reviews
ans

Unnamed: 0,text,ada_embedding,similarities
0,Around 9 Million people live in London,"[0.023224100470542908, 0.005612279288470745, -...",0.925758
1,London is known for its financial district,"[0.011360542848706245, 2.3030845113680698e-05,...",0.83778


In [10]:
# Small corpus of Python snippets to search over: a one-liner and two
# recursive functions kept as raw (indented) source text.
texts = [
    'def sum(a, b): return a + b',
    """
    def fib(n):
        if n <= 1:
            return n
        else:
            return fib(n-1) + fib(n-2)
    """,
    """
    def factorial(n):
        if n == 1:
            return 1
        else:
            return n * factorial(n-1)
    """,
]

# One row per snippet, in a single 'text' column
df = pd.DataFrame({'text': texts})

In [11]:
# Embed every code snippet with the default embedding model
df['ada_embedding'] = df['text'].apply(get_embedding)

In [12]:
# Natural-language search over the code snippets
ans = search_reviews(
    df,
    product_description='the function that helps count the sum of fibanachi series',
)

In [13]:
# Bare last expression: renders the snippets ranked by similarity to the query
ans

Unnamed: 0,text,ada_embedding,similarities
1,\n def fib(n):\n if n <= 1:\n ...,"[0.010823030024766922, -0.02496727555990219, -...",0.752115
0,"def sum(a, b): return a + b","[0.012739108875393867, 0.0042004454880952835, ...",0.750467
2,\n def factorial(n):\n if n == 1:\n ...,"[0.02054434083402157, -0.0212287288159132, -0....",0.713529


In [17]:
from openai import OpenAI
from pprint import pprint
import os

# Take the API key from the environment rather than hardcoding it in the
# notebook, so the secret is never saved or shared with the file.
# NOTE: this rebinds the notebook-wide `client` name.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Ask the chat model how to build a function/class tree from a directory of
# Python files; the system message sets the assistant's persona.
completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "You are a programmer assistant, skilled in explaining complex programming concepts. Act as a documentation expert for software."},
        {"role": "user", "content": "How to make a tree of function and classes from directory of files in Python. I need it to iterate recurently from the top to the botom to put the code of function with context of functions from botom to LLM model to write documentation to it."},
    ],
)

# Split the reply on newlines so pprint shows it one line per list element
pprint(completion.choices[0].message.content.split('\n'))

['Creating a tree of functions and classes from a directory of files in '
 'Python, and then using that information to generate documentation, can be '
 'achieved through the following steps:',
 '',
 '1. **Parsing Source Files**: You will need to parse Python source files to '
 'extract the classes and functions. You can use the `ast` module in Python to '
 'parse the abstract syntax tree (AST) of the source files.',
 '',
 '2. **Recursive Traversal**: To navigate through the directory structure '
 "recursively, you'll use the `os` module to list files and directories and to "
 'walk the directory tree.',
 '',
 "3. **Organizing Data**: Once you've parsed the functions and classes, "
 'organize them in a tree structure that maintains the relationships and '
 'contexts of the code.',
 '',
 '4. **Generating Documentation**: With the structure in hand, you can pass '
 'the contents to a language model or documentation generator, providing the '
 'necessary context for generating coherent do

In [None]:
# Saved copy of the model's reply from the chat-completion cell, one list
# element per line of the reply (adjacent string literals are concatenated by
# Python, which is how pprint wrapped the long lines). Keeping it in a named
# list makes the cell valid Python and lets later cells reuse the text without
# calling the API again.
response_lines = [
    'Creating a tree of functions and classes from a directory of files in '
    'Python, and then using that information to generate documentation, can be '
    'achieved through the following steps:',
    '',
    '1. **Parsing Source Files**: You will need to parse Python source files to '
    'extract the classes and functions. You can use the `ast` module in Python to '
    'parse the abstract syntax tree (AST) of the source files.',
    '',
    '2. **Recursive Traversal**: To navigate through the directory structure '
    "recursively, you'll use the `os` module to list files and directories and to "
    'walk the directory tree.',
    '',
    "3. **Organizing Data**: Once you've parsed the functions and classes, "
    'organize them in a tree structure that maintains the relationships and '
    'contexts of the code.',
    '',
    '4. **Generating Documentation**: With the structure in hand, you can pass '
    'the contents to a language model or documentation generator, providing the '
    'necessary context for generating coherent documentation.',
    '',
    "Here's a step-by-step Python example that outlines the process:",
    '',
    '```python',
    'import os',
    'import ast',
    '',
    '# A simple class to hold information about classes and functions',
    'class CodeElement:',
    '    def __init__(self, name, type, body=None, parent=None):',
    '        self.name = name',
    '        self.type = type',
    '        self.body = body',
    '        self.parent = parent',
    '        self.children = []',
    '',
    '    def add_child(self, child):',
    '        self.children.append(child)',
    '',
    '',
    '# Recursive function to create a tree of code elements from an AST node',
    'def build_tree(node, parent=None):',
    '    tree = None',
    '    if isinstance(node, ast.FunctionDef):',
    "        tree = CodeElement(node.name, 'function', node, parent)",
    '    elif isinstance(node, ast.ClassDef):',
    "        tree = CodeElement(node.name, 'class', node, parent)",
    '    ',
    "    if tree and hasattr(node, 'body'):",
    '        for child_node in node.body:',
    '            child_tree = build_tree(child_node, parent=tree)',
    '            if child_tree:',
    '                tree.add_child(child_tree)',
    '    ',
    '    return tree',
    '',
    '',
    '# Function to parse a Python source file and build a tree of code elements',
    'def parse_source_file(file_path):',
    "    with open(file_path, 'r') as source_file:",
    '        source_code = source_file.read()',
    '    ',
    '    tree = ast.parse(source_code)',
    '    return build_tree(tree)',
    '',
    '',
    '# Recursive function to iterate through a directory and collect code element '
    'trees',
    'def iterate_directory(path, trees):',
    '    for item in os.listdir(path):',
    '        item_path = os.path.join(path, item)',
    '        if os.path.isdir(item_path):',
    '            iterate_directory(item_path, trees)',
    "        elif item_path.endswith('.py'):",
    '            trees.append(parse_source_file(item_path))',
    '',
    '',
    '# Wrapper function to start the process from the top-level directory',
    'def generate_documentation_from_directory(top_directory):',
    '    trees = []',
    '    iterate_directory(top_directory, trees)',
    '    ',
    "    # You can now iterate over the 'trees' list and pass the nodes",
    '    # to your language model or documentation generator.',
    '    # This part is left as an exercise.',
    '',
    "    # TODO: Process 'trees' to generate documentation",
    '',
    '    return trees',
    '',
    '',
    '# Example usage',
    "top_level_directory = '/path/to/your/python/project'",
    'trees = generate_documentation_from_directory(top_level_directory)',
    '',
    "# At this point, you should have a 'trees' list that contains CodeElement "
    'objects',
    '# representing the class and function hierarchy from your directory of '
    'Python files.',
    '```',
    '',
    'This example provides a structure, but not the actual generation of '
    'documentation. For the documentation generation, you will need to create a '
    'system that traverses the `CodeElement` trees and extracts information such '
    'as function parameters, docstrings, and class methods, then uses that '
    'structure as context for a language model or documentation generator.',
    '',
    "If you intend to use a large language model (LLM) like OpenAI's GPT-3 to "
    'write documentation, you will have to provide the extracted information in a '
    'way that the model can understand and generate meaningful documentation. '
    'Remember that using such a model might require additional processing to '
    'convert the code context into natural language prompts that the model can '
    'handle effectively.',
    '',
    'Keep in mind that this code does not handle all the edge cases and possible '
    'structures found in Python code. You might need to expand it to cover more '
    'Python features and produce complete documentation.',
]

# The reply was split on newlines for display; joining restores the plain text
# (no quotes, commas or list brackets).
response_text = '\n'.join(response_lines)

In [None]:
import os
import ast

# A simple class to hold information about modules, classes and functions
class CodeElement:
    """One node in the tree of code elements found in a source file."""

    def __init__(self, name, type, body=None, parent=None):
        self.name = name        # element name (the file path for module roots)
        self.type = type        # 'module', 'class' or 'function'
        self.body = body        # the ast node this element was built from
        self.parent = parent    # enclosing CodeElement, or None for a root
        self.children = []      # nested CodeElements, in source order

    def add_child(self, child):
        """Append ``child`` to this element's list of children."""
        self.children.append(child)


# Recursive function to create a tree of code elements from an AST node
def build_tree(node, parent=None):
    """Build a ``CodeElement`` tree rooted at ``node``.

    Args:
        node: An ``ast`` node. Modules, classes and (async) functions become
            elements; any other node type is ignored.
        parent: The ``CodeElement`` that encloses ``node``, if any.

    Returns:
        The ``CodeElement`` for ``node`` with its children attached, or
        ``None`` when ``node`` is not a module, class or function.

    Only definitions that appear directly in a body are collected; a ``def``
    nested inside an ``if``/``try``/loop statement is not visited.
    """
    if isinstance(node, ast.Module):
        # ast.parse() always returns a Module; without this branch the root
        # was rejected and every parsed file produced None.
        tree = CodeElement('<module>', 'module', node, parent)
    elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
        tree = CodeElement(node.name, 'function', node, parent)
    elif isinstance(node, ast.ClassDef):
        tree = CodeElement(node.name, 'class', node, parent)
    else:
        return None

    for child_node in node.body:
        child_tree = build_tree(child_node, parent=tree)
        if child_tree is not None:
            tree.add_child(child_tree)

    return tree


# Function to parse a Python source file and build a tree of code elements
def parse_source_file(file_path):
    """Parse ``file_path`` and return its module-level ``CodeElement`` tree.

    The returned root has type ``'module'`` and is named after ``file_path``.

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file is not valid Python source.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding.
    with open(file_path, 'r', encoding='utf-8') as source_file:
        source_code = source_file.read()

    tree = build_tree(ast.parse(source_code, filename=file_path))
    tree.name = file_path
    return tree


# Recursive function to iterate through a directory and collect code element trees
def iterate_directory(path, trees):
    """Append a tree to ``trees`` for every ``.py`` file under ``path``.

    Sub-directories are visited recursively. Entries are processed in sorted
    order so the result does not depend on the file system's listing order.
    """
    for item in sorted(os.listdir(path)):
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            iterate_directory(item_path, trees)
        elif item_path.endswith('.py'):
            trees.append(parse_source_file(item_path))


# Wrapper function to start the process from the top-level directory
def generate_documentation_from_directory(top_directory):
    """Collect the code-element trees for all Python files in ``top_directory``.

    Returns:
        A list with one module-rooted ``CodeElement`` per ``.py`` file found.
    """
    trees = []
    iterate_directory(top_directory, trees)

    # You can now iterate over the 'trees' list and pass the nodes
    # to your language model or documentation generator.
    # This part is left as an exercise.

    # TODO: Process 'trees' to generate documentation

    return trees


# Example usage
top_level_directory = '/path/to/your/python/project'
# The path above is a placeholder; only scan when it points at a real
# directory so the cell runs cleanly before the path has been edited.
if os.path.isdir(top_level_directory):
    trees = generate_documentation_from_directory(top_level_directory)
else:
    trees = []