In [8]:
# %pip install langchain langchain-openai langchain-community faiss-cpu tiktoken python-dotenv

In [9]:
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA
import ast
import os
from dataclasses import dataclass
from typing import List
import inspect
from dotenv import load_dotenv

import os

In [10]:
# Populate the process environment from a .env file, if one is present,
# before any key lookup happens.
load_dotenv()
# .get() returns None instead of raising when OPENAI_API_KEY is unset.
# NOTE(review): this variable is not referenced by the other visible cells;
# presumably the OpenAI client reads OPENAI_API_KEY from the environment
# on its own — confirm before removing.
openai_api_key = os.environ.get('OPENAI_API_KEY')


In [11]:

@dataclass
class Document:
    """Minimal text-plus-metadata record for one extracted code block.

    Instances are built in ``embed_code_blocks_from_files`` and handed to
    ``FAISS.from_documents``; the result-printing cell reads the same two
    attributes back from each search hit.
    """
    # Source text of the code block (produced with ast.unparse).
    page_content: str
    # Provenance of the block; this notebook stores the keys
    # 'file_path' and 'line_number' here.
    metadata: dict

def extract_code_blocks(file_path):
    """Parse a Python source file and collect the AST nodes worth indexing.

    The result contains, in ``ast.walk`` order, every function, async
    function and class definition found anywhere in the file (nested
    definitions and methods included, so a method appears both on its own
    and inside its class). If the file also has top-level statements that
    are not definitions, they are appended last, wrapped together in a
    single synthetic ``ast.Module``.

    Args:
        file_path: Path of the Python file to parse.

    Returns:
        list of ``ast.AST`` nodes; empty for a file with no statements.

    Raises:
        OSError: if the file cannot be opened.
        SyntaxError: if the file is not valid Python source.
    """
    # Read raw bytes and let the parser resolve the encoding (BOM, PEP 263
    # coding cookie, UTF-8 otherwise). Text mode would decode with the
    # platform's locale encoding, which is wrong for Python source.
    with open(file_path, 'rb') as file:
        content = file.read()
    tree = ast.parse(content, filename=file_path)

    # `async def` is a distinct node type; without it coroutines would never
    # be indexed as their own block.
    definition_types = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    # Definitions at any nesting depth.
    code_blocks = [node for node in ast.walk(tree) if isinstance(node, definition_types)]

    # Remaining module-level statements (imports, assignments, script code).
    top_level_stmts = [node for node in tree.body if not isinstance(node, definition_types)]
    if top_level_stmts:
        # Wrap them in a Module so ast.unparse can render them as one block.
        code_blocks.append(ast.Module(body=top_level_stmts, type_ignores=[]))

    return code_blocks

def embed_code_blocks_from_files(file_paths: List[str], embedding_model: str = "text-embedding-3-large"):
    """Build a FAISS vector store from the code blocks of the given files.

    Each block returned by ``extract_code_blocks`` is rendered back to source
    with ``ast.unparse`` and stored as a ``Document`` whose metadata records
    the originating ``file_path`` and ``line_number``.

    Args:
        file_paths: Paths of the Python files to index.
        embedding_model: Model name passed to ``OpenAIEmbeddings``.

    Returns:
        The vector store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: if no code block was found in any of the files, so
            there is nothing to embed.
    """
    embeddings = OpenAIEmbeddings(model=embedding_model)

    docs = []
    for file_path in file_paths:
        for block in extract_code_blocks(file_path):
            line_number = getattr(block, 'lineno', None)
            if line_number is None:
                # The synthetic Module wrapping top-level statements has no
                # position of its own; report where its first statement
                # starts instead of discarding the location.
                wrapped = getattr(block, 'body', None)
                line_number = getattr(wrapped[0], 'lineno', 'Not available') if wrapped else 'Not available'

            docs.append(
                Document(
                    page_content=ast.unparse(block),
                    metadata={
                        'file_path': file_path,
                        'line_number': line_number
                    },
                )
            )

    if not docs:
        # Fail with a clear message rather than passing an empty list on
        # to the vector store constructor.
        raise ValueError("No code blocks found to embed; check the list of input files.")

    return FAISS.from_documents(docs, embeddings)

def get_python_files_in_directory(directory_path):
    """Recursively list the ``.py`` files below ``directory_path``.

    Args:
        directory_path: Root directory to scan.

    Returns:
        list of paths (each the walked directory joined with the file name),
        in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(directory_path)
        for filename in filenames
        if filename.endswith('.py')
    ]

# Build the index once for the whole notebook: collect the .py files under
# the games directory, then embed their code blocks into a FAISS store.
# `library` is the object the query cell below searches.
directory_path = './games' # Path to the directory containing the Python files
python_files = get_python_files_in_directory(directory_path)

# NOTE(review): this presumably calls the OpenAI embeddings API for every
# code block, so re-running the cell may be slow and billable — confirm.
library = embed_code_blocks_from_files(python_files)
print("FAISS library initialized with embedded functions.")


FAISS library initialized with embedded functions.


In [12]:

# Natural-language question passed to library.similarity_search in the
# next cell.
query = "How do i write an __init__ function in python?"
# Alternative queries (uncomment one to replace the query above):
# query = "give me the __init__function from soduko"
# query = "give me the top level code in pong.py"





In [13]:
# Number of nearest code snippets to retrieve and display.
TOP_K = 4


def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        # 11th, 12th, 13th are irregular: the teens always take 'th'.
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f'{n}{suffix}'


def print_code_response(query_result):
    """Print the file, line number and source text of one search hit.

    Args:
        query_result: Object exposing ``page_content`` (str) and a
            ``metadata`` mapping with 'file_path' and 'line_number'.
    """
    content = query_result.page_content
    metadata = query_result.metadata

    print(f"File: {metadata['file_path']}")
    print(f"Line Number: {metadata['line_number']}")

    print("\nPython code from file:")
    print(content)


# Run the search once; the hits do not change between iterations, so there
# is no reason to repeat the query for every displayed result.
query_results = library.similarity_search(query, k=TOP_K)

# Iterate over what was actually returned so an index with fewer than TOP_K
# entries does not raise IndexError.
for rank, query_answer in enumerate(query_results, start=1):
    print('---------------------------------------------')
    if rank == 1:
        print('The most relevant code snippet is:')
    else:
        print(f'The {_ordinal(rank)} most relevant code snippet is:')
    print_code_response(query_answer)


---------------------------------------------
The most relevant code snippet is:
File: ./games/sodoku.py
Line Number: 6

Python code from file:
def __init__(self):
    self.screen = pygame.display.set_mode((540, 540))
    self.clock = pygame.time.Clock()
    self.font = pygame.font.Font(None, 36)
    self.board = [[5, 3, 0, 0, 7, 0, 0, 0, 0], [6, 0, 0, 1, 9, 5, 0, 0, 0], [0, 9, 8, 0, 0, 0, 0, 6, 0], [8, 0, 0, 0, 6, 0, 0, 0, 3], [4, 0, 0, 8, 0, 3, 0, 0, 1], [7, 0, 0, 0, 2, 0, 0, 0, 6], [0, 6, 0, 0, 0, 0, 2, 8, 0], [0, 0, 0, 4, 1, 9, 0, 0, 5], [0, 0, 0, 0, 8, 0, 0, 7, 9]]
    self.selected = None
---------------------------------------------
The 2th most relevant code snippet is:
File: ./games/memory_puzzle.py
Line Number: 7

Python code from file:
def __init__(self):
    self.screen = pygame.display.set_mode((800, 600))
    self.clock = pygame.time.Clock()
    self.font = pygame.font.Font(None, 36)
    self.setup_game()
---------------------------------------------
The 3th most relevant