In [10]:
import os
import os
import pickle
from typing import Dict, List, Literal, Optional, Any
from pydantic import BaseModel
import dspy

from dotenv import load_dotenv
load_dotenv()

# Configure DSPy with an OpenAI language model; the model name and max_tokens
# value are hard-coded here.
dspy.configure(lm=dspy.OpenAI(model="gpt-4-0125-preview", max_tokens=4096))

# The codebase to describe is the directory CODEBASE_DIR_NAME located in the
# parent of the current working directory.
CODEBASE_DIR_NAME = 'oa-small-dev'
GIT_PATH = os.path.dirname(os.getcwd())
CODEBASE_ROOT = os.path.join(GIT_PATH, CODEBASE_DIR_NAME)
CODEBASE_ROOT  # bare expression: displays the resolved path as the cell output

'/Users/macbook/Documents/GitHub/oa-small-dev'

In [11]:
# These description models are meant to grow: add more descriptive fields as needed.

class DirDescription(BaseModel):
    """Generated description of a directory.

    Fields:
        name: Name of the directory.
        description: Short summary of the directory.
        long_description: Detailed description the summary was derived from.
    """
    name : str
    description : str
    long_description : str

class FileDescription(BaseModel):
    """Generated description of a generic file.

    Fields:
        name: Name of the file.
        description: Text describing the file's content and purpose.
    """
    name : str
    description : str


class PyFileDescription(BaseModel):
    """Generated description of a Python source file.

    Fields:
        name: Name of the Python file.
        description: Text describing the Python file.
    """
    name : str
    description : str

In [12]:

# DSPy signature for describing one file: takes the file name and its content
# and produces a single free-text `long_description`.
# NOTE(review): DSPy appears to use a Signature's class docstring as prompt
# instructions, so this class is documented with comments only — confirm before
# adding a docstring.
class FileDescriptorSignature(dspy.Signature): # TODO: consider adding more input context, e.g. what the surrounding directory is about.
    file_name = dspy.InputField(desc='The name of the file')
    file_content = dspy.InputField(desc='Content of the File')
    long_description = dspy.OutputField(desc="A description of the file content and purpose.") # NOTE: nothing here hints at the expected length of the description.

class FileDescriptor(dspy.Module):
    """DSPy module that describes a generic file.

    Wraps ``FileDescriptorSignature`` in a ``dspy.ChainOfThought`` predictor.
    """

    def __init__(self):
        super().__init__()
        self.file_descriptor = dspy.ChainOfThought(FileDescriptorSignature)

    def forward(self, file_name:str, file_content:str):
        """Run the predictor on one file and return its prediction.

        Args:
            file_name: Name of the file.
            file_content: Full text content of the file.

        Returns:
            The predictor's result object; callers read its
            ``long_description`` entry.
        """
        return self.file_descriptor(file_name=file_name, file_content=file_content)

class File:
    """A file in the directory tree, with a lazily generated description."""

    def __init__(self, name, content):
        self.name : str = name
        self.content : str = content
        # Filled in by the first call to get_description().
        self.description : Optional[FileDescription] = None

    def get_description(self):
        """Return the file's description, generating and caching it on first use.

        Returns:
            The cached ``FileDescription``; a ``FileDescriptor`` is only
            invoked when no description has been stored yet.
        """
        if self.description is not None:
            return self.description

        prediction = FileDescriptor()(self.name, self.content)
        self.description = FileDescription(
            name=self.name,
            description=prediction.get("long_description"),
        )
        return self.description

    def __repr__(self):
        return "File(name={})".format(self.name)




In [4]:

# DSPy signature for describing one Python file: takes the file name and its
# content and produces a single detailed `long_description`.
# NOTE(review): DSPy appears to use a Signature's class docstring as prompt
# instructions, so this class is documented with comments only — confirm before
# adding a docstring.
class PyFileSignature(dspy.Signature):
    file_name = dspy.InputField(desc='The name of the Python file')
    file_content = dspy.InputField(desc='Content of the Python file')
    # The backslash continuations sit inside the string literal, so the leading
    # whitespace of the continued lines is part of the text passed as `desc`.
    long_description = dspy.OutputField(desc="Detailed description of the Python file.\
                                        the description must contain the documentation for the function classes and methods. \
                                        The description must contain the imports and the dependencies of the file.")

class PyFileDescriptor(dspy.Module):
    """DSPy module that describes a Python source file.

    Wraps ``PyFileSignature`` in a ``dspy.ChainOfThought`` predictor.
    """

    def __init__(self):
        super().__init__()
        self.analyzer = dspy.ChainOfThought(PyFileSignature)

    def forward(self, file_name: str, file_content: str):
        """Run the predictor on one Python file and return its prediction.

        Args:
            file_name: Name of the Python file.
            file_content: Full source text of the file.

        Returns:
            The predictor's result object; callers read its
            ``long_description`` entry.
        """
        return self.analyzer(file_name=file_name, file_content=file_content)


class PyFile(File):
    """A Python source file described by ``PyFileDescriptor``.

    Construction is inherited unchanged from ``File``: ``PyFile(name, content)``.
    """

    def __repr__(self):
        # The closing parenthesis was missing before; this repr is embedded in
        # the directory structure text built by Directory.get_structure().
        return f"PyFile(name={self.name})"

    def get_description(self):
        """Return the file's description, generating and caching it on first use.

        Returns:
            The cached ``PyFileDescription``; a ``PyFileDescriptor`` is only
            invoked when no description has been stored yet.
        """
        if self.description is None:
            python_descriptor = PyFileDescriptor()
            description = python_descriptor(self.name, self.content)
            self.description = PyFileDescription(name=self.name,
                                                description=description.get("long_description"))
        return self.description

In [5]:

# DSPy signature for describing a directory from three text inputs (its
# structure, its files' descriptions and its child directories' descriptions),
# producing one detailed `long_description`.
# NOTE(review): DSPy appears to use a Signature's class docstring as prompt
# instructions, so this class is documented with comments only — confirm before
# adding a docstring.
class DirDescriptorSignature(dspy.Signature):
    directory_structure = dspy.InputField(desc='The structure of the directory')
    files_descriptions = dspy.InputField(desc='The descriptions of the files in the directory.')
    children_descriptions = dspy.InputField(desc='The descriptions of the children directories in the directory.')
    long_description = dspy.OutputField(desc="Detailed descrition of the content of the directory.")

# DSPy signature that condenses a directory's detailed description into a
# short summary.
# NOTE(review): DSPy appears to use a Signature's class docstring as prompt
# instructions, so this class is documented with comments only — confirm before
# adding a docstring.
class DirDescriptonSummarizerSignature(dspy.Signature):
    long_description = dspy.InputField(desc='A detailed description of the directory')
    # The backslash continuation sits inside the string literal, so the leading
    # whitespace of the continued line is part of the text passed as `desc`.
    short_description = dspy.OutputField(desc="Summary of the content of the directory.\
                                                 By looking at this summary, a software engineer should be able to quickly understand what is in the directory and what it is used for.")
    
class DirDescriptor(dspy.Module):
    """DSPy module that describes a directory and then summarizes that description.

    Two ``dspy.ChainOfThought`` predictors run in sequence: one builds the
    detailed description, the other condenses it into a short summary.
    """

    def __init__(self):
        super().__init__()
        self.dir_descriptor = dspy.ChainOfThought(DirDescriptorSignature)
        self.dir_summarizer = dspy.ChainOfThought(DirDescriptonSummarizerSignature)

    def forward(self, children_descriptions: List[DirDescription], files_descriptions: List[FileDescription], directory_structure : str):
        """Produce the detailed and short descriptions of a directory.

        Args:
            children_descriptions: Descriptions of the child directories.
            files_descriptions: Descriptions of the files in the directory.
            directory_structure: Text rendering of the directory tree.

        Returns:
            A ``(long_description, short_description)`` tuple holding the two
            predictor results, in that order.
        """
        # Flatten both description lists into labelled text blocks.
        children_text = "\n".join(
            f"Child Directory: {child.name}\n:{child.description}"
            for child in children_descriptions
        )
        files_text = "\n".join(
            f"File: {entry.name}\n:{entry.description}"
            for entry in files_descriptions
        )

        detailed = self.dir_descriptor(
            children_descriptions=children_text,
            files_descriptions=files_text,
            directory_structure=directory_structure,
        )
        # The summarizer only receives the detailed description text.
        summary = self.dir_summarizer(long_description=detailed.get("long_description"))
        return detailed, summary

class Directory:
    """Node of an in-memory directory tree holding files and sub-directories.

    The generated description is computed on first request and then cached on
    the node.
    """

    def __init__(self, name, parent=None):
        self.name : str = name
        self.parent : str = parent
        self.children : List[Directory] = []
        self.files : List[File] = []
        # Filled in by the first call to get_description().
        self.description : DirDescription = None

    def get_description(self):
        """Return this directory's description, generating it on first use.

        Child directories and files are described first (each via its own
        ``get_description``) and the results are fed to a ``DirDescriptor``.
        """
        if self.description is not None:
            return self.description

        descriptor = DirDescriptor()
        child_descs = [child.get_description() for child in self.children]
        file_descs = [entry.get_description() for entry in self.files]
        structure = self.get_structure()
        detailed, summary = descriptor(child_descs, file_descs, structure)
        self.description = DirDescription(
            name=self.name,
            description=summary.get("short_description"),
            long_description=detailed.get("long_description"),
        )
        return self.description

    def add_file(self, file):
        """Append ``file`` to this directory's file list."""
        self.files.append(file)

    def add_directory(self, directory_name):
        """Create a child ``Directory`` named ``directory_name`` and return it."""
        child = Directory(directory_name, parent=self)
        self.children.append(child)
        return child

    def remove_file(self, file_name):
        """Drop every file whose ``name`` equals ``file_name``."""
        kept = []
        for entry in self.files:
            if entry.name != file_name:
                kept.append(entry)
        self.files = kept

    def find_directory(self, path):
        """Return the first node named ``path`` in a depth-first search, or ``None``.

        Matching is done against each node's ``name`` only.
        """
        if self.name == path:
            return self
        hits = (child.find_directory(path) for child in self.children)
        return next((hit for hit in hits if hit), None)

    def __repr__(self):
        return "Directory(name={})".format(self.name)

    def get_structure(self, indent=0):
        """Render the subtree as indented text.

        This directory is written at ``indent`` spaces followed by ``/``; its
        files and child directories are written four spaces deeper, files
        first.
        """
        own_pad = ' ' * indent
        nested_pad = ' ' * (indent + 4)
        lines = [own_pad + self.name + '/']
        lines.extend(nested_pad + repr(entry) for entry in self.files)
        lines.extend(child.get_structure(indent + 4) for child in self.children)
        return '\n'.join(lines)






In [6]:

class RootDirectory(Directory):
    """Root of the directory tree, optionally populated from a path on disk.

    Args:
        root_path: Directory to load. When falsy, an empty root named
            ``'root'`` is created and nothing is read from disk.
        skip_hidden: When true, directories and files whose names start with
            ``'.'`` are ignored while loading.
    """

    def __init__(self, root_path=None, skip_hidden=True):
        if root_path:
            # basename is empty for paths ending in a separator; fall back to
            # the path itself in that case.
            super().__init__(os.path.basename(root_path) or root_path)
            self.load_structure(root_path, skip_hidden)
        else:
            super().__init__('root')

    def load_structure(self, root_path, skip_hidden):
        """Walk ``root_path`` and mirror its directories and files under this node.

        Only ``.py`` files (stored as ``PyFile``) and ``.txt`` files (stored
        as ``File``) are read. Returns ``None``; if ``root_path`` is not a
        directory nothing is loaded.
        """
        allowed_extensions = ('.py', '.txt')  # Define allowed file extensions

        if not os.path.isdir(root_path):
            return None

        # Map each walked path to its tree node. Looking nodes up by basename
        # would attach content to the wrong node whenever two directories
        # share a name (e.g. src/utils and tests/utils).
        nodes_by_path = {root_path: self}

        for dirpath, dirnames, filenames in os.walk(root_path, topdown=True):
            current_dir = nodes_by_path.get(dirpath)
            if current_dir is None:
                continue

            if skip_hidden:
                # In-place assignment prunes os.walk so hidden directories are
                # never descended into.
                dirnames[:] = [d for d in dirnames if not d.startswith('.')]
                filenames = [f for f in filenames if not f.startswith('.')]

            for dirname in dirnames:
                # os.walk builds child paths with os.path.join(dirpath, name),
                # so the same expression yields matching keys.
                nodes_by_path[os.path.join(dirpath, dirname)] = current_dir.add_directory(dirname)

            for filename in filenames:
                if not filename.endswith(allowed_extensions):
                    continue

                filepath = os.path.join(dirpath, filename)
                # Read as UTF-8 instead of the locale default; undecodable
                # bytes are replaced rather than aborting the whole load.
                with open(filepath, 'r', encoding='utf-8', errors='replace') as file:
                    file_content = file.read()

                file_cls = PyFile if filename.endswith('.py') else File
                current_dir.add_file(file_cls(filename, file_content))

    def save_to_file(self, file_path):
        """Pickle this tree, including any cached descriptions, to ``file_path``."""
        with open(file_path, 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def load_from_file(file_path):
        """Unpickle and return the object stored at ``file_path``.

        WARNING: ``pickle.load`` can execute arbitrary code; only load files
        that this notebook (or another trusted source) produced.
        """
        with open(file_path, 'rb') as f:
            return pickle.load(f)
        

In [7]:
# Example usage: build the tree from CODEBASE_ROOT, generate descriptions for
# the whole tree, then pickle the result so it can be reloaded later.
root_dir = RootDirectory(CODEBASE_ROOT)
_ = root_dir.get_description()  # the result is also cached on root_dir.description

root_dir.save_to_file('root_directory.pkl')

In [8]:
# Reload the pickled tree. load_from_file is a static method, so call it on the
# class instead of constructing a throwaway RootDirectory instance first.
_dir = RootDirectory.load_from_file('root_directory.pkl')

In [9]:
_dir.description  # bare expression: displays the description stored on the reloaded tree

DirDescription(name='oa-small-dev', description='Advanced Python project directory focusing on software development and data analysis with AI integration. Features dependency management, automation, and modular design for scalability and complex tasks.', long_description="This directory encapsulates a sophisticated Python project aimed at leveraging advanced technologies for software development and data analysis. At its root, it contains a `requirements.txt` file that lists all necessary dependencies, ensuring the project's stability across different environments. The `start_architecting.py` file serves as an entry point, setting up the environment and integrating with essential external services like GitHub and possibly OpenAI's API, indicating a focus on automation, code management, and leveraging AI for enhanced functionality. The `src/` directory forms the project's backbone, housing Python files that integrate OpenAI's GPT for natural language processing, Pydantic for robust data