# Notebook Purpose
1. Take in the current directory & file structure where the notebook is being run

2. ChatGPT Nano analyzes the current structure and offers an improved file structure for readability and formatting standards

3. Outputs are parsed as a rubric on a 10 point scale and a theoretical new directory structure with previous names found in parentheses on the same line

4. A secondary output is also produced in a custom JSON format for both outputs, independently & together

In [None]:
import os
import openai
import re
import json
# TODO: supply the OpenAI API key -- the functions below expect it as OPENAI_API_KEY

In [None]:
def directory_tree_json():
    """
    Walk the current working directory and record its layout as JSON.

    Every directory reached by ``os.walk`` becomes one entry, keyed by that
    directory's path, holding the names of its immediate sub-folders and
    files.  The tree is written to ``directory_<cwd name>.json`` in the
    current working directory and is also returned.

    Returns:
        dict: ``{directory_path: {"folders": [...], "files": [...]}}`` with
        one entry per directory found under the current working directory.
    """
    home_path = os.getcwd()
    base_name = os.path.basename(home_path)

    # Key each entry by the directory being visited (``root``).  Keying by
    # ``home_path`` made every iteration overwrite the same entry, so only
    # the last directory walked survived, mislabelled as the root.
    # Names are sorted so the output does not depend on filesystem order.
    directory_tree = {
        root: {"folders": sorted(dirs), "files": sorted(files)}
        for root, dirs, files in os.walk(home_path)
    }

    with open(f'directory_{base_name}.json', "w", encoding="utf-8") as json_file:
        json.dump(directory_tree, json_file, indent=4)

    return directory_tree

# Snapshot the current working directory into directory_<cwd name>.json.
directory_tree_json()

In [None]:

def review_directory(api_key=None, model="gpt-4o-mini", max_tokens=1500):
    """
    Ask an OpenAI chat model to grade the saved directory tree.

    Loads ``directory_<cwd name>.json`` from the current working directory,
    sends it to the model with a prompt requesting a grade out of 100 (plus
    reasons and improvements) per criterion, and writes the parsed reply to
    ``graded_<cwd name>.json``.

    Args:
        api_key: OpenAI API key.  When omitted, a module-level
            ``OPENAI_API_KEY`` is used if one is defined; otherwise ``None``
            is passed through and the client applies its own default lookup.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the reply length.  The previous fixed
            limit of 500 cut replies off mid-JSON, which made parsing fail.

    Returns:
        ``None`` on success, or a string of the form ``"Error: <message>"``
        when the request, the JSON parsing or the file write fails.
    """
    home_path = os.getcwd()
    base_name = os.path.basename(home_path)
    tree_file = "directory_" + base_name + '.json'

    if api_key is None:
        # Stay compatible with notebooks that define the key as a global.
        api_key = globals().get("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=api_key)

    with open(tree_file, 'r', encoding="utf-8") as newfile:
        directory_tree_file = json.load(newfile)
    # Serialise back to real JSON: interpolating the dict itself would put
    # its Python repr (single quotes) into a prompt that calls it JSON.
    directory_dump = json.dumps(directory_tree_file)
    print(directory_tree_file)
    prompt = f"""
             Review the given Directory tree json file dictionary and grade it on the parameters given:
             1. Kedro Organization standards
             2. Kedro Naming standards
             3. If applicable, Packaging standards
             The Output should be the following:
             1. Return only valid JSON
             2. Contain a grade for each aspect out of 100 with 2 bullet points for the reasonings for the grades and how to improve the directory tree
             3. The format should follow the structure of the example format:
             example format:
             '{{
                 "graded_content": {{
                     "Kedro_Organization_Standards": {{
                         "grade": 60,
                         "reasons": [
                             "All data files are in the root directory, lacking necessary separation into 'data' and 'notebooks' folders.",
                             "No clear structure following Kedro's modular design principles, which can complicate pipeline management."
                             ],
                         "improvements": "Create separate directories for 'data', 'src', and 'notebooks'."
                         }}
                 }}
             }}'

            '''
            {directory_dump}
            '''

          """

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {'role': 'system', 'content': 'You are a Data pipeline engineer designing a repository, dont be verbose'},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            # JSON mode: ask the API for a single JSON object as the reply.
            response_format={"type": "json_object"},
        )
        choice = response.choices[0]
        output = choice.message.content or ""

        print(output)
        print('output break')

        # A reply that hit the token limit is incomplete JSON; report that
        # directly instead of surfacing an opaque decode error.
        if choice.finish_reason == "length":
            raise ValueError(
                f"Model reply was truncated at max_tokens={max_tokens}; "
                "increase max_tokens."
            )

        # Take everything from the first '{' to the last '}' so that code
        # fences or stray text around the object do not break parsing.
        match = re.search(r'\{.*\}', output, re.DOTALL)
        if match is None:
            raise ValueError("No valid JSON object found in the API output.")

        parsed_json0 = json.loads(match.group(0))
        # Save to JSON file
        with open(f'graded_{base_name}.json', 'w', encoding="utf-8") as json_file:
            json.dump(parsed_json0, json_file, indent=4)

    except Exception as e:
        # Notebook-level boundary: report the failure as a string rather
        # than raising, as callers of this function already expect.
        return f"Error: {str(e)}"
# Grade the saved directory tree; a returned "Error: ..." string signals failure.
review_directory()

{'/content': {'folders': [], 'files': ['anscombe.json', 'README.md', 'california_housing_train.csv', 'mnist_train_small.csv', 'california_housing_test.csv', 'mnist_test.csv']}}
{
    "graded_content": {
        "Kedro_Organization_Standards": {
            "grade": 40,
            "reasons": [
                "All files are placed directly under the '/content' directory, lacking appropriate separation into 'data' and 'notebooks' directories.",
                "The absence of modular structure makes it difficult to distinguish between data files and project documentation."
            ],
            "improvements": "Create dedicated directories such as 'data', 'notebooks', and 'src' to enhance organization."
        },
        "Kedro_Naming_Standards": {
            "grade": 70,
            "reasons": [
                "The filenames are descriptive but do not adhere to a consistent naming convention (e.g., 'anscombe.json' could be clearer with context).",
                "File extensio

In [None]:

def revised_directory(api_key=None, model="gpt-4o-mini", max_tokens=1500):
    """
    Ask an OpenAI chat model for an improved version of the directory tree.

    Loads ``directory_<cwd name>.json`` from the current working directory,
    sends it to the model with a prompt requesting a reorganised tree in
    which renamed entries keep their previous name in double brackets, and
    writes the parsed reply to ``revised_<cwd name>.json``.

    Args:
        api_key: OpenAI API key.  When omitted, a module-level
            ``OPENAI_API_KEY`` is used if one is defined; otherwise ``None``
            is passed through and the client applies its own default lookup.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the reply length.  The previous fixed
            limit of 500 can cut a reply off mid-JSON.

    Returns:
        ``None`` on success, or a string of the form ``"Error: <message>"``
        when the request, the JSON parsing or the file write fails.
    """
    home_path = os.getcwd()
    base_name = os.path.basename(home_path)
    tree_file = "directory_" + base_name + '.json'

    if api_key is None:
        # Stay compatible with notebooks that define the key as a global.
        api_key = globals().get("OPENAI_API_KEY")
    client = openai.OpenAI(api_key=api_key)

    with open(tree_file, 'r', encoding="utf-8") as newfile:
        directory_tree_file = json.load(newfile)
    # Serialise back to real JSON: interpolating the dict itself would put
    # its Python repr (single quotes) into a prompt that calls it JSON.
    directory_dump = json.dumps(directory_tree_file)

    print(directory_tree_file)

    # The example below must itself be valid JSON (it was missing a comma
    # between two file names), since the model is told to imitate it.
    prompt = f"""
             Review the given Directory tree json directory tree and return an improved tree based on the following criteria:
             1. Kedro Organization standards
             2. Kedro Naming standards
             3. If applicable, Packaging standards
             The Output should be the following
             1. Return only valid JSON
             2. Contain an improved directory tree with files and directories allowed to be renamed for proper formatting however the previous name must also be present in double brackets
             3. The format should follow the structure of the example format:
             example format:
             '{{
                 "/directory": {{
                     "folders": [
                      "/folder1",
                      "/folder2"
                     ],
                     "files": [
                         "file.json",
                         "file.csv",
                         "README.md",
                         "file.csv"
                         ]
                     }}
             }}'

            '''
            {directory_dump}
            '''

          """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {'role': 'system', 'content': 'You are a Data pipeline engineer designing a repository, dont be verbose'},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_tokens,
            # JSON mode: ask the API for a single JSON object as the reply.
            response_format={"type": "json_object"},
        )
        choice = response.choices[0]
        output = choice.message.content or ""

        print(output)
        print(repr(output))
        print('output break')

        # A reply that hit the token limit is incomplete JSON; report that
        # directly instead of surfacing an opaque decode error.
        if choice.finish_reason == "length":
            raise ValueError(
                f"Model reply was truncated at max_tokens={max_tokens}; "
                "increase max_tokens."
            )

        clean_output = output.strip()

        # Take everything from the first '{' to the last '}' so that code
        # fences or stray text around the object do not break parsing.
        match = re.search(r'(\{.*\})', clean_output, re.DOTALL)
        if match is None:
            raise ValueError("No valid JSON object found in the API output.")

        parsed_json0 = json.loads(match.group(1))
        # Save to JSON file
        with open(f'revised_{base_name}.json', 'w', encoding="utf-8") as json_file:
            json.dump(parsed_json0, json_file, indent=4)

    except Exception as e:
        # Notebook-level boundary: report the failure as a string rather
        # than raising, as callers of this function already expect.
        return f"Error: {str(e)}"
# Request the improved tree; a returned "Error: ..." string signals failure.
revised_directory()

{'/content': {'folders': [], 'files': ['anscombe.json', 'README.md', 'california_housing_train.csv', 'mnist_train_small.csv', 'california_housing_test.csv', 'mnist_test.csv']}}
```json
{
    "/content": {
        "folders": [],
        "files": [
            "anscombe.json",
            "README.md",
            "california_housing_train.csv",
            "mnist_train_small.csv",
            "california_housing_test.csv",
            "mnist_test.csv"
        ]
    },
    "/data": {
        "folders": [],
        "files": [
            "california_housing_train.csv [[california_housing_train.csv]]",
            "california_housing_test.csv [[california_housing_test.csv]]",
            "mnist_train_small.csv [[mnist_train_small.csv]]",
            "mnist_test.csv [[mnist_test.csv]]"
        ]
    },
    "/data/metadata": {
        "folders": [],
        "files": [
            "anscombe.json [[anscombe.json]]"
        ]
    }
}
```
'```json\n{\n    "/content": {\n        "folders": [],\n  