In [15]:
import os
import openai
import json

# Read the API key from the environment so it is never hard-coded in the notebook.
# The lookup yields None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def get_completion(system, prompt, model="gpt-3.5-turbo", temperature=0):
    """Send one system + user message pair to the chat model and return the reply text.

    Args:
        system: Text used as the system message.
        prompt: Content of the user message. It is converted with ``str()``,
            so non-string values (for example a list of file names) can be
            passed directly.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output. Defaults to
            0, the value that was previously hard-coded here.

    Returns:
        The ``content`` field of the first choice's message in the response.
    """
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": str(prompt)},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message["content"]

# Prompt Engineering Tests

### Version 1
Your task is to automatically categorize and sort files based on a given array of files within a folder. 
Create folder names depending on the context of the names of the given files. 

Provide them in a structured JSON format with the following keys: 
file, folder. 

**Notes for V1:**
- Just a quick test to see whether the approach works. It works well with GPT-3.5-Turbo.

### Version 2
You are an intelligent file sorter. Your task is to analyze an array of files within a given folder and categorize them based on their names and types. 
Note: Your task is only to sort files, not existing folders. Existing folders should be left as they are.
The files could be of any type and their names could suggest a wide variety of activities. The categorization should be fully dependent on the names and types of the files. For instance, a file named 'Lab Report.pdf' suggests an academic activity and should be placed in an 'Academics' folder, while a '.jpg' file with a name like 'FamilyPhoto' suggests a personal activity and should be placed in a 'Family' folder.
Do not default to a generic 'Main' folder for files that are difficult to categorize. Instead, take the time to analyze each file name and type thoroughly to determine the most appropriate category. If a file's category is not immediately clear, consider the broader context or make an educated guess based on similar files.
Pay special attention to file names that include technical terms, abbreviations, or specific patterns. Use these clues to guide your categorization. For example, files that include terms like 'heatmap', 'footprint', or 'config' might be related to a specific project or task and should be categorized accordingly.
Only create subfolders when there are a significant number of files that can be further categorized within a main folder. If there are only a few files of a certain type, they should be kept in the main folder. Unnecessary subfolders should be avoided.
Your output should be a structured JSON format with the following keys: 'file' (the original file name), 'folder' (the main folder where the file should be moved), and 'subfolder' (an optional key, only to be used when necessary for further categorization). 
Remember, the goal is to create a logical and intuitive file system that makes it easy to locate and understand the context of each file. Validate your categorizations to ensure they make sense with the file's name and type.

**Notes for V2:**
- Works for small folders that contain only a few files (e.g., a few PDFs, images, etc.).
- Doesn't work with folders of more specialized files (e.g., a Physics folder with curriculum info, notes, and worksheets, or a Python folder with code, data collection, etc.).
- The prompt may need to be revised to handle more specialized files and to categorize them through more careful analysis.



In [19]:
# Sample file names used to exercise the sorting prompt.
files = [
    'VSA ART x DT - Reimagine Hong Kong | Official Promotional Video.mp4',
    'Grades Calculation.xlsx',
    'University Planning Sheet.xlsx',
    'Steven Pressfield - Do The Work!.pdf',
    'Memoji Image.jpg',
    '2023-08-15 Ethical Hacking Club Updates.pdf',
    'VSA_Yearbook_2022-23.pdf',
    'Cat Pic.JPG',
    'Budget Sheet.xlsx',
]

# Plain (non-f) string: the prompt has no placeholders, and a plain literal
# stays valid even if literal braces (e.g. a JSON example) are added later.
systemPrompt = """
You are an intelligent file sorter. Your task is to analyze an array of files within a given folder and categorize them based on their names and types. 
Note: Your task is only to sort files, not existing folders. Existing folders should be left as they are.
The files could be of any type and their names could suggest a wide variety of activities. The categorization should be fully dependent on the names and types of the files. For instance, a file named 'Lab Report.pdf' suggests an academic activity and should be placed in an 'Academics' folder, while a '.jpg' file with a name like 'FamilyPhoto' suggests a personal activity and should be placed in a 'Family' folder.
Do not default to a generic 'Main' folder for files that are difficult to categorize. Instead, take the time to analyze each file name and type thoroughly to determine the most appropriate category. If a file's category is not immediately clear, consider the broader context or make an educated guess based on similar files.
Pay special attention to file names that include technical terms, abbreviations, or specific patterns. Use these clues to guide your categorization. For example, files that include terms like 'heatmap', 'footprint', or 'config' might be related to a specific project or task and should be categorized accordingly.
Only create subfolders when there are a significant number of files that can be further categorized within a main folder. If there are only a few files of a certain type, they should be kept in the main folder. Unnecessary subfolders should be avoided.
Your output should be a structured JSON format with the following keys: 'file' (the original file name), 'folder' (the main folder where the file should be moved), and 'subfolder' (an optional key, only to be used when necessary for further categorization). 
Remember, the goal is to create a logical and intuitive file system that makes it easy to locate and understand the context of each file. Validate your categorizations to ensure they make sense with the file's name and type.
"""

# Keep the raw reply in its own variable so it can be inspected if parsing fails.
rawResponse = get_completion(systemPrompt, files)
try:
    sortedFiles = json.loads(rawResponse)
except json.JSONDecodeError:
    # The prompt asks for JSON, but the reply is free text; show what came
    # back before re-raising so the failure can be diagnosed.
    print(f"Model did not return valid JSON:\n{rawResponse}")
    raise
print(sortedFiles)

{'files': [{'file': 'VSA ART x DT - Reimagine Hong Kong | Official Promotional Video.mp4', 'folder': 'Videos'}, {'file': 'Grades Calculation.xlsx', 'folder': 'Academics'}, {'file': 'University Planning Sheet.xlsx', 'folder': 'Academics'}, {'file': 'Steven Pressfield - Do The Work!.pdf', 'folder': 'Books'}, {'file': 'Memoji Image.jpg', 'folder': 'Images'}, {'file': '2023-08-15 Ethical Hacking Club Updates.pdf', 'folder': 'Clubs'}, {'file': 'VSA_Yearbook_2022-23.pdf', 'folder': 'Yearbooks'}, {'file': 'Cat Pic.JPG', 'folder': 'Images'}, {'file': 'Budget Sheet.xlsx', 'folder': 'Finance'}]}
