In [1]:
# Import all packages
from openai import OpenAI
import json
import os
from tqdm import tqdm
import time

In [2]:
# Replace 'YOUR_API_KEY' with your actual OpenAI API key
API_KEY = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
client = OpenAI(api_key=API_KEY)

In [1]:
# Provide instruction to the assistant API. These instruction were obtained through meta analysis
PROMPT_1 = "Refer to the PDF for five exercise prompts: Coding, Parsons, MCQs, Troubleshooting, Fill in the Blanks. For each, create an exercise matching the indicated type. Use placeholders '<<<' and '>>>' for specific details provided in the thread. Exercises should align with the course's progression and context. Ensure JSON formatted responses with consistent key components as outlined."

f"""In the PDF provided there are 5 different prompts (Prompt1, Prompt 2, Prompt 3, ...) to generate 5 different types of excercises i.e.  
        1) Coding Excercise
        2) Parsons Excercise
        3) Multiple Choice Questions
        4) Troubleshooting Problems
        5) Fill in the blanks
        Please only look at the prompt that corresponds to the excercise instructed. The values for the placeholders indicated by "<<<" and ">>>" will be provided to you in the thread.
  """

'In the PDF provided there are 5 different prompts (Prompt1, Prompt 2, Prompt 3, ...) to generate 5 different types of excercises i.e.  \n        1) Coding Excercise\n        2) Parsons Excercise\n        3) Multiple Choice Questions\n        4) Troubleshooting Problems\n        5) Fill in the blanks\n        Please only look at the prompt that corresponds to the excercise instructed. The values for the placeholders indicated by "<<<" and ">>>" will be provided to you in the thread.\n      '

In [4]:
# file = client.files.create(
#   file=open("Prompt_Draft_5.pdf", "rb"),
#   purpose='assistants'
# )

In [3]:
# Create a new assistant for every new host i.e. the instructor or the class or the course or the university
assistant = client.beta.assistants.create(
  name="Excercise Assistant 4 (no retrieval - multithread test - prompt draft 6)",
  description="This assistant will help you generate excercises for your Introduction to Python Programming course.",
  instructions = "You are an instructor for an Introduction to Python Programming course. You want to generate excercises for your students to practice. There are 5 different types of excercises to generate. Details will be provided in the thread.",
  model="gpt-4-1106-preview",
  # tools=[{"type": "retrieval"}],
  # file_ids=[file.id]
)

In [4]:
# Create a new thread for every new user
# thread = client.beta.threads.create()

In [4]:
FULL_CATALOG = course_catalogue = {
    "Variables and Data Types": [
        "Variable Declaration (int float string boolean tuples)",
        "Variable Assignment",
        "Variable Scope",
        "Constants",
        "Multiple Assignment",
        "Simultaneous Assignment",
        "Augmented Assignment Operators",
        "Type Conversion or Type Casting",
        "Boolean Types",
        #"Using is digit to check"
    ],
    "Strings": [
        "Print Statements",
        "Escape Sequences",
        "String formatting",
        "String concatenation",
        "String slicing",
        "String methods (e.g. upper() lower() find() etc.)",
        "Escaping characters in strings"
    ],
    "Conditional Statements": [
        "Basic If statement",
        "Basic If else statement",
        "if-elif-else statements",
        "Nested if statements",
        "Comparison operators in if statements"
    ],
    "For Loops": [
        "Convert from while to for",
        "for loop with range iterate over string",
        "for loop with linespace iterate over a list",
        "Nested for loop",
        "for and while loop combo",
        "for loop with a break and continue statements",
        "Dynamic for loop"
    ],
    "While Loops": [
        "Simple While Loops",
        "While loop with conditions",
        "Infinite While Loops",
        "While loop with break and continue statements",
        "While loop with else statement"
    ],
    "Lists": [
        "Single data type",
        "Multiple data types",
        "Nested lists or 2D lists",
        "Variable length lists",
        "List comprehension"
    ],
    "Dictionaries": [
        "Creating a dictionary",
        "Accessing values in a dictionary",
        "Nested Dictionaries (JSON)",
        "Modifying values in a dictionary",
        "Dictionary methods (e.g. keys() values() items())",
        "Nested dictionaries"
    ],
    "Functions (Modular Programming)": [
        "Defining a function",
        "Parameters and arguments",
        "Return statement",
        "Default parameters",
        "Variable-length arguments"
    ],
    "File Handling": [
        "Opening and reading a file",
        "Writing to a file",
        "Appending to a file",
        "Closing a file",
        "Handling file exceptions"
    ],
    "Error Handling": [
        "Try-except block",
        "Handling specific exceptions",
        "Try-except-finally block",
        "Raising custom exceptions",
        "Using the else clause with try-except"
    ],
    "Modules": [
        "Importing modules",
        "Creating your own module",
        "Using aliases for module names",
        "Exploring module contents",
        "Using built-in modules (e.g. math random)"
    ],
    "Classes and Objects": [
        "Object Methods",
        "Class Declaration",
        "Creating a class",
        "Defining attributes and methods",
        "Creating an object",
        "Encapsulation",
        "Inheritance",
        "Multiple Inheritance",
        "Polymorphism"
    ],
    "Recursion": [
        "Defining a recursive function",
        "Base case in recursion",
        "Factorial using recursion",
        "Fibonacci sequence using recursion",
        "Handling recursion depth"
    ]
    # ,
    # "Other": [
    #     "Input",
    #     "Input Validation Exercises",
    #     "Comments"
    # ]
}

In [2]:
FULL_PROMPTS = {
"Coding Exercise": """
    1.	Role and Objective: As an AI, your task is to create Python programming exercises tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.	Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    3. Output Requirements: Your output should be JSON formatted and include the following components:
      a. "exercise":
        *. "task": Provide the task details as a concise and clear MARKDOWN formatted string.
        *. “sample input”: If applicable, offer an example input here in MARKDOWN format; otherwise, return 'NA'.
        *. “sample output”: Similarly, provide an example output in MARKDOWN format; otherwise, return 'NA'.
      b. “solution”: Include the correct Python code solution to the exercise as a string of python code.
      c. “prerequisite”: Mention any prerequisite knowledge, using a maximum of 50 words, formatted in MARKDOWN. Depending on the exercise, you can provide some background information here if needed. 
      d. “hint”: Optionally, provide hints to assist students, without revealing the solution. Limit this to 50 words and format in MARKDOWN.
    4. Formatting Example: Follow exactly the JSON structure exemplified below for CATEGORY: FUNCTION (MODULAR PROGRAMMING) | SUBCATEGORY: DEFINING A FUNCTION | DIFFICULTY: MEDUIM:
      ```json
      {
        "exercise": {
          "task": "Write a Python program that defines a function named `is_palindrome` which takes a string as input and returns `True` if the string is a palindrome (reads the same forward and backward, ignoring case), and `False` otherwise.",
          "sample input": "`is_palindrome('Level')`",
          "sample output": "`True` because 'Level' reads the same forwards and backwards when case is ignored.",
        },
        "solution": "def is_palindrome(s):\n    s = s.lower()\n    return s == s[::-1]\n\n# Example usage:\nprint(is_palindrome('Level')) # Outputs: True",
        "prerequisite": "Students should be familiar with string manipulation, lowercasing, and slicing in Python.",
        "hint": "Consider using string methods to normalize the case and slicing to reverse the string."
      }
      ```
    5.	Consistency and Analysis: Maintain consistent formatting for these exercises, as they will be used later for analysis of the AI's exercise generation capabilities.
    6.	Cultural and Contextual Relevance: If applicable, incorporate elements that consider the cultural and contextual backgrounds of the students, to make the exercises more engaging and relatable.
          """,


"Parsons Exercise": """
    1.	Role and Objective: As an AI, your task is to create PYTHON PARSONS EXERCISE tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.	Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    3. Output Requirements: Your output should be JSON formatted and include the following components:
      a. "exercise":
        *. "task": Write a clear and concise code description as a MARKDOWN formatted string. This should clearly explain the parson excercise requirement.
        *: "lines of code":  Present code blocks in MARKDOWN format, labeled with letters such as A, B, C, etc. Create interest by varying the sizes of these code chunks instead of simply dividing each line of code into separate blocks.
      b. “solution”:  Provide the correct sequence of code blocks as a solution to the exercise. List the blocks in their correct order, for example, ['D', 'B', 'C', 'A', ...].
      c. “prerequisite”: Detail any necessary prerequisite knowledge, using a maximum of 50 words and formatted in MARKDOWN. Include relevant background information as needed.
      d. “hint”: Optionally, provide hints to assist students, without revealing the solution. Limit this to 50 words and format in MARKDOWN.
    4. Formatting Example: Follow exactly the JSON structure exemplified below for CATEGORY: VARIABLES AND DATA TYPES | SUBCATEGORY: VARIABLE DECLARATION | DIFFICULTY: MEDUIM:
      ```json
      {
        "exercise": {
          "task": "Rearrange the following code blocks to declare a tuple named `weather_data` with the following values: date = '2023-04-15', temperature = 22.4, humidity = 60, is_raining = False. Then, print the following string using the tuple: 'On 2023-04-15, the temperature was 22.4C with 60% humidity and raining: False'.",
          "lines of code": {
            "A: weather_data = (date, temperature, humidity, is_raining)",
            "B: date = '2023-04-15'",
            "C: temperature = 22.4",
            "D: humidity = 60`",
            "E: is_raining = False",
            "F: print(f'On {date}, the temperature was {temperature}C with {humidity}% humidity and raining: {is_raining}')"
          },
          "solution": ["B", "C", "D", "E", "A", "F"],
          "prerequisite": "Knowledge of tuples and formatting string output is expected. Students should be able to assign and use variables before they are printed.",
          "hint": "Remember to declare all your variables before you attempt to group them into a tuple. Then, use the tuple to format your print statement."
        }
      }
      ```
    5.	Consistency and Analysis: Maintain consistent formatting for these exercises, as they will be used later for analysis of the AI's exercise generation capabilities.
    6.	Cultural and Contextual Relevance: If applicable, incorporate elements that consider the cultural and contextual backgrounds of the students, to make the exercises more engaging and relatable.
""",


"Multiple Choice Questions": """ 
    1.	Role and Objective: As an AI, your task is to create PYTHON MULTIPLE CHOICE QUESTIONS EXERCISE tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.	Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    3. Output Requirements: Your output should be JSON formatted and include the following components:
      a. "exercise": 
        *. "task": Provide a general description of the MCQs, formatted in MARKDOWN.
        *. “questions”: Present five questions in a JSON format, using MARKDOWN for formatting. Include a code snippet if relevant.
      b. “solution”: State the correct answers in a string of python dictionary, e.g., {1:"B", 2:"C", 3:"A", ...}.
      c. “prerequisite”: Detail any necessary prerequisite knowledge, using a maximum of 50 words and formatted in MARKDOWN. Include relevant background information as needed.
      d. “hint”: Optionally, provide hints to assist students, without revealing the solution. Limit this to 50 words and format in MARKDOWN.
    4. Formatting Example: Follow exactly the JSON structure exemplified below for CATEGORY: VARIABLES AND DATA TYPES | SUBCATEGORY: VARIABLE DECLARATION | DIFFICULTY: MEDUIM:
      
      ```json
            {
              "exercise": {
                "task": "The following multiple-choice questions are designed to evaluate a student's understanding of variable declaration and the use of different data types in Python.",

                "questions": {
                  "1": "Given the variable declaration `pi = 3.14159`, which of the following statements is true?",
                  "options_1": {
                    "A": "`pi` is an integer",
                    "B": "`pi` is a tuple",
                    "C": "`pi` is a float",
                    "D": "`pi` is a string"
                  },

                  "2": "How would you correctly create a tuple `t` with elements 1, 2, and 3?",
                  "options_2": {
                    "A": "t = 1, 2, 3",
                    "B": "t = (1, 2, 3,)",
                    "C": "t = (1, 2, 3)",
                    "D": "A and C are both correct"
                  },

                  ...
                },

                "solution": {
                  "1": "C",
                  "2": "D",
                  "3": "B",
                  "4": "B",
                  "5": "B"
                },

                "prerequisite": "Students should have a good understanding of Python syntax for variables and data types, including integers, floats, booleans, strings, and tuples.",
                "hint": "Recall the syntax for declaring variables and remember how Python differentiates between tuples and other data types based on the commas and parentheses used."
              }
            }
      ```
    5.	Consistency and Analysis: Maintain consistent formatting for these exercises, as they will be used later for analysis of the AI's exercise generation capabilities.
    6.	Cultural and Contextual Relevance: If applicable, incorporate elements that consider the cultural and contextual backgrounds of the students, to make the exercises more engaging and relatable.
""",




"Troubleshooting Problem": """
    1.  Role and Objective: As an AI, your task is to PYTHON TROUBLESHOOTING EXERCISE tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.  Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    3. Output Requirements: Your output should be JSON formatted and include the following components:
      a. "exercise":
        *. "scenario": Provide the scenario in MARKDOWN format
        *. “provided code”: Provide the string of Python code that the student needs to troubleshoot.
        *. “Instructions”: Insert any necessary instructions here. Ensure they are clear and to the point. If no specific instructions are needed, simply write NA in this section.
      b. “solution”:  Insert the correct solution to the exercise here, formatted as a string of Python code.
      c. “prerequisite”: Specify any prerequisites required for this exercise, such as understanding of natural numbers or familiarity with certain mathematical or geometric concepts. If no prerequisites are needed, simply write NA in this section.
    4. Formatting Example: Follow exactly the JSON structure exemplified below for CATEGORY: VARIABLES AND DATA TYPES | SUBCATEGORY: VARIABLE DECLARATION | DIFFICULTY: MEDUIM:
    
    ```json
          {
            "exercise": {
              "scenario": "You are tutoring a group of freshmen in an 'Introduction to Python Programming' course. One of the students has written a script to calculate the area of a rectangle. The script is supposed to ask for the rectangle's length and width, calculate the area, and print the result. However, the script isn't running correctly, and the student needs help to troubleshoot the errors.",
              "provided code": "length = input('Enter the length of the rectangle: ')\nwidth = input('Enter the width of the rectangle: ')\narea = lenght * width\nprint('The area of the rectangle is: ' + area)",
              "Instructions": "Identify the mistakes in the code and correct them to ensure the program calculates the area properly and outputs it to the user.",
              }
          "solution": "length = float(input('Enter the length of the rectangle: '))\nwidth = float(input('Enter the width of the rectangle: '))\narea = length * width\nprint('The area of the rectangle is: ' + str(area))",
          "prerequisite": "Students should understand the concept of variables and data types, especially how to use input functions, perform arithmetic operations, and handle type casting."
            }
          }
    ```
    5.  Consistency and Analysis: Maintain consistent formatting for these exercises, as they will be used later for analysis of the AI's exercise generation capabilities.
    6.  Cultural and Contextual Relevance: If applicable, incorporate elements that consider the cultural and contextual backgrounds of the students, to make the exercises more engaging and relatable.
    """,




"Fill in the Blanks": """
    1.  Role and Objective: As an AI, your task is to create a PYTHON FILL IN THE BLANKS EXERCISE tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.  Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    
    3. Output Requirements: Your output should be JSON formatted and include the following components:
      a. "exercise":
        *. "task": This exercise covers various fundamental concepts of Python, such as syntax, data types, control structures, functions, and basic programming practices.
        *. “questions”: Provide five questions in JSON. Use MARKDOWN for formatting. If the question is a code snippet, then provide the string of python code.
      b. “solution”: Provide the correct answer as JSON {1: string, 2:indentation, 3:if, etc.}
      c. “prerequisites”: What are the prerequisites? Depending on the exercise, you can provide some background information if needed.  Make sure it's MARKDOWN formatted—50 words max.
      b. “hint”: If you deem it necessary, use this paragraph to provide hints to the student. Don't give away the solution/answer here. Make sure it's MARKDOWN formatted—50 words max.
    
    4. Formatting Example: Follow exactly the JSON structure exemplified below for CATEGORY: VARIABLES AND DATA TYPES | SUBCATEGORY: VARIABLE DECLARATION | DIFFICULTY: MEDUIM:
      ```json
        {
          "exercise": {
            "task": "This exercise focuses on deepening the understanding of Python's core concepts, including variable handling and syntax conventions.",

            "questions": {
              "1": "A _________ in Python is a named ...",
              "2": "In Python, the _________ keyword is used to define a new function.",
              "3": "A _________ is a special type of ..",
              ...
            },

            "solution": {
              "1": "variable",
              "2": "x, y",
              "3": "def",
              "4": "list",
              "5": "True, False"
            },

            "prerequisites": "Basic knowledge of Python including variables, data types, functions, and lists, as well as familiarity with boolean values.",
            "hint": "Consider the fundamental usage of variables, how assignment works, the syntax for creating functions and lists, and the two possible boolean values in Python."
          }
        }
      ```
    5.  Consistency and Analysis: Maintain consistent formatting for these exercises, as they will be used later for analysis of the AI's exercise generation capabilities.
    6.  Cultural and Contextual Relevance: If applicable, incorporate elements that consider the cultural and contextual backgrounds of the students, to make the exercises more engaging and relatable.
    """
}




In [6]:
code, parsons, mcq, troubleshooting, fill_in_the_blanks = """exercise,task,sample input,sample output,solution,prerequisite,hint
exercise,task,lines of code,"A:,"B:,"C:,"D:,"E:,"F:, solution,prerequisite,hint
exercise,task,questions,1,2,3,4,5,options_1,options_2,options_3,options_4,options_5,A,B,C,D,solution,prerequisite,hint
exercise,scenario,provided code,Instructions,solution,prerequisite
exercise,task,questions,1,2,3,4,5,solution, 1,2,3,4,5,prerequisites,hint""".split("\n")

code_keys, parsons_keys, mcq_keys, troubleshooting_keys, fill_in_the_blanks_keys = code.split(","), parsons.split(","), mcq.split(","), troubleshooting.split(","), fill_in_the_blanks.split(",")


all_keys = code_keys + parsons_keys + mcq_keys + troubleshooting_keys + fill_in_the_blanks_keys

In [7]:
list(set(all_keys))

['sample output',
 '"E:',
 '"C:',
 'solution',
 'prerequisite',
 '"A:',
 '5',
 'hint',
 'A',
 'options_4',
 'sample input',
 '"F:',
 '3',
 'exercise',
 'options_3',
 'options_2',
 'provided code',
 'D',
 'task',
 '"B:',
 '2',
 ' solution',
 'options_5',
 'prerequisites',
 'Instructions',
 '"D:',
 ' 1',
 '1',
 'questions',
 'scenario',
 '4',
 'C',
 'options_1',
 'B',
 'lines of code']

In [8]:
# Sum all the values in the dictionary FULL_CATALOG
TOTAL_SUBCATEGORIES = sum([len(value) for key, value in FULL_CATALOG.items()])
TOTAL_SUBCATEGORIES

78

In [None]:
"""

Given the instructions above. You are to create an excercise by filling the placeholders indicated by <<< >>>:
SUBCATEGORY: Basic if else statement,
CATEGORY: Conditional Statements,
DIFFICULTY:  MEDIUM,
Please only provide the JSON as instructed in the prompt.

"""

In [3]:
# TEMP_PROMPT = f"""Look at the PROMPT # {x} ({EXERCISE_TYPE}) and generate an exercise 
# following the instruction in the prompt. Use the following to fill the placeholders in the prompt:
#     SUBCATEGORY:{SUBCATEGORY}, CATEGORY: {CATEGORY}, DIFFICULTY:{DIFFICULTY}.
# Please only provide the JSON as instructed in the prompt.
# """
def get_prompt(FULL_PROMPTS, CATEGORY_ORIG, SUBCATEGORY_ORIG, EXERCISE_TYPE_ORIG, DIFFICULTY_ORIG):
    prompt = f"""You are to create an excercise of 
                    TYPE: ({EXERCISE_TYPE_ORIG}). 
                    INSTRUCTIONS:
                      {FULL_PROMPTS[EXERCISE_TYPE_ORIG]}. 
                    PLACEHOLDERS: Use the following to fill the placeholders indicated by <<< >>>:
                        SUBCATEGORY:{SUBCATEGORY_ORIG}, 
                        CATEGORY: {CATEGORY_ORIG}, 
                        DIFFICULTY:{DIFFICULTY_ORIG},
                    Please only provide the JSON as instructed in the prompt.
                    """
    return prompt

def process_strings(list_of_strings):
   return [element.lower().replace(" ", "_") for element in list_of_strings]

def make_api_call(assistant_id, thread_id, prompt):
    # Replace this part with your API call and processing logic
    message = client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=prompt,
            )
    
    # Create a run
    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        assistant_id=assistant_id,
        # Additional options if needed
    )


    return run, message

def store_text_in_file(category, subcategory, exercise_type, difficulty, file_name, text):
    # Construct the directory path
    dir_path = f"KHAN_GEN/FINAL_RUN/{category}/{subcategory}/{exercise_type}/{difficulty}"

    # Create the directory if it does not exist
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # Write the text to the file
    with open(os.path.join(dir_path, file_name), "w") as f:
        f.write(text)

In [9]:
print(get_prompt(FULL_PROMPTS, "Variables and Data Types", "Variable Declaration", "Fill in the Blanks", "MEDIUM"))
# "Parsons Exercise", "Multiple Choice Questions", "Troubleshooting Problem", "Fill in the Blanks"

You are to create an excercise of 
                    TYPE: (Fill in the Blanks). 
                    INSTRUCTIONS:
                      
    1.  Role and Objective: As an AI, your task is to create a PYTHON FILL IN THE BLANKS EXERCISE tailored for freshmen in an 'Introduction to Python Programming' course at a large public university in the USA. Each time you encounter this prompt, generate a new exercise.
    2.  Exercise Specification:
      a. Category and Subcategory: Design exercises for specific topics identified as <<<CATEGORY>>> and <<<SUBCATEGORY>>>. Your exercises should be consistent with the course's progression as outlined in the COURSE CATALOG. Please reference this catalog to ensure alignment with the current syllabus topics, and avoid introducing concepts not yet covered in the specified category or subcategory.
      b. Difficulty Level: The exercises should match the specified <<<DIFFICULTY>>> level.
    
    3. Output Requirements: Your output should be JSON form

In [10]:
# Export thread_list to a pickle file
import pickle
def export_dict(dict_to_export, file_name):
    with open(file_name, "wb") as f:
        pickle.dump(dict_to_export, f)

def import_dict(path="KHAN_GEN", file_name="thread_list.pkl"):
    file = os.path.join(path, file_name)
    with open(file, "rb") as f:
        return pickle.load(f)

In [16]:
thread_list = import_dict(file_name="PICKLE_FILES/thread_list_run9_final_complete.pkl")
assistant = import_dict(file_name="PICKLE_FILES/assistant_run7_final_complete.pkl")


In [17]:
thread_list_vardec = import_dict(file_name="PICKLE_FILES/thread_list_run8_variable_declaration_subcategory.pkl")
# Append the thread_list_vardec to the thread_list
thread_list.update(thread_list_vardec)


In [21]:
from IPython.display import clear_output
CATALOG = FULL_CATALOG
counter = 0
failed_counter = 0
unknown_counter = 0
#thread_list = {}
# Assuming CATALOG is a dictionary with categories as keys and list of subcategories as values
for CATEGORY_ORIG in list(FULL_CATALOG.keys()):
    SUBCATEGORIES_ORIG = FULL_CATALOG[CATEGORY_ORIG]
    #SUBCATEGORIES_ORIG = [i for i in FULL_CATALOG[CATEGORY_ORIG] if i not in ["Variable Declaration (int float string boolean tuples)"]]
    clear_output(wait=False)
    for SUBCATEGORY_ORIG in SUBCATEGORIES_ORIG:
        for EXERCISE_TYPE_ORIG in list(FULL_PROMPTS.keys()):
            for x, DIFFICULTY_ORIG in enumerate(["Easy", "Medium", "Hard"]):
                CATEGORY, SUBCATEGORY, EXERCISE_TYPE, DIFFICULTY = process_strings([CATEGORY_ORIG, SUBCATEGORY_ORIG, EXERCISE_TYPE_ORIG, DIFFICULTY_ORIG])
                # Create a new thread for every new category, subcategory, exercise type, and difficulty
                #thread_list[f"{CATEGORY}_{SUBCATEGORY}_{EXERCISE_TYPE}_{DIFFICULTY}"] = client.beta.threads.create()
                thread = thread_list[f"{CATEGORY}_{SUBCATEGORY}_{EXERCISE_TYPE}_{DIFFICULTY}"]
                if failed_counter > 5:
                    1/0
                for i in range(0,10):
                    FILE_NAME = f"KHAN_GEN/FINAL_RUN/{CATEGORY}/{SUBCATEGORY}/{EXERCISE_TYPE}/{DIFFICULTY}/{CATEGORY}_{SUBCATEGORY}_{EXERCISE_TYPE}_{DIFFICULTY}_{i}.txt"
                    if os.path.exists(FILE_NAME):
                        print(f"EXISTS: {FILE_NAME}. Skipping...")
                        continue
                    if i == 0:
                        # print(f"Category: {CATEGORY} | Subcategory: {SUBCATEGORY} | Exercise Type: {EXERCISE_TYPE} | Difficulty: {DIFFICULTY}")
                        TEMP_PROMPT = get_prompt(FULL_PROMPTS, CATEGORY_ORIG, SUBCATEGORY_ORIG, EXERCISE_TYPE_ORIG, DIFFICULTY_ORIG)
                        run, message = make_api_call(assistant.id, thread.id, TEMP_PROMPT)
                        
                        # Retrieve the run
                        time.sleep(15)
                        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
                        while run.status == 'in_progress':
                            time.sleep(3)
                            run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

                        FILE_NAME = f"{CATEGORY}_{SUBCATEGORY}_{EXERCISE_TYPE}_{DIFFICULTY}_{i}"
                        if run.status == 'completed':
                            messages = client.beta.threads.messages.list(thread_id=thread.id).data
                            latest_message = messages[0]
                            text = latest_message.content[0].text.value
                            FILE_NAME += ".txt"
                            counter += 1
                        elif run.status == 'failed':
                            text = "FAILED"
                            FILE_NAME += "_failed.txt"
                            failed_counter += 1
                        else:
                            text = "UNKNOWN"
                            FILE_NAME += "_unknown.txt"
                            unknown_counter += 1
                        
                        store_text_in_file(CATEGORY, SUBCATEGORY, EXERCISE_TYPE, DIFFICULTY, FILE_NAME, text)
                        print(f"Created: {FILE_NAME} | Success Counter: {counter} | Failed Counter: {failed_counter} | Unknown Counter: {unknown_counter}")
                    else:
                        # print(f"Category: {CATEGORY} | Subcategory: {SUBCATEGORY} | Exercise Type: {EXERCISE_TYPE} | Difficulty: {DIFFICULTY}")
                        TEMP_PROMPT = "Create another one following the same instructions."
                        run, message = make_api_call(assistant.id, thread.id, TEMP_PROMPT)
                        
                        # Retrieve the run
                        time.sleep(15)
                        run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
                        while run.status == 'in_progress':
                            time.sleep(3)
                            run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

                        FILE_NAME = f"{CATEGORY}_{SUBCATEGORY}_{EXERCISE_TYPE}_{DIFFICULTY}_{i}"
                        if run.status == 'completed':
                            messages = client.beta.threads.messages.list(thread_id=thread.id).data
                            latest_message = messages[0]
                            text = latest_message.content[0].text.value
                            FILE_NAME += ".txt"
                            counter += 1
                        elif run.status == 'failed':
                            text = "FAILED"
                            FILE_NAME += "_failed.txt"
                            failed_counter += 1
                        else:
                            text = "UNKNOWN"
                            FILE_NAME += "_unknown.txt"
                            unknown_counter += 1
                        
                        store_text_in_file(CATEGORY, SUBCATEGORY, EXERCISE_TYPE, DIFFICULTY, FILE_NAME, text)
                        print(f"Created: {FILE_NAME} | Success Counter: {counter} | Failed Counter: {failed_counter} | Unknown Counter: {unknown_counter}")                 
export_dict(thread_list, "KHAN_GEN/thread_list_run8_variable_declaration_subcategory.pkl")
export_dict(assistant, "KHAN_GEN/assistant_run8_variable_declaration_subcategory.pkl")
# Rest of your code

#RUN7 took 2037 minutes and 12.1 seconds
#RUN8(Completing dataset to 10 files) took? started when counter was 3468
# After generating 7266 exercises, the assistant stopped working. Failed 133 times so i shut it down and restarted. The logs were copied. and failure threshold shut down was implemented
# The current thread list doesnt have threads for the first category, they are stored somewhere. I am not sure where. You may need to import them from one of the pkl, probably thread_list.pkl and complete that category after this run.

EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_0.txt. Skipping...
EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_1.txt. Skipping...
EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_2.txt. Skipping...
EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_3.txt. Skipping...
EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_4.txt. Skipping...
EXISTS: KHAN_GEN/FINAL_RUN/recursion/defining_a_recursive_function/coding_exercise/easy/recursion_defining_a_recursive_function_coding_exercise_easy_5.txt. Skip

In [30]:
len(thread_list)

15

In [38]:
export_dict(thread_list, "KHAN_GEN/thread_list_run7_final_backup.pkl")
export_dict(assistant, "KHAN_GEN/assistant_run7_final_backup.pkl")

In [17]:
"""By default, a Run will use the model and tools configuration specified in Assistant object, but you can override most of these when creating the Run for added flexibility:
"""

In [12]:
list(thread_list.keys())[:30]

['variables_and_data_types_variable_assignment_coding_exercise_easy',
 'variables_and_data_types_variable_assignment_coding_exercise_medium',
 'variables_and_data_types_variable_assignment_coding_exercise_hard',
 'variables_and_data_types_variable_assignment_parsons_exercise_easy',
 'variables_and_data_types_variable_assignment_parsons_exercise_medium',
 'variables_and_data_types_variable_assignment_parsons_exercise_hard',
 'variables_and_data_types_variable_assignment_multiple_choice_questions_easy',
 'variables_and_data_types_variable_assignment_multiple_choice_questions_medium',
 'variables_and_data_types_variable_assignment_multiple_choice_questions_hard',
 'variables_and_data_types_variable_assignment_troubleshooting_problem_easy',
 'variables_and_data_types_variable_assignment_troubleshooting_problem_medium',
 'variables_and_data_types_variable_assignment_troubleshooting_problem_hard',
 'variables_and_data_types_variable_assignment_fill_in_the_blanks_easy',
 'variables_and_data_t

In [18]:
while client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id).status != 'completed':
    continue
messages = client.beta.threads.messages.list(thread_id=thread.id).data
latest_message = messages[0]
text = latest_message.content[0].text.value
print(text)

Based on the course catalog provided, I have created a dictionary representation with categories as keys and lists of their respective subcategories as values:

```python
course_catalogue = {
    "Variables and Data Types": [
        "Variable Declaration (int float string boolean tuples)",
        "Variable Assignment",
        "Variable Scope",
        "Constants",
        "Multiple Assignment",
        "Simultaneous Assignment",
        "Augmented Assignment Operators",
        "Type Conversion/Type Casting",
        "Boolean Types",
        "Using is digit to check"
    ],
    "Strings": [
        "Print Statements",
        "Escape Sequences",
        "String formatting",
        "String concatenation",
        "String slicing",
        "String methods (e.g. upper() lower() find())",
        "Escaping characters in strings"
    ],
    "Conditional Statements": [
        "Basic If statement",
        "Basic If else statement",
        "if-elif-else statements",
        "Nested if s

In [105]:
for msg in messages:
    if msg.role == 'assistant' and msg.content[0].text.value[:2] == "**":
        print(msg.content[0].text.value)
        print("\n_________________________________________________________________\n")

**Troubleshooting Problem**

```json
{
  "Task": "Identify and fix the logical, syntactical, or runtime error in the following Python program:",
  "Code": "number = 5\nwhile number < 10:\n    print(number)\n    for i in range(0, number):\n        number += 1",
  "Output": "The program should correctly print the numbers from 5 to 9 without entering an infinite loop.",
  "Hint": "Consider the conditions under which the while loop terminates."
}
```

_________________________________________________________________

**Coding Exercise**

```json
{
  "Task": "Write a Python program using a 'for' loop to replace a 'while' loop that prints all the even numbers from 1 to n where n is provided as input.",
  "Input": "A number 'n' from 1 to 100 indicative of the upper limit of the even numbers range.",
  "Output": "A list of even numbers from 1 to the input 'n'.",
  "Example": "If 'n' is 10, the output should be [2, 4, 6, 8, 10]."
}
```

__________________________________________________________

In [38]:
CATALOG = {
    "Loops": []
}

for category in CATALOG:
    SUBCATEGORIES =   CATALOG[category]
    for subcategory in SUBCATEGORIES:
        message = client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            content=PROMPT_2,
        )

In [36]:
# Get the output from the run
output = run.get('output')
print(output)

AttributeError: 'Run' object has no attribute 'get'

In [33]:
# Assuming 'run' is the result of your previous run creation
run_id = run.id

# Add a user message to the conversation
response = client.beta.threads.runs(run_id).messages.create(
    role="user",
    content=PROMPT_2,
)

TypeError: 'Runs' object is not callable

### Chat Completion

In [None]:
from openai import OpenAI
client = OpenAI()

response = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
    {"role": "user", "content": "Where was it played?"}
  ]
)

## Old Code

In [None]:
# Function to generate exercise
def generate_exercise(category, course_content_pdf):
  prompt = prompts[category] + f" given the course content from {course_content_pdf}"
  response = openai.Completion.create(
    engine="text-davinci-003",
    prompt=prompt,
    max_tokens=500,
    n=1,
    stop=None,
    temperature=0.5,
  )
  return response.choices[0].text.strip()

# Function to check code
def check_code(code, test_cases):
  prompt = f"Check the following Python code against the test cases:\n{code}\n\nTest Cases:\n{test_cases}\nIs the code correct for all test cases?"
  response = openai.Completion.create(
    model="code-davinci-002", 
    prompt=prompt,
    temperature=0,
    max_tokens=100,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
  )
  return response.choices[0].text.strip()

In [None]:
# Example usage  
exercise = generate_exercise("loops", "intro_to_python.pdf")
print(exercise) 

student_code = "ANSWER CODE HERE"
test_cases = "TEST CASE INPUT/OUTPUT HERE"
check_result = check_code(student_code, test_cases)
print(check_result)