In [None]:
from openai import OpenAI
import os
import configparser
import json
import time
import random

# Read the OpenAI API key from ../openai.txt (one directory above the notebook).
with open('../openai.txt', 'r') as file:
    # Remove every whitespace character, not only "\n": a key file saved with
    # Windows line endings or a trailing space would otherwise yield a key
    # that still contains "\r" / " ".
    api_key = "".join(file.read().split())

# Shared API client used by every helper defined below.
client = OpenAI(api_key=api_key)

In [2]:
def list_models():
    """
    Print the ids of all models returned by the OpenAI API, in sorted order.

    Uses the notebook-level `client`. Returns nothing.
    """
    response = client.models.list()
    print("Available models:")

    # Sort the ids before printing so the listing is easy to scan
    for model_id in sorted(entry.id for entry in response.data):
        print(model_id)

# list_models()

In [3]:
def costs(prompt_tokens, completion_tokens, model=None):
    """
    Calculate the costs based on the number of tokens used.
    Args:
        prompt_tokens (int): The number of prompt tokens used.
        completion_tokens (int): The number of completion tokens used.
        model (str, optional): Model whose prices should be applied. When
            omitted, the notebook-level variable `model_used` is used.
    Returns:
        float: The total cost in dollars.
    Raises:
        ValueError: If no model is set or the model has no entry in the
            price table below.
    """
    # model id -> (price per 1M input tokens, price per 1M output tokens) in USD
    price_table = {
        'gpt-4o-mini': (0.15, 0.6),
        'gpt-4o': (2.5, 10),
        'o4-mini': (1.10, 4.40),
    }

    if model is None:
        # Fall back to the notebook-wide setting; .get() avoids a NameError
        # when the configuration cell has not been run yet.
        model = globals().get('model_used')

    if model not in price_table:
        raise ValueError(
            "Model not set or not supported for cost calculation.")

    price_per_1m_input_tokens, price_per_1m_output_tokens = price_table[model]
    total_cost = ((prompt_tokens / 1000000) * price_per_1m_input_tokens) + \
        ((completion_tokens / 1000000) * price_per_1m_output_tokens)
    return total_cost

In [4]:
def file_upload(location):
    """
    Upload a file to the OpenAI API.
    Args:
        location (str): The file path to upload. Must end in ".pdf"
            (the check ignores letter case, so ".PDF" is accepted too).
    Returns:
        file: The uploaded file object returned by the API.
    Raises:
        ValueError: If the path does not have a PDF extension.
    """
    if not location.lower().endswith('.pdf'):
        raise ValueError("Only PDF files are allowed for upload.")

    # Keep the local file handle and the API response under different names;
    # the handle is closed as soon as the upload call returns.
    with open(location, "rb") as pdf_handle:
        uploaded_file = client.files.create(
            file=pdf_handle,
            purpose="user_data"
        )

    print("File uploaded successfully.")
    return uploaded_file


def file_delete(file_id):
    """
    Remove a previously uploaded file from the OpenAI API.

    Errors are reported on stdout instead of being raised.
    Args:
        file_id (str): The ID of the file to delete.
    Returns:
        bool: True when the delete call succeeded, False when it raised.
    """
    try:
        client.files.delete(file_id)
    except Exception as e:
        print(f"Error deleting file: {e}")
        return False

    print("File deleted successfully.")
    return True


def list_files():
    """
    Print id, filename and status of every file uploaded to the OpenAI API.

    Uses the notebook-level `client`. Returns nothing.
    """
    listing = client.files.list()
    print("Available files:")
    for entry in listing.data:
        print(entry.id, entry.filename, entry.status)

In [5]:
def load_prompts(file_path):
    """
    Read an INI-style prompt file and return one prompt string per section.

    Only "=" is treated as a key/value delimiter and lines without a
    delimiter are accepted. Each option of a section is rendered as its name
    immediately followed by its value, and the rendered options are joined
    with newlines.
    Args:
        file_path (str): Path of the prompt file.
    Returns:
        dict: Maps each section name to its assembled prompt text.
    """
    parser = configparser.ConfigParser(allow_no_value=True, delimiters=("=",))
    parser.optionxform = str  # keep the original letter case of option names
    parser.read(file_path)

    return {
        section: "\n".join(
            f"{key}{value}" for key, value in parser.items(section))
        for section in parser.sections()
    }


def replace_variables(string, **kwargs):
    """
    Fill the `{name}` placeholders of `string` with the given keyword values.

    Thin wrapper around `str.format`.
    Args:
        string (str): Template text containing `{name}` placeholders.
        **kwargs: Values for the placeholders.
    Returns:
        str: The formatted text.
    """
    filled = string.format(**kwargs)
    return filled


def read_files(file_paths):
    """
    Load the settings (.json) and prompts (.txt) stored in one directory.

    If the directory holds several files with the same extension, the one
    that `os.listdir` yields last is used.
    Args:
        file_paths (str): Directory containing one .json and one .txt file.
    Returns:
        tuple: (settings parsed from the JSON file, prompts from load_prompts).
    Raises:
        ValueError: If the .json or the .txt file is missing.
    """
    json_file = txt_file = None
    for entry in os.listdir(file_paths):
        if entry.endswith('.json'):
            json_file = entry
        elif entry.endswith('.txt'):
            txt_file = entry

    if not (json_file is not None and txt_file is not None):
        raise ValueError("Required files not found in the directory.")

    with open(os.path.join(file_paths, json_file), 'r') as handle:
        settings = json.load(handle)

    return settings, load_prompts(os.path.join(file_paths, txt_file))

In [None]:
def llm_call(assistant_history, user_history, print_response=True, max_retries=5):
    """
    Request the next chat completion for `assistant_history` and record the reply.

    The reply is appended in place to both histories: with role "assistant"
    to `assistant_history` (the speaker's own view) and with role "user" to
    `user_history` (the other participant's view). The notebook-level
    counters `global_prompt_tokens` / `global_completion_tokens` are increased
    by the usage reported for the call; they must exist before the call.
    Args:
        assistant_history (list): Messages sent to the model.
        user_history (list): Message list of the other participant.
        print_response (bool): Whether to print the reply text.
        max_retries (int | None): How many times a failed request is retried
            (with exponential backoff) before the last error is re-raised.
            None retries forever.
    Returns:
        tuple: (assistant_history, user_history, prompt_tokens,
            completion_tokens) where the token counts belong to this call.
    Raises:
        Exception: The last API error once `max_retries` is exhausted.
    """
    global global_prompt_tokens, global_completion_tokens

    failures = 0
    while True:
        try:
            response = client.chat.completions.create(
                model=model_used,
                messages=assistant_history,
                max_completion_tokens=500
            )
            break
        except Exception as e:
            print(f"Error: {e}")
            failures += 1
            # Give up instead of looping forever on permanent errors
            # (e.g. an invalid API key or an unknown model).
            if max_retries is not None and failures > max_retries:
                raise
            # Exponential backoff with jitter
            backoff_time = (2 ** failures) + random.random()
            print(f"Retrying in {backoff_time:.2f} seconds...")
            time.sleep(backoff_time)

    prompt_tokens = response.usage.prompt_tokens
    completion_tokens = response.usage.completion_tokens
    reply = response.choices[0].message.content

    global_prompt_tokens += prompt_tokens
    global_completion_tokens += completion_tokens

    print(f"Prompt tokens: {prompt_tokens}")
    print(f"Completion tokens: {completion_tokens}")
    print()

    assistant_history.append({"role": "assistant", "content": reply})
    user_history.append({"role": "user", "content": reply})

    if print_response:
        print(reply, end="\n\n")
        print("-" * 50)
    return assistant_history, user_history, prompt_tokens, completion_tokens

In [7]:
def store_history(student_history, teacher_history, metadata):
    """
    Append one dialogue to results.json in the current working directory.

    Existing content is kept: a JSON list is extended, any other JSON value
    becomes the first element of a new list, and an empty or invalid file is
    treated as an empty list.
    Args:
        student_history (list): The student-teacher's history.
        teacher_history (list): The teacher-educator's history.
        metadata (dict): Additional metadata to store.
    """
    results_path = "results.json"
    entry = {
        "student": student_history,
        "teacher": teacher_history,
        "metadata": metadata
    }

    stored = []
    if os.path.exists(results_path):
        with open(results_path, "r") as handle:
            try:
                loaded = json.load(handle)
            except json.JSONDecodeError:
                # Empty or corrupt file: start over with an empty list
                loaded = []
            else:
                if not isinstance(loaded, list):
                    loaded = [loaded]
            stored = loaded

    stored.append(entry)

    with open(results_path, "w") as handle:
        json.dump(stored, handle, indent=4)

In [8]:
def add_tokens(tpt, tct, ttt, prompt_tokens, completion_tokens):
    """
    Add the token usage of one call to the running totals.
    Args:
        tpt (int): Running total of prompt tokens.
        tct (int): Running total of completion tokens.
        ttt (int): Running total of all tokens.
        prompt_tokens (int): Prompt tokens of the latest call.
        completion_tokens (int): Completion tokens of the latest call.
    Returns:
        tuple: The updated (tpt, tct, ttt).
    """
    call_total = prompt_tokens + completion_tokens
    return tpt + prompt_tokens, tct + completion_tokens, ttt + call_total

In [None]:
def socratic_dialogue(settings, prompts, sources, max_iterations, dynamic, print_process):
    """
    Conduct a Socratic dialogue between a Student-Teacher and a Teacher-Educator.

    Every iteration consists of one Student-Teacher call followed by one
    Teacher-Educator call. The finished dialogue is written to results.json
    through store_history().
    Args:
        settings (dict): The settings for the dialogue; the keys 'materials'
            and 'student_level' are read.
        prompts (dict): The prompts for the dialogue; the keys
            'student_teacher_start', 'student_teacher' and 'teacher_educator'
            are read.
        sources (list): Ids of uploaded files to attach to the dialogue.
        max_iterations (int): The maximum number of iterations for the dialogue.
        dynamic (bool): Whether to end dialogue dynamically based on the teacher's response.
        print_process (bool): Whether to print the process of the dialogue.
    Returns:
        str: Content of the last message in the Student-Teacher history.
    """
    # LLM token count: prompt / completion / total
    tpt = tct = ttt = 0

    iteration_count = 0

    # Each participant starts with its own system prompt
    st_history = [
        {"role": "system", "content": prompts["student_teacher_start"]}
    ]
    te_history = [
        {"role": "system", "content": prompts['teacher_educator']}
    ]

    # If materials are enabled and file ids were given, attach them to both
    # histories. `sources` is a plain list, so test its truthiness (a list
    # has no .empty() method).
    if settings['materials'] and sources:
        for history in (st_history, te_history):
            history.append({
                "role": "user",
                # Separate list per history so the two never share state
                "content": [{"type": "file", "file": {"file_id": source}}
                            for source in sources]
            })

    for i in range(max_iterations):
        iteration_count += 1

        # Student-Teacher turn
        st_history, te_history, pt, ct = llm_call(
            st_history, te_history, print_process)
        if i == 0:
            # After the opening turn, replace the starting system prompt of
            # the Student Teacher with the follow-up prompt
            st_history[0] = {"role": "system",
                             "content": prompts['student_teacher']}
        tpt, tct, ttt = add_tokens(tpt, tct, ttt, pt, ct)

        # Teacher-Educator turn
        te_history, st_history, pt, ct = llm_call(
            te_history, st_history, print_process)
        tpt, tct, ttt = add_tokens(tpt, tct, ttt, pt, ct)

        # Check if the teacher thinks the question is sufficient
        if dynamic and "Great question!" in te_history[-1]["content"]:
            break
    else:
        # If the loop completes without breaking, the iteration limit was
        # reached: give the Student-Teacher one final turn
        st_history, te_history, pt, ct = llm_call(
            st_history, te_history, print_process)
        tpt, tct, ttt = add_tokens(tpt, tct, ttt, pt, ct)

    metadata = {
        "iterations": iteration_count,
        "prompt_tokens": tpt,
        "completion_tokens": tct,
        "total_tokens": ttt,
        "model_used": model_used,
        "student_level": settings['student_level'],
        "materials": settings['materials']
    }

    store_history(st_history, te_history, metadata)

    # NOTE(review): after a dynamic break the last entry of st_history is the
    # Teacher-Educator's reply (stored with role "user"), not a Student-Teacher
    # question - confirm this is the intended return value.
    return st_history[-1]["content"]

In [None]:
def generate(topic, concepts, student_level, max_iterations, sources, combination, print_process=True):
    """
    Produce a question by running a Socratic dialogue for the given setup.

    Settings and prompts are read from the entry of the "Prompts" directory
    whose name equals `combination`; a combination starting with 'd' selects
    the dynamic dialogue length.
    Args:
        topic (str): The topic for the question.
        concepts (list): The concepts to include in the question.
        student_level (str): The student's level.
        max_iterations (int): The maximum number of iterations for the question generation.
        sources (list): Additional sources to use in the question.
        combination (str): The combination of concepts to use in the question.
        print_process (bool): Whether to print the parts of the question generation process.
    Returns:
        str: The final question generated, or None when `max_iterations` is
            not positive or the combination is unknown.
    """
    if max_iterations <= 0:
        print("Max iterations must be greater than 0.")
        return None

    directory = "Prompts"
    # The combination is valid only if "Prompts" has an entry of that name
    if combination not in os.listdir(directory):
        print(f"Invalid combination: {combination}")
        return None
    subdir_path = os.path.join(directory, combination)

    dynamic = combination[0] == 'd'

    settings, raw_prompts = read_files(subdir_path)
    prompts = {}
    for name, template in raw_prompts.items():
        prompts[name] = replace_variables(
            template, topic=topic, concepts=concepts,
            student_level=student_level)

    return socratic_dialogue(
        settings, prompts, sources, max_iterations, dynamic, print_process)

In [11]:
# A sample process of uploading a file:
# file_upload('file-path-relative-to-this-notebook')
# list_files()

In [None]:
# Model used for every API call in llm_call() and for the price lookup in costs().
# Must be one of the models that costs() has prices for.
model_used = "gpt-4o-mini"
# model_used = "gpt-4o"

# Change the following variables to customize the prompt
# The overall topic
topic = "Basics of how the internet works"
# Concepts to cover - if more than one is provided, the model will choose one at random
concepts = """
 - Decentralization of the internet
 - Servers, datacenters and routers
 - Server vs client
 - Data packets
 - IP addresses
"""
# The target audience
student_level = "8th or 9th grade"

# Maximum number of iterations for the Socratic dialogue
# MUST BE SET and greater than 0, otherwise generate() returns None
# (5 is good enough, 10 tends to be too much)
max_iterations = 3

# List of ids of additional files to be used as sources for the model
# First, upload the files using file_upload()
# For the list of files and their ids, use list_files()
sources = []

# Specify the target combination of additional parameters.
# The value must match the name of a subdirectory of "Prompts".
# d/s = dynamic dialogue length / static dialogue length
# l/w = provide student level / without student level
# m/w = provide additional sources / without additional sources
combination = "slw"

# The results are stored in results.json
# If the file already exists, the results will be appended to it

In [None]:
# Reset the notebook-wide token counters; llm_call() adds the usage of every
# API call to them, so they must be defined before generate() runs.
global_prompt_tokens = 0
global_completion_tokens = 0

# Generate the question based on the provided parameters
# (the last argument enables printing of the intermediate dialogue)
final_response = generate(topic, concepts, student_level,
                          max_iterations, sources, combination, True)
print("Final response:")
print(final_response)

Prompt tokens: 219
Completion tokens: 131
The Student's response:  
How does the decentralization of the internet influence how data packets are transmitted between servers and clients?

In crafting this question, I focused specifically on the concept of decentralization and its impact on data transmission. This allows students to explore why the internet's structure is designed in such a way and how it affects the journey of data packets from servers to clients. Additionally, by connecting the concept of decentralization with the roles of servers and clients, students can think critically about the efficiency and reliability of the internet. This question encourages them to analyze the underlying principles of the internet's architecture without overwhelming them with too many key concepts at once.

--------------------------------------------------
Prompt tokens: 497
Completion tokens: 36
The Teacher's feedback:  
How might changing the focus from the process of data transmission to 

In [18]:
# Token usage accumulated by llm_call() since the counters were last reset,
# followed by the resulting price for the configured model.
print(f"Total prompt tokens: {global_prompt_tokens}")
print(f"Total completion tokens: {global_completion_tokens}")
print(f"{costs(global_prompt_tokens, global_completion_tokens)} USD")

Total prompt tokens: 4006
Total completion tokens: 651
0.0009915 USD


In [None]:
def _print_history(title, messages, role_labels):
    """Print `title`, then every message with its role, label and content."""
    print(title)
    for message in messages:
        # Roles without a label (e.g. "system") get an empty add-on
        addon = role_labels.get(message['role'], "")
        print(f"{message['role']} {addon}:")
        print(f"{message['content']}")
        print("-" * 100)


def print_dialogue(idx=-1):
    """
    Print the dialogue history number idx from the results.json file if it exists.

    Student History is from the perspective of the student-teacher,
    Teacher History is from the perspective of the teacher-educator.
    Nothing is printed when results.json does not exist.

    Args:
        idx (int): The index of the dialogue to print. Negative values count
            from the end, so -1 (the default) prints the last dialogue.
    """
    if not os.path.exists("results.json"):
        return

    with open("results.json", "r") as file:
        data = json.load(file)

    # Valid indices are -len(data) .. len(data) - 1. Comparing against
    # abs(idx) would wrongly reject -len(data), e.g. idx=-1 when exactly one
    # dialogue is stored.
    if not -len(data) <= idx < len(data):
        print(
            f"Index {idx} is out of range. Available indices: 0 to {len(data) - 1}.")
        return

    dialogue = data[idx]
    _print_history("Student History:", dialogue["student"],
                   {"assistant": "(student)", "user": "(teacher)"})
    _print_history("\nTeacher History:", dialogue["teacher"],
                   {"assistant": "(teacher)", "user": "(student)"})
    print("\nMetadata:")
    print(dialogue["metadata"])

In [None]:
print_dialogue(-1)  # Print the most recent dialogue stored in results.json

Student History:
system :
You are a Student-Teacher tasked with developing reflective questions for educational purposes. Your mentor, the Teacher-Educator, has provided you with feedback on your initial question and explanation. Your task is to analyze the Teacher-Educator's feedback and revise your question accordingly. The question should be adequate for students of 8th or 9th grade. Do not try to needlessly connect all the provided concepts as that can make the question too complex and difficult to answer. Instead, focus on one concept or only a few more.
This is part of a Socratic dialogue, where the Teacher-Educator guides you through questions and feedback to help you think critically and improve your work.
The topic of the question: Basics of how the internet works
Key concepts that can be incorporated: 
 - Decentralization of the internet
 - Servers, datacenters and routers
 - Server vs client
 - Data packets
 - IP addresses

Always respond only with the revised question and e