In [None]:
import os
import json
import pandas as pd

userlog_path = "UserLog"
yt_urls = []
base_notes = []


def load_user_logs(root, user_numbers=range(1, 13)):
    """Load onboarding data and micro notes from the per-participant log folders.

    For every number ``n`` in ``user_numbers`` the folder ``<root>/P<n>`` is
    walked recursively (directories and files in sorted order, so the result
    does not depend on filesystem listing order):

    * every file named exactly ``onboarding.json`` is parsed as JSON and
      appended to the onboarding list;
    * every file whose lower-cased name starts with ``video1`` (even ``n``) or
      ``video2`` (odd ``n``) is parsed as JSON, and each entry of its
      ``editHistory`` list is reduced to
      ``{'point': entry['point'], 'transcript': entry['fraction_transcript']}``.
      The resulting list is appended as one element of the micro-notes list.

    Args:
        root: directory that contains the ``P<n>`` folders.
        user_numbers: iterable of participant numbers to load.

    Returns:
        ``(micro_notes, onboardings)`` - two lists in participant order.

    Raises:
        KeyError: if a selected video file lacks ``editHistory``, ``point`` or
            ``fraction_transcript``.
        json.JSONDecodeError: if a selected file is not valid JSON.
    """
    loaded_notes = []
    loaded_onboardings = []

    for folder_number in user_numbers:
        user_folder = f'P{folder_number}'
        print(f'Processing {user_folder}')
        user_folder_path = os.path.join(root, user_folder)
        if not os.path.isdir(user_folder_path):
            print(f'Warning: {user_folder_path} is not a directory; skipping')
            continue

        # Even-numbered participants contribute video1, odd-numbered video2.
        wanted_prefix = 'video1' if folder_number % 2 == 0 else 'video2'
        notes_before = len(loaded_notes)
        onboardings_before = len(loaded_onboardings)

        for subdir, dirs, files in os.walk(user_folder_path):
            dirs.sort()  # in-place sort makes os.walk descend deterministically
            for file in sorted(files):
                file_path = os.path.join(subdir, file)
                if file == 'onboarding.json':
                    # Same explicit encoding as the video files, so parsing does
                    # not depend on the platform's default locale encoding.
                    with open(file_path, 'r', encoding='utf-8') as f:
                        loaded_onboardings.append(json.load(f))
                elif file.lower().startswith(wanted_prefix):
                    with open(file_path, 'r', encoding='utf-8') as f:
                        bullet_points_data = json.load(f)
                    loaded_notes.append([
                        {
                            'point': bpd['point'],
                            'transcript': bpd['fraction_transcript']
                        }
                        for bpd in bullet_points_data['editHistory']
                    ])

        # Both lists are later indexed by participant position, so anything
        # other than exactly one new entry each shifts every later participant.
        added = (len(loaded_notes) - notes_before,
                 len(loaded_onboardings) - onboardings_before)
        if added != (1, 1):
            print(f'Warning: {user_folder} added {added[0]} micro-note list(s) '
                  f'and {added[1]} onboarding file(s); expected 1 of each')

    return loaded_notes, loaded_onboardings


# micro_notes -> [ [{'point': <string>, 'transcript': <fraction_transcript value>}] ]
# onboardings -> [ <parsed onboarding.json content> ]
micro_notes, onboardings = load_user_logs(userlog_path)


In [None]:
def get_formatted_prompt_string(user_idx):
    """Build the system prompt used for one participant's note expansions.

    The prompt always contains the fixed task description and rules. When the
    participant has usable onboarding examples, a few-shot section showing
    their transcript / keypoint / note triples is appended so the model can
    imitate the participant's writing style.

    Args:
        user_idx: zero-based index into the module-level ``onboardings`` list.
            ``onboardings[user_idx]`` is iterated as a sequence of dicts with
            the keys ``'note'``, ``'keypoints'`` and ``'transcript'``.

    Returns:
        The complete system prompt as a single string.

    Raises:
        IndexError: if ``user_idx`` is out of range for ``onboardings``.
    """
    # Keep only examples that have a note, at least one keypoint, and no empty
    # keypoint. The explicit emptiness check matters because all([]) is True.
    usable_onboardings = [
        onboarding
        for onboarding in onboardings[user_idx]
        if onboarding['note'] and onboarding['keypoints'] and all(onboarding['keypoints'])
    ]

    prompt_string = "I want you to act as a personalized note-taking assistant. Users will give you a keypoint and the youtube transcript. " + \
                    "Your task is to expand the keypoint into a note point, by taking additional context from the transcript. The note should be a full sentence in simple english. " + \
                    "Follow these rules:\n1. Resolve any typos or grammatical mistakes that arise in the keypoint.\n2. The note should not be longer than 1 sentence. " + \
                    "3. Remember that the keypoint can be very abstract and as short as an abbreviation. Use the transcript to get additional information to ensure a good quality note expansion.\n" + \
                    "4. Just write a single note point, users will request repeatedly for new points they want to add.\n" + \
                    "5. Write it in a way a user would write in a notepad. Do not use sentences such as 'This video talks about...', 'The speaker explains..' etc."

    if usable_onboardings:
        # NOTE: the wording says "three examples" regardless of how many
        # examples survived the filter above.
        prompt_string += "\nMake sure that the note aligns with the user's writing style, so that they can read it easily. Use the same writing style as shown below.\n" + \
                         "Here are three examples:\n"

        for onboarding in usable_onboardings:
            prompt_string += "Transcript: ..." + onboarding['transcript'] + "...\n" + \
                             "Keypoint: " + ", ".join(onboarding['keypoints']) + "\n" + \
                             "Note: " + onboarding['note'] + "\n\n"

        prompt_string += "The keypoint refers to the high-level keypoint provided by the user and your task is to write a full 'Note' point. Make sure that your expanded note point matches the writing style of 'Note' in the provided examples."

    return prompt_string

In [None]:
import time
from openai import OpenAI # Might need to install this package

# Take the API key from the OPENAI_API_KEY environment variable so that no
# credential is stored in (or leaked through) the notebook file.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)

def call_gpt(points, user):
    """Expand each keypoint of one participant into a one-sentence note.

    One chat-completion request is sent per point. The system message is the
    participant's prompt from ``get_formatted_prompt_string(user - 1)`` and the
    user message carries the transcript excerpt and the keypoint.

    Args:
        points: iterable of dicts with a ``'point'`` string and a
            ``'transcript'`` sequence of strings (joined with ``'.'``).
        user: 1-based participant number; used to select the prompt and to
            name the timing file ``api_timings/P{user}.txt``.

    Returns:
        A list of ``{'point': ..., 'expansion': ..., 'old': False}`` dicts, one
        for every point whose response content was not ``None``.

    Side effects:
        Prints every prompt and the collected timings, and writes the elapsed
        seconds of each request (one value per line) to the timing file. The
        ``api_timings`` directory is created when it does not exist.
    """
    prompt_string = get_formatted_prompt_string(user-1)
    elapsed_seconds = []
    expansion = []
    for point in points:
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        transcript = ".".join(point['transcript'])
        prompt = "Expand the provided keypoint into a one sentence note.\n" + \
                    "Transcript: ..." + transcript + "...\n" + \
                    "Keypoint: " + point['point'] + "\n" + \
                    "Note:"

        print('calling expansion from', prompt)

        res = client.chat.completions.create(
            messages=[{'role': 'system', 'content': prompt_string}, {'role': 'user', 'content': prompt}],
            model="gpt-4-1106-preview",
            seed=0,
            temperature=0.3,
            top_p=0.1,
        )

        elapsed_seconds.append(time.perf_counter() - start_time)

        content = res.choices[0].message.content
        if content is not None:
            expansion.append({'point': point['point'], 'expansion': content, 'old': False})

    timings = ''.join(str(seconds) + '\n' for seconds in elapsed_seconds)
    print(f'-----\nP{user} timing: {timings}\n-----')

    # Create the output directory first: without it the open() below raises
    # FileNotFoundError only after every API call has already been made.
    os.makedirs("api_timings", exist_ok=True)
    with open(f"api_timings/P{user}.txt", "w") as file:
        file.write(timings)

    return expansion

In [None]:
# Run the expansion for participants P1..P12; slot k of `res` holds the
# result for participant P{k+1}, built from micro_notes[k].
res = [None] * 12
for participant in range(1, 13):
    slot = participant - 1
    res[slot] = call_gpt(micro_notes[slot], participant)

In [None]:
import os

def average_api_time(path):
    """Return the mean of all numeric lines in a timing file.

    Each line is stripped and parsed with ``float``; lines that do not parse
    (for example blank lines) are skipped. Returns ``0.0`` when the file holds
    no valid number.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    total = 0.0
    count = 0
    with open(path, 'r') as handle:
        for line in handle:
            try:
                total += float(line.strip())
            except ValueError:
                # Not a number: ignore the line, as a best-effort summary.
                continue
            count += 1
    return total / count if count > 0 else 0.0


def format_timing_row(label, average):
    """Format one table row so that it lines up with the 15/10-wide header.

    The whole label (not just the participant number) is padded to 15
    characters, which keeps the second column under the header's column.
    """
    return f"{label:<15} {average:<10.4f}"


# List of file names
file_names = [f"api_timings/P{i}.txt" for i in range(1, 13)]

# Print the header of the table
print(f"{'User':<15} {'API Time(s)':<10}")

# One row per participant: the average request time, or a note when the
# participant's timing file is missing.
for idx, file_name in enumerate(file_names):
    try:
        average = average_api_time(file_name)
    except FileNotFoundError:
        print(f"{file_name:<15} {'File not found':<10}")
    else:
        print(format_timing_row(f"P{idx + 1}", average))