In [13]:
import importlib
import json
import os
import re
import time

from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

import config

# Run load_dotenv() first, then read the API key from the OPENAI_KEY
# environment variable and hand it to the OpenAI client used by every request.
load_dotenv()
openai_key = os.environ.get('OPENAI_KEY')
client = OpenAI(api_key=openai_key)

In [11]:
# Re-import `config` so edits to it are picked up without restarting the
# kernel, then copy the prompt strings and the `clean` helper into this
# notebook's namespace. In production, import these names directly instead.

config = importlib.reload(config)
globals().update(dict(
    (attr, getattr(config, attr))
    for attr in ('system_prompt', 'user_prompt_1', 'user_prompt_2', 'clean')
))

In [12]:
def generate(messages, model='gpt-4.1-nano', temperature=0.3,
             max_tokens=10000, top_p=0.3):
    """Send a chat conversation to the OpenAI API and return the reply.

    Args:
        messages: List of chat message dicts (``{"role": ..., "content": ...}``)
            forwarded unchanged to ``client.chat.completions.create``.
        model: Name of the chat model to call.
        temperature: Sampling temperature; adjust for more creative or more
            deterministic output.
        max_tokens: Upper bound on generated tokens; increase this if
            responses are getting cut off.
        top_p: Nucleus-sampling cutoff forwarded to the API.

    Returns:
        A ``(text, completion)`` tuple: the message content of the first
        choice, and the full completion object returned by the client.
    """
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        frequency_penalty=0,
        presence_penalty=0
    )

    elapsed = time.perf_counter() - start
    print(f"Completion took {elapsed:.2f} seconds")

    choice = completion.choices[0]
    # Surface truncation explicitly instead of silently returning a partial reply.
    if choice.finish_reason == "length":
        print(f"Warning: response was cut off at max_tokens={max_tokens}")

    return choice.message.content, completion

In [9]:
# Load the lecture records that drive the generation loop. Each record is
# later read through its 'index', 'title' and 'content' keys.
# (`json` is imported in the notebook's top import cell.)
with open('Lectures.json', 'r', encoding='utf-8') as f:
    lectures = json.load(f)


In [None]:
# For every selected lecture: run a two-turn conversation with the model,
# save the first reply as a Markdown file under outputs/, and keep the second
# reply in `transcripts`, keyed by lecture index.

# Lectures whose index exceeds this value are skipped.
# NOTE(review): looks like a development limit — raise it to process every lecture.
MAX_LECTURE_INDEX = 2

os.makedirs("outputs", exist_ok=True)
transcripts = {}

for lecture in lectures:
    # `lecture_id` rather than `id`, so the builtin id() is not shadowed.
    lecture_id = lecture['index']
    title = lecture['title']
    content = lecture['content']

    if lecture_id > MAX_LECTURE_INDEX:
        continue

    lec_prompt_1 = user_prompt_1 + title + "\n\n" + content

    # Step 1: first turn — system prompt plus the lecture-specific user prompt.
    text_1, completion_1 = generate([
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": lec_prompt_1}])

    # Step 2 A: save the cleaned first reply as "<NN> <title>.md".
    # The title is sanitised outside the f-string: nesting the double quote and
    # backslash of this pattern inside a double-quoted f-string is a syntax
    # error on Python versions before 3.12.
    safe_title = re.sub(r'[<>:"/\\|?*]', '', title)
    filepath = os.path.join("outputs", f"{lecture_id:02d} {safe_title}.md")

    with open(filepath, "w", encoding="utf-8") as f:
        f.write(clean(text_1))

    # Step 2 B: second turn — replay the first exchange and send the follow-up
    # prompt, so the model answers with its earlier reply as context.
    text_2, completion_2 = generate([
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": lec_prompt_1},
        {"role": "assistant", "content": text_1},
        {"role": "user", "content": user_prompt_2},])

    transcripts[lecture_id] = {
        "title": title,
        "content": text_2,
    }


Create a detailed, introductory understandable study note based on the following lecture content, make sure it's well organized, ADD CONTENTT AND DETAIL, and is clear and detailed. Explain key concepts clearly and in words. Do not leave anything out that's in the lecture content. Structured liek a note, not just a list of points. Each section must have a in word introduction. Don't use overly academic tone. 

Be VERY DETAILED in each of your explanations. Prefer clear, in-word explanations over brevity. Use emojis in main headings for clarity. Number main headings like 1. [Emoji] Heading. Don't use too many sub headings.

lecture content:

01 Concepts and Techniques

Data Mining: Concepts and Techniques. - Chapter 2 - Jiawei Han, Micheline Kamber, and Jian Pei University of Illinois at Urbana-Champaign Simon Fraser University ©2011 Han, Kamber, and Pei. All rights reserved. Chapter 2: Getting to Know Your Data ◼Data Objects and Attribute Types ◼Basic Statistical Descriptions of Data ◼

In [18]:
# Write the collected transcripts to outputs/transcripts.json as a JSON
# array of {index, title, content} records, ordered by lecture index.

transcripts_list = [
    {"index": idx, "title": entry["title"], "content": entry["content"]}
    for idx, entry in sorted(transcripts.items(), key=lambda pair: pair[0])
]

with open("outputs/transcripts.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(transcripts_list, f, ensure_ascii=False, indent=2)