In [None]:
import os
from pydub import AudioSegment
import markdown

In [None]:
def split_mp3(input_file, output_folder, segment_length=10):
    """Split an MP3 file into consecutive fixed-length MP3 segments.

    Segments are written to ``output_folder`` as ``segment_1.mp3``,
    ``segment_2.mp3``, ... in playback order. The last segment holds whatever
    audio remains, so it may be shorter than ``segment_length``.

    Args:
        input_file: Path of the MP3 file to split.
        output_folder: Directory that receives the segments; created if it
            does not exist.
        segment_length: Length of each segment in minutes. Fractional values
            (e.g. ``0.5`` for 30 seconds) are accepted; must be positive.

    Raises:
        ValueError: If ``segment_length`` rounds to zero milliseconds or less.
    """
    # Work in whole milliseconds: range() below rejects a float step, so the
    # minute value is converted to an integer before it is used.
    segment_length_ms = int(round(segment_length * 60 * 1000))
    if segment_length_ms <= 0:
        raise ValueError(
            f"segment_length must be a positive number of minutes, got {segment_length!r}"
        )

    # Load the MP3 file
    audio = AudioSegment.from_mp3(input_file)

    # Total duration of the audio in milliseconds
    total_duration = len(audio)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Split the audio into segments
    for i, start in enumerate(range(0, total_duration, segment_length_ms)):
        # `end` may run past the end of the audio for the final segment.
        end = start + segment_length_ms
        segment = audio[start:end]

        # Generate the output file name (1-based numbering)
        output_file = os.path.join(output_folder, f"segment_{i+1}.mp3")

        # Export the segment as a new MP3 file
        segment.export(output_file, format="mp3")

        print(f"Segment {i+1} saved: {output_file}")

In [None]:
# Split the lecture recording into segments using split_mp3's default
# segment length.
output_folder = "/workspace/data/segments"
input_file = "/workspace/data/lecture-06.mp3"

split_mp3(input_file=input_file, output_folder=output_folder)

In [None]:
from openai import OpenAI
import os

In [None]:
# Shared OpenAI client; the API key is read from the OPENAI_API_KEY
# environment variable (None is passed through when it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
def transcribe_audio(audio_file_path, output_file_path):
    """Transcribe one audio file with the Whisper API and save the text.

    Uses the module-level ``client`` to call the ``whisper-1`` model and
    writes the resulting text to ``output_file_path`` as UTF-8.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        output_file_path: Path of the text file to write; its parent
            directory is created if it does not exist.

    Returns:
        The transcribed text on success, or ``None`` if any step failed.
        Failures are printed rather than raised.
    """
    try:
        # Open the audio file and call the Whisper API
        with open(audio_file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )

        # Extract the transcribed text
        transcribed_text = transcription.text

        # Ensure the destination directory exists so a finished transcription
        # is not thrown away just because the folder was missing.
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save the transcript to a file
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(transcribed_text)

        print(f"Transcription saved to {output_file_path}")
        return transcribed_text

    except Exception as e:
        # Name the failing file so errors are traceable in a batch run.
        print(f"An error occurred while transcribing {audio_file_path}: {e}")
        return None

In [None]:
# Transcribe every MP3 segment into a matching .txt transcript.
transcribe_path = "/workspace/data/transcribes"
segments_path = "/workspace/data/segments"

os.makedirs(transcribe_path, exist_ok=True)

# sorted() gives a deterministic processing order; os.listdir() order is arbitrary.
for file in sorted(os.listdir(segments_path)):
    # Skip anything that is not an MP3 segment. The transcription call must
    # only run for MP3 files, never with paths left over from a previous
    # iteration.
    if not file.endswith(".mp3"):
        continue

    audio_file_path = os.path.join(segments_path, file)
    # splitext swaps only the trailing extension, even if ".mp3" also
    # appears elsewhere in the file name.
    output_file_path = os.path.join(transcribe_path, os.path.splitext(file)[0] + ".txt")

    transcribe_audio(audio_file_path, output_file_path)

In [None]:
# List to store the content of all transcript files, in segment order
all_content = []

# Collect the segment numbers that have a transcript on disk. Only names of
# the form "segment_<number>.txt" count, so unrelated .txt files in the
# folder do not distort the numbering.
segment_numbers = set()
for name in os.listdir(transcribe_path):
    stem, ext = os.path.splitext(name)
    number = stem[len("segment_"):]
    if ext == ".txt" and stem.startswith("segment_") and number.isdecimal():
        segment_numbers.add(int(number))

# Number of segment transcripts found
segments_count = len(segment_numbers)

# Walk from 1 up to the highest segment number (not the file count), so a
# missing transcript in the middle is reported instead of silently cutting
# off the last segments.
last_segment = max(segment_numbers, default=0)
for i in range(1, last_segment + 1):
    filename = f"segment_{i}.txt"
    file_path = os.path.join(transcribe_path, filename)

    # Check if file exists
    if os.path.exists(file_path):
        # Transcripts are written as UTF-8, so read them back the same way
        # instead of relying on the platform default encoding.
        with open(file_path, 'r', encoding="utf-8") as file:
            content = file.read()
            all_content.append(content)
    else:
        print(f"File {filename} not found.")

# Join all content into a single string
combined_content = "\n".join(all_content)


In [None]:
# Show the length of the combined transcript in characters
# (presumably a quick sanity check before it is sent to the model).
len(combined_content)

In [None]:

# Function to generate detailed, pedagogically structured lecture notes (model: gpt-4o)
def generate_lecture_notes(content):
    """Generate comprehensive lecture notes from a transcript.

    Sends the transcript to the ``gpt-4o`` chat model through the
    module-level ``client``, together with a prompt asking for structured,
    enriched notes.

    Args:
        content: Transcript text to turn into lecture notes.

    Returns:
        The generated notes as a string, or ``None`` if the transcript is
        empty/whitespace-only or the API call fails (the error is printed).
    """
    # Do not spend an API call on an empty transcript: there is nothing for
    # the model to summarise.
    if not content or not content.strip():
        print("No transcript content provided; skipping lecture note generation.")
        return None

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates very well structured lecture notes from transcripts."},
                {"role": "user", "content": f"""Please generate comprehensive lecture notes from the following transcript:

                {content}

                In creating these notes, please incorporate the following advanced teaching and learning techniques:

                1. Organize the content using a clear hierarchical structure (main topics, subtopics, key points).
                2. Include a brief summary or learning objectives at the beginning.
                3. Use bullet points and numbered lists for easy readability and retention.
                4. Incorporate visual elements where appropriate (e.g., diagrams, charts, or mind maps).
                5. Highlight key terms, definitions, and important concepts.
                6. Add relevant examples and real-world applications to illustrate complex ideas.
                7. Include thought-provoking questions or discussion points to encourage critical thinking.
                8. Provide analogies or metaphors to explain difficult concepts.
                9. Insert brief "check your understanding" sections with sample questions or problems.
                10. Include mnemonics or memory aids where applicable.
                11. Add cross-references to related topics or previous lectures if relevant.
                12. Conclude with a summary of main takeaways and potential areas for further exploration.

                Feel free to expand on the given content by adding relevant background information, filling in any gaps to ensure completeness, and enriching the material with additional examples or explanations. The goal is to create comprehensive, engaging, and effective lecture notes that facilitate deep understanding and retention of the subject matter. Add at least 5 references for relative materials for readings"""}
            ],
        )
        # Only the first choice is used.
        return response.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Generate lecture notes
lecture_notes = generate_lecture_notes(combined_content)

if lecture_notes:
    print("Lecture Notes:")
    print(lecture_notes)

    # Also render the notes (treated as Markdown) to HTML
    html = markdown.markdown(lecture_notes)

    # Save the lecture notes to disk. The encoding is set explicitly so that
    # non-ASCII characters in the generated text do not depend on the
    # platform default encoding.
    with open("/workspace/data/lecture_notes.txt", "w", encoding="utf-8") as file:
        file.write(lecture_notes)
    with open("/workspace/data/lecture_notes.html", "w", encoding="utf-8") as file:
        file.write(html)
    print("Lecture notes saved to /workspace/data/lecture_notes.txt")
    print("HTML version saved to /workspace/data/lecture_notes.html")
else:
    print("Failed to generate lecture notes.")

In [None]:

# Function to generate lecture notes with a minimal prompt (model: gpt-4o)
def generate_lecture_notes_simple(content):
    """Generate detailed lecture notes from a transcript using a short prompt.

    Sends the transcript to the ``gpt-4o`` chat model through the
    module-level ``client`` with a single-sentence instruction.

    Args:
        content: Transcript text to turn into lecture notes.

    Returns:
        The generated notes as a string, or ``None`` if the transcript is
        empty/whitespace-only or the API call fails (the error is printed).
    """
    # Do not spend an API call on an empty transcript: there is nothing for
    # the model to summarise.
    if not content or not content.strip():
        print("No transcript content provided; skipping lecture note generation.")
        return None

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that generates very well structured lecture notes from transcripts."},
                {"role": "user", "content": f"Please generate detailed lecture notes from the following transcript:{content}"}
            ],
        )
        # Only the first choice is used.
        return response.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Generate lecture notes with the simple prompt
lecture_notes_simple = generate_lecture_notes_simple(combined_content)

if lecture_notes_simple:
    # Also render the notes (treated as Markdown) to HTML
    html_simple = markdown.markdown(lecture_notes_simple)

    # Save the lecture notes to disk. The encoding is set explicitly so that
    # non-ASCII characters in the generated text do not depend on the
    # platform default encoding.
    with open("/workspace/data/lecture_notes_simple.txt", "w", encoding="utf-8") as file:
        file.write(lecture_notes_simple)
    with open("/workspace/data/lecture_notes_simple.html", "w", encoding="utf-8") as file:
        file.write(html_simple)
    # Report the paths this cell actually wrote.
    print("Lecture notes saved to /workspace/data/lecture_notes_simple.txt")
    print("HTML version saved to /workspace/data/lecture_notes_simple.html")
else:
    print("Failed to generate lecture notes.")