In [8]:
import os
from pydub import AudioSegment

In [9]:
def split_mp3(input_file, output_folder, segment_length=10):
    """Split an MP3 file into consecutive segments of a fixed length.

    Each segment is exported to ``output_folder`` as ``segment_<n>.mp3``,
    numbered from 1 in playback order, and a confirmation line is printed
    for every file written.

    Args:
        input_file: Path of the MP3 file to split.
        output_folder: Directory that receives the segment files; it is
            created if it does not exist.
        segment_length: Length of each segment in minutes. Fractional values
            (e.g. ``0.5`` for 30 seconds) are accepted.

    Raises:
        ValueError: If ``segment_length`` does not amount to at least one
            millisecond (zero, negative, or vanishingly small).
    """
    # Convert minutes to whole milliseconds up front. range() below needs a
    # positive integer step: a float would raise TypeError, zero would raise
    # an opaque ValueError, and a negative step would silently produce no
    # segments at all. Validating first also avoids decoding the audio for
    # a call that cannot succeed.
    segment_length_ms = int(round(segment_length * 60 * 1000))
    if segment_length_ms <= 0:
        raise ValueError(
            f"segment_length must be a positive number of minutes, got {segment_length!r}"
        )

    # Load the MP3 file
    audio = AudioSegment.from_mp3(input_file)

    # Total duration of the audio in milliseconds
    total_duration = len(audio)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Walk the audio in steps of one segment length
    for i, start in enumerate(range(0, total_duration, segment_length_ms)):
        # For the final segment `end` may lie past the end of the audio;
        # the slice is expected to simply stop at the last sample.
        end = start + segment_length_ms
        segment = audio[start:end]

        # Generate the output file name (1-based numbering)
        output_file = os.path.join(output_folder, f"segment_{i+1}.mp3")

        # Export the segment as a new MP3 file
        segment.export(output_file, format="mp3")

        print(f"Segment {i+1} saved: {output_file}")

In [None]:
# Destination directory for the segments and the source lecture recording
output_folder = "/workspace/data/segments"
input_file = "/workspace/data/lecture-04.mp3"

# Split the recording using split_mp3's default segment length
split_mp3(input_file=input_file, output_folder=output_folder)

In [11]:
from openai import OpenAI
import os

In [12]:
# Shared API client used by the transcription and note-generation code in this
# notebook. It is constructed with no explicit arguments, so credentials and
# endpoint settings are presumably taken from the environment — confirm for
# your setup before running.
client = OpenAI()

In [6]:
def transcribe_audio(audio_file_path, output_file_path):
    """Transcribe one audio file and save the transcript as UTF-8 text.

    The audio is sent to the transcription endpoint of the module-level
    ``client``. Failures are reported on stdout instead of being raised, so
    a batch loop can carry on with the remaining files.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        output_file_path: Path of the text file that receives the transcript.

    Returns:
        The transcribed text on success, or ``None`` if any step (opening
        the audio, the API call, or writing the transcript) failed.
    """
    try:
        # Keep the audio file open only for the duration of the API call
        with open(audio_file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="openai.whisper",
                file=audio_file,
            )

        # Extract the transcribed text
        transcribed_text = transcription.text

        # Save the transcript to a file
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(transcribed_text)
    except Exception as e:
        # Best-effort by design: name the failing file so problems in a
        # batch run can be traced, then signal failure to the caller.
        print(f"An error occurred while transcribing {audio_file_path}: {e}")
        return None

    print(f"Transcription saved to {output_file_path}")
    return transcribed_text

In [None]:
# Directory for the transcripts and directory holding the split MP3 segments
transcribe_path = "/workspace/data/transcribes"
segments_path = "/workspace/data/segments"

os.makedirs(transcribe_path, exist_ok=True)

# Transcribe every MP3 segment exactly once. The listing is sorted so the
# processing order is deterministic across runs.
for file in sorted(os.listdir(segments_path)):
    # Skip anything that is not an MP3. The transcription call must stay
    # inside this filter, otherwise non-MP3 entries would re-submit the
    # previous segment (or fail on undefined paths).
    if not file.endswith(".mp3"):
        continue

    audio_file_path = os.path.join(segments_path, file)
    # Swap only the final extension: segment_3.mp3 -> segment_3.txt
    output_file_path = os.path.join(transcribe_path, os.path.splitext(file)[0] + ".txt")

    transcribe_audio(audio_file_path, output_file_path)

In [None]:
# List to store the content of all transcript files, in segment order
all_content = []

# Work out the highest segment number present instead of hard-coding how many
# segments exist, so the merge follows whatever the split step produced.
segment_numbers = []
for name in os.listdir(transcribe_path):
    stem, ext = os.path.splitext(name)
    number = stem[len("segment_"):]
    if ext == ".txt" and stem.startswith("segment_") and number.isdecimal():
        segment_numbers.append(int(number))
last_segment = max(segment_numbers, default=0)

if last_segment == 0:
    print(f"No segment transcripts found in {transcribe_path}.")

# Read segments 1..last_segment in numeric order, reporting any gaps
for i in range(1, last_segment + 1):
    filename = f"segment_{i}.txt"
    file_path = os.path.join(transcribe_path, filename)

    # Check if file exists
    if os.path.exists(file_path):
        # Transcripts are written as UTF-8, so read them back the same way
        with open(file_path, "r", encoding="utf-8") as file:
            all_content.append(file.read())
    else:
        print(f"File {filename} not found.")

# Join all content into a single string
combined_content = "\n".join(all_content)


In [None]:
# Sanity check: length (in characters) of the combined transcript string
len(combined_content)

In [None]:
# Build lecture notes from a transcript with the chat completions API
def generate_lecture_notes(content):
    """Ask the chat model to turn a transcript into complete lecture notes.

    Args:
        content: Transcript text to embed in the user prompt.

    Returns:
        The text of the first completion choice, or ``None`` when the
        request (or reading its result) raised; the error is printed.
    """
    try:
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant that generates very well structured lecture notes from transcripts.",
        }
        user_message = {
            "role": "user",
            "content": f"Please generate complete lecture notes from the following transcript:\n\n{content}",
        }
        completion = client.chat.completions.create(
            model="openai.gpt-4o",
            messages=[system_message, user_message],
        )
        first_choice = completion.choices[0]
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    else:
        return first_choice.message.content

# Generate lecture notes from the combined transcript
lecture_notes = generate_lecture_notes(combined_content)

if lecture_notes:
    print("Lecture Notes:")
    print(lecture_notes)

    # Save the lecture notes to a file. The encoding is explicit because the
    # generated text can contain non-ASCII characters and the platform
    # default encoding is not guaranteed to handle them.
    with open("/workspace/data/lecture_notes.txt", "w", encoding="utf-8") as file:
        file.write(lecture_notes)
    print("Lecture notes saved to /workspace/data/lecture_notes.txt")
else:
    # generate_lecture_notes returned None (or empty text)
    print("Failed to generate lecture notes.")

In [None]:
# display the content of the lecture notes in nice markdown viewer
from markdown2 import markdown
from IPython.display import display, HTML

# Render the notes as HTML only when generation succeeded; lecture_notes is
# None after a failed request and cannot be converted.
if lecture_notes:
    html_content = markdown(lecture_notes)
    display(HTML(html_content))
else:
    print("No lecture notes available to display.")

In [None]:
# Build detailed, reference-annotated lecture notes with the chat completions API.
# NOTE(review): an earlier cell in this notebook defines a function with the
# same name; running this cell rebinds the name to this variant.
def generate_lecture_notes(content):
    """Ask the chat model for detailed lecture notes with hyperlinked references.

    Args:
        content: Transcript text to embed in the user prompt.

    Returns:
        The text of the first completion choice, or ``None`` when the
        request (or reading its result) raised; the error is printed.
    """
    try:
        conversation = [
            {
                "role": "system",
                "content": "You are a helpful assistant that generates very well structured lecture notes from transcripts.",
            },
            {
                "role": "user",
                "content": f"Please generate detailed lecture notes from the following transcript:\n\n{content}. Make sure to add refrences with hyperlinks",
            },
        ]
        completion = client.chat.completions.create(
            model="openai.gpt-4o",
            messages=conversation,
        )
        notes_text = completion.choices[0].message.content
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    return notes_text

# Generate the detailed lecture notes from the combined transcript
lecture_notes = generate_lecture_notes(combined_content)

if lecture_notes:
    print("Lecture Notes:")
    print(lecture_notes)

    # Save the lecture notes to a file. The encoding is explicit because the
    # generated text can contain non-ASCII characters and the platform
    # default encoding is not guaranteed to handle them.
    with open("/workspace/data/lecture_notes_full.txt", "w", encoding="utf-8") as file:
        file.write(lecture_notes)
    print("Lecture notes saved to /workspace/data/lecture_notes_full.txt")
else:
    # generate_lecture_notes returned None (or empty text)
    print("Failed to generate lecture notes.")