In [7]:
# Cell 1: Download audio from YouTube URL and save as MP3
# Install yt-dlp if not already: !pip install yt-dlp

import subprocess
from pathlib import Path

# ---------- CONFIG ----------
YOUTUBE_URL = "https://www.youtube.com/watch?v=yN7ypxC7838"  # Replace with the video URL
OUTPUT_AUDIO_FILE = "lecture_audio.mp3"  # Path to save the audio

# Ensure output path exists
Path(OUTPUT_AUDIO_FILE).parent.mkdir(parents=True, exist_ok=True)

# Download audio using yt-dlp
print("🎧 Downloading audio from YouTube...")
cmd = [
    "yt-dlp",
    "-f", "bestaudio",
    "--extract-audio",
    "--audio-format", "mp3",
    "-o", OUTPUT_AUDIO_FILE,
    YOUTUBE_URL
]

subprocess.run(cmd, check=True)
print(f"✅ Audio downloaded and saved to {OUTPUT_AUDIO_FILE}")


🎧 Downloading audio from YouTube...
✅ Audio downloaded and saved to lecture_audio.mp3


In [9]:
# Cell 1: Transcribe audio to text and save to file
# Install Whisper if not already: !pip install openai-whisper

import whisper

# ---------- CONFIG ----------
AUDIO_FILE = "lecture_audio.mp3"  # Replace with your audio file path
OUTPUT_TEXT_FILE = "transcript.txt"

# Load Whisper model
model = whisper.load_model("base")  # You can use "small", "medium", "large" for better accuracy

# Transcribe audio
print("Transcribing audio...")
result = model.transcribe(AUDIO_FILE)
transcript = result["text"]
print("Transcription completed!")

# Save transcript to txt file
with open(OUTPUT_TEXT_FILE, "w", encoding="utf-8") as f:
    f.write(transcript)

print(f"Transcript saved to {OUTPUT_TEXT_FILE}")


Transcribing audio...
Transcription completed!
Transcript saved to transcript.txt


In [10]:
# Cell 2: Summarize transcript into lecture notes using Ollama Llama 3.1 locally
import subprocess

# ---------- CONFIG ----------
OLLAMA_MODEL = "llama3"  # Your Ollama Llama 3.1 model
TRANSCRIPT_FILE = "transcript.txt"  # Transcript generated in previous cell
OUTPUT_NOTES_FILE = "lecture_notes.txt"

# Read transcript
with open(TRANSCRIPT_FILE, "r", encoding="utf-8") as f:
    transcript = f.read()

# Prepare prompt for Ollama
prompt = f"""
You are an expert lecture note-taker.
Take the following transcript and convert it into structured lecture notes with clear sections, headings, and bullet points.

Transcript:
{transcript}
"""

# Call Ollama CLI using 'run' instead of 'generate'
print("Generating lecture notes with Ollama Llama 3.1...")

process = subprocess.Popen(
    ["ollama", "run", OLLAMA_MODEL],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True
)

lecture_notes, errors = process.communicate(prompt)

if errors:
    print("Errors:", errors)

# Save lecture notes to file
with open(OUTPUT_NOTES_FILE, "w", encoding="utf-8") as f:
    f.write(lecture_notes)

print(f"Lecture notes saved to {OUTPUT_NOTES_FILE}")
print("\n===== Preview =====\n")
print(lecture_notes[:1000], "...")  # Preview first 1000 characters


Generating lecture notes with Ollama Llama 3.1...


Exception in thread Thread-19 (_readerthread):
Traceback (most recent call last):
  File "C:\Users\Dhruv\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Users\Dhruv\OneDrive\Desktop\Astudy\DL\DL-mpR\.venv\Lib\site-packages\ipykernel\ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "C:\Users\Dhruv\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Dhruv\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1599, in _readerthread
    buffer.append(fh.read())
                  ^^^^^^^^^
  File "C:\Users\Dhruv\AppData\Local\Programs\Python\Python312\Lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in positio

Lecture notes saved to lecture_notes.txt

===== Preview =====

Here is the transcript converted into structured lecture notes with clear sections, headings, and bullet points:

**Machine Learning Models**

### Overview

* Machine learning models can be broadly categorized as supervised or unsupervised
* Today's lecture will cover both types of models and their subcategories

### Supervised Learning

#### Definition

* Involves a series of functions that maps an input to an output based on a series of example input-output pairs
* Example: Predicting shoe size based on age using a dataset of two variables (age, shoe size)

#### Subcategories

* **Regression**
	+ Finds a target value based on independent predictors
	+ Output is continuous
	+ Examples:
		- Linear Regression: finding a line that fits the data
		- Multiple Linear Regression: finding a plane of best fit
		- Polynomial Regression: finding a curve for best fit
* **Classification**
	+ Output is discrete
	+ Examples:
		- Logistic