In [19]:
# Cell 1: Install required libraries
!pip install yt-dlp requests


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [20]:
# Cell 2: Set up AssemblyAI API Key
# The key is read from the ASSEMBLYAI_API_KEY environment variable so the real
# secret never has to be typed into (and saved with) the notebook. When the
# variable is unset the original "YOUR_API_KEY" placeholder is used.
# `os` is imported here because this cell runs before the imports cell.
import os

API_KEY_ASSEMBLYAI = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API_KEY")

In [21]:
# Cell 3: Imports
import os
import json
import time
import requests
import yt_dlp

In [22]:
# Cell 4: AssemblyAI Config
# REST endpoints: one receives raw audio uploads, the other transcript jobs.
UPLOAD_ENDPOINT = 'https://api.assemblyai.com/v2/upload'
TRANSCRIPT_ENDPOINT = 'https://api.assemblyai.com/v2/transcript'

# Two header sets: authorization only, and authorization plus a JSON content type.
HEADERS_AUTH_ONLY = dict(authorization=API_KEY_ASSEMBLYAI)
HEADERS_JSON = {**HEADERS_AUTH_ONLY, "content-type": "application/json"}

# Number of bytes read from the audio file per chunk.
CHUNK_SIZE = 5 * 1024 * 1024  # 5MB

In [23]:
# Cell 5: YouTube audio downloader
def download_audio(url, output_dir="downloads"):
    """Download the audio of ``url`` with yt-dlp, configured to extract an mp3.

    Args:
        url: Address handed to ``YoutubeDL.extract_info``.
        output_dir: Directory the file is written to; created when missing.

    Returns:
        ``(filename, title)``: ``filename`` is yt-dlp's prepared filename with
        its extension swapped for ``.mp3``; ``title`` is the ``title`` entry of
        the extracted info (``'audio'`` when absent). ``(None, None)`` when any
        exception is raised -- the error is printed, not re-raised.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Post-processing step asking for a 192-quality mp3.
    # NOTE(review): presumably needs ffmpeg available on the machine -- confirm.
    extract_mp3 = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = dict(
        format='bestaudio/best',
        outtmpl=os.path.join(output_dir, '%(title).200s.%(ext)s'),
        quiet=True,
        no_warnings=True,
        postprocessors=[extract_mp3],
    )

    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            info = downloader.extract_info(url, download=True)
            # The prepared name carries the source extension; the audio is
            # expected under the same stem with ".mp3" instead.
            stem, _source_ext = os.path.splitext(downloader.prepare_filename(info))
            return stem + ".mp3", info.get('title', 'audio')
    except Exception as e:
        print(f"Error downloading audio: {str(e)}")
        return None, None


In [24]:
# Cell 6: Upload audio file
def upload(filename):
    """Send a local file to the AssemblyAI upload endpoint.

    The file is handed to ``requests.post`` as a generator that yields it in
    ``CHUNK_SIZE``-byte pieces, so it is never read into memory in one go.

    Args:
        filename: Path of the file to upload.

    Returns:
        The ``upload_url`` value of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    def read_file(path):
        # Yield the file in fixed-size chunks until read() returns b''.
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
                yield chunk

    response = requests.post(UPLOAD_ENDPOINT, headers=HEADERS_AUTH_ONLY, data=read_file(filename))
    # Surface a failed request (e.g. a rejected API key) as an HTTP error
    # instead of an opaque KeyError on the missing 'upload_url' field.
    response.raise_for_status()
    return response.json()['upload_url']

In [25]:
# Cell 7: Transcribe
def transcribe(audio_url, sentiment_analysis=True):
    """Submit a transcription job for ``audio_url`` and return its id.

    Args:
        audio_url: Value sent as ``audio_url`` in the request body.
        sentiment_analysis: Value sent as ``sentiment_analysis`` in the
            request body.

    Returns:
        The ``id`` value of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    request_body = {
        'audio_url': audio_url,
        'sentiment_analysis': sentiment_analysis
    }
    response = requests.post(TRANSCRIPT_ENDPOINT, json=request_body, headers=HEADERS_JSON)
    # Surface a rejected request as an HTTP error instead of an opaque
    # KeyError on the missing 'id' field.
    response.raise_for_status()
    return response.json()['id']

In [26]:
# Cell 8: Poll
def poll(transcript_id, interval=30):
    """Poll a transcription job until it reports ``completed`` or ``error``.

    Args:
        transcript_id: Job id; appended to ``TRANSCRIPT_ENDPOINT`` to form the
            polling URL.
        interval: Seconds to sleep between two polls. Defaults to 30, the
            delay this function always used before the parameter existed.

    Returns:
        ``(data, None)`` with the full JSON payload once the status is
        ``completed``; ``(None, message)`` once the status is ``error``.
        ``message`` is never empty, so callers can rely on its truthiness.

    Raises:
        requests.HTTPError: If a poll answers with a 4xx/5xx status.
    """
    polling_endpoint = f"{TRANSCRIPT_ENDPOINT}/{transcript_id}"
    while True:
        response = requests.get(polling_endpoint, headers=HEADERS_JSON)
        # A failed HTTP call should stop the loop with a clear error rather
        # than a KeyError on the missing 'status' field.
        response.raise_for_status()
        data = response.json()
        status = data['status']
        if status == 'completed':
            return data, None
        if status == 'error':
            # Fall back to a generic message so a missing/empty 'error' field
            # cannot make the failure look like a success to `if error:`.
            return None, data.get('error') or "Transcription failed (no error message returned)"
        print(f"Waiting {interval} seconds...")
        time.sleep(interval)

In [27]:
# Cell 9: Save transcript and sentiment
def save_transcript(data, title):
    """Write the transcript text and, when present, its sentiment results to ``data/``.

    Files are named after ``title`` with spaces replaced by underscores:
    ``<title>.txt`` holds ``data["text"]`` and ``<title>_sentiments.json`` holds
    ``data["sentiment_analysis_results"]`` (written only when that key exists).

    Args:
        data: Mapping with a ``"text"`` entry and optionally a
            ``"sentiment_analysis_results"`` entry.
        title: Base name for the output files.

    Returns:
        None. Success or failure of the writes is reported with ``print``;
        exceptions raised while writing are caught, not propagated.
    """
    os.makedirs("data", exist_ok=True)
    base = os.path.join("data", title.replace(" ", "_"))

    try:
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent every character of a transcript.
        with open(base + ".txt", "w", encoding="utf-8") as f:
            f.write(data["text"])
        if "sentiment_analysis_results" in data:
            with open(base + "_sentiments.json", "w", encoding="utf-8") as f:
                json.dump(data["sentiment_analysis_results"], f, indent=4)
        print("Transcription saved.")
    except Exception as e:
        print("Failed to save transcript:", e)

In [28]:
# Cell 10: Analyze sentiments
def analyze_sentiments(title):
    """Print a per-label count of the sentiment results saved for ``title``.

    Loads ``data/<title with spaces as underscores>_sentiments.json``, collects
    the ``text`` of every entry labelled POSITIVE, NEGATIVE or NEUTRAL, prints
    the three counts and, when available, the first positive and the first
    negative text.

    Args:
        title: Title whose sentiment file should be summarised.

    Returns:
        None. Any exception while reading or processing is caught and printed.
    """
    sentiments_path = os.path.join("data", title.replace(" ", "_") + "_sentiments.json")
    try:
        with open(sentiments_path, "r") as handle:
            results = json.load(handle)

        def texts_labelled(label):
            # All texts whose entry carries exactly this sentiment label.
            return [entry["text"] for entry in results if entry["sentiment"] == label]

        positive = texts_labelled("POSITIVE")
        negative = texts_labelled("NEGATIVE")
        neutral = texts_labelled("NEUTRAL")

        print("\nSentiment Summary")
        print("------------------")
        summary_rows = (
            ("Positive:", positive),
            ("Negative:", negative),
            ("Neutral :", neutral),
        )
        for heading, texts in summary_rows:
            print(heading, len(texts))

        if positive:
            print("\nPositive example:", positive[0])
        if negative:
            print("\nNegative example:", negative[0])
    except Exception as e:
        print("Error reading sentiments:", e)

In [30]:
# Cell 11: Full workflow
def process_youtube_video(url):
    """Run the whole pipeline for one video URL.

    Steps, in order: download the audio, upload it, request a transcript with
    sentiment analysis, wait for the result, save it, and print the sentiment
    summary. Stops early (after printing a message) when the download yields
    no file or when polling reports an error.

    Args:
        url: Video address passed to ``download_audio``.

    Returns:
        None.
    """
    audio_path, video_title = download_audio(url)
    if audio_path:
        print("Uploading to AssemblyAI...")
        remote_url = upload(audio_path)

        print("Transcribing...")
        job_id = transcribe(remote_url, sentiment_analysis=True)
        transcript, failure = poll(job_id)

        if failure:
            print("Transcription Error:", failure)
        else:
            save_transcript(transcript, video_title)
            analyze_sentiments(video_title)
    else:
        print("Failed to download.")

In [31]:
# Cell 12: Run
# Ask for the video address interactively, then hand it to the full workflow.
video_url = input("Enter a YouTube video URL: ")
process_youtube_video(url=video_url)


Uploading to AssemblyAI...                                              
Transcribing...
Waiting 30 seconds...
Transcription saved.

Sentiment Summary
------------------
Positive: 10
Negative: 2
Neutral : 42

Positive example: I am happy to announce new series on machine learning with Python.

Negative example: Similarly, if I asked you to search through millions of records, then that search operation is extremely difficult for humans, whereas machines can do it in a matter of nanosecond.
