In [5]:
!pip install openai-whisper

Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/803.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/803.2 kB[0m [31m28.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wheel for openai-whisper: filename=openai_whisper-20250625-py3-none-any.whl size=803980 sha256=ab20c83686bbd7bf98803be37a7d651baa7728f4acbcf1d57a2faa1d5849f68b
  Stored in directory: /root/.cache/pip/wheels/61/d2/20/09ec9bef734d126cba375b

In [2]:
import whisper
import datetime

# Transcribe the podcast with Whisper, then print the start time of every
# segment that mentions one of the chapter keywords.

# 1. Load the model
# 'large' is the most accurate version for industry terms and brands
print("Loading Whisper model... please wait.")
model = whisper.load_model("large")

# 2. Transcribe the file
# Ensure your file is named 'podcast_trends.mp3' and uploaded to Colab
file_path = "podcast_trends.mp3"
print(f"Transcribing {file_path}...")
result = model.transcribe(file_path)

# 3. Define keywords to find the specific segments
# We are looking for the Gourmand section and the Russia section
# Terms are matched as lower-case substrings of each segment's text.
search_terms = ["vanilla", "gourmand", "russia", "duhi", "saint stacking"]

print("\n--- SCANNING FOR CHAPTER TIMESTAMPS ---")

found_any = False
for segment in result['segments']:
    text_content = segment['text'].lower()

    # The start time does not depend on the keyword, so compute it once per
    # segment. Fractional seconds are dropped by int().
    start_seconds = int(segment['start'])
    # Build a real zero-padded MM:SS string. str(datetime.timedelta(...))
    # would print H:MM:SS (e.g. "0:06:15"), which does not match the label.
    minutes, seconds = divmod(start_seconds, 60)
    timestamp_readable = f"{minutes:02d}:{seconds:02d}"

    for term in search_terms:
        if term in text_content:
            found_any = True

            print(f"MATCH FOUND: '{term}'")
            print(f"Time (seconds): {start_seconds}")
            print(f"Time (MM:SS):   {timestamp_readable}")
            print(f"Snippet:        \"{segment['text'].strip()}\"")
            print("-" * 40)

if not found_any:
    print("No keywords found. Please check the transcript manually.")

print("\nPROCESS COMPLETE.")

Loading Whisper model... please wait.


100%|█████████████████████████████████████| 2.88G/2.88G [00:41<00:00, 73.6MiB/s]


Transcribing podcast_trends.mp3...


KeyboardInterrupt: 

In [6]:
import os
# Sanity check: list the current working directory to confirm the audio
# file was uploaded before running a transcription cell.
print("Files currently in Colab:")
colab_entries = os.listdir()
print(colab_entries)

Files currently in Colab:
['.config', 'podcast_trends.mp3', 'drive', 'sample_data']


In [7]:
import whisper
import torch
import os
from google.colab import files

# Transcribe the podcast, collect every segment that mentions a keyword, and
# write the timestamps to a text file that is then downloaded from Colab.

# 1. Setup device and load model
# Use the GPU when torch reports one, otherwise fall back to the CPU.
# 'medium' or 'large' is better for brand names like "Givaudan" or "Duhi"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
model = whisper.load_model("medium", device=device)

# 2. UPDATED KEYWORDS for your fragrance project
keywords = ["vanilla", "gourmand", "russia", "duhi", "recession", "givaudan"]
file_name = "podcast_trends.mp3"
output_file = "timestamps_for_app.txt"

if os.path.exists(file_name):
    print(f"Analyzing {file_name}... Please wait, extracting strategic timestamps.")

    # Transcription
    result = model.transcribe(file_name, verbose=False)
    segments = result['segments']

    # 3. Search for keywords in segments
    # Matching is a case-insensitive substring test on the segment text.
    # Each matching segment produces exactly ONE line that lists every keyword
    # it contains, so a segment with two keywords is not reported twice.
    timed_matches = []  # (start second, formatted line)
    for segment in segments:
        text = segment['text'].lower()
        hits = [word.upper() for word in keywords if word.lower() in text]
        if not hits:
            continue

        start_sec = int(segment['start'])
        # Format MM:SS for you to read
        timestamp_min = f"{start_sec // 60:02d}:{start_sec % 60:02d}"

        # We save both: MM:SS for you and SECONDS for app.py
        match_line = f"[{timestamp_min}] (Seconds: {start_sec}) {', '.join(hits)}: {segment['text'].strip()}"
        timed_matches.append((start_sec, match_line))

    # Order by the numeric start second. Sorting the formatted strings would
    # be lexicographic and put "[100:00]" before "[99:00]" on long recordings.
    timed_matches.sort(key=lambda pair: pair[0])
    # dict.fromkeys drops exact duplicate lines while keeping the order.
    found_matches = list(dict.fromkeys(line for _, line in timed_matches))

    # 4. Save and Auto-Download
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("FRAGRANCE HUB TIMESTAMPS FOR APP.PY\n")
        f.write("="*40 + "\n")
        f.write("Copy the 'Seconds' value directly to your PODCAST_SCRIPT in VS Code.\n\n")
        if found_matches:
            for match in found_matches:
                f.write(match + "\n")
                print(match) # Also print to console so you don't have to open the file
        else:
            no_match_note = "No fragrance keywords found. Check if the audio is correct."
            f.write(no_match_note)
            # Surface the empty result in the notebook too, not only in the file.
            print(no_match_note)

    print(f"\n✅ Done! Downloading {output_file}...")
    files.download(output_file)

else:
    print(f"❌ Error: '{file_name}' not found! Make sure you uploaded it to the folder icon on the left.")

Using device: cuda
Analyzing podcast_trends.mp3... Please wait, extracting strategic timestamps.
Detected language: English


100%|██████████| 179573/179573 [05:09<00:00, 579.97frames/s]

[00:50] (Seconds: 50) RECESSION: We're calling it recession glam.
[00:51] (Seconds: 51) RECESSION: Recession glam.
[00:59] (Seconds: 59) RECESSION: From the early 2000s recession, I think.
[02:08] (Seconds: 128) GIVAUDAN: Right, to the Swiss giant Givaudan.
[02:09] (Seconds: 129) GIVAUDAN: Which is Givaudan.
[02:40] (Seconds: 160) RECESSION: So let's start with this concept of recession glam
[06:15] (Seconds: 375) VANILLA: Pistachios, salted caramel, vanilla.
[06:18] (Seconds: 378) GOURMAND: These aren't just gourmand food-like scents.
[06:46] (Seconds: 406) RECESSION: So in a recession environment, or let's even
[07:30] (Seconds: 450) RECESSION: And that is where the glam part of recession glam comes in.
[08:38] (Seconds: 518) VANILLA: The notes are caramelized vanilla, toasted macadamia nut.
[08:42] (Seconds: 522) GOURMAND: It fits perfectly into this huge gourmand trend
[09:31] (Seconds: 571) GOURMAND: The first one, as we said, is Gourmand 2.0.
[09:39] (Seconds: 579) GOURMAND: Gour




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import whisper
import os
from google.colab import files

# Produce a plain-text transcript of the whole podcast and download it.

# 1. Setup and Model Loading
# Always load the "medium" checkpoint explicitly. Reusing whatever object is
# already bound to `model` would make the transcript depend on which other
# cell ran last (other cells in this notebook load "large" and "base").
print("Loading Whisper model... please wait.")
model = whisper.load_model("medium")

# 2. Check if the audio file exists
audio_file = "podcast_trends.mp3"

if os.path.exists(audio_file):
    print(f"✅ Found {audio_file}. Starting full transcription...")

    # 3. Execute transcription
    result = model.transcribe(audio_file)
    full_text = result['text']

    # 4. Save the full text to a .txt file
    transcript_name = "full_podcast_transcript.txt"
    with open(transcript_name, "w", encoding="utf-8") as f:
        f.write(full_text)

    # 5. Download the file
    print(f"✨ Transcription complete! Downloading {transcript_name}...")
    files.download(transcript_name)

else:
    print(f"❌ Error: '{audio_file}' not found in Colab files.")

    

✅ Found podcast_trends.mp3. Starting full transcription...
✨ Transcription complete! Downloading full_podcast_transcript.txt...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
import whisper

# Transcribe podcast_2026.mp3 with the "base" model and save the text as Markdown.
import os  # needed for the missing-file guard below

print("Step 1: Loading Whisper AI model (this may take a few seconds)...")
# Load the "base" model
model = whisper.load_model("base")

audio_2026 = "podcast_2026.mp3"

# Guard against a missing upload, as the other transcription cells do, so the
# user gets a clear message instead of a failure from inside transcribe().
if os.path.exists(audio_2026):
    print("Step 2: Starting transcription for podcast_2026.mp3...")
    result_2026 = model.transcribe(audio_2026)

    print("Step 3: Saving the result to a Markdown file...")
    with open("podcast_transcript_2026.md", "w", encoding="utf-8") as f:
        f.write(result_2026["text"])

    print("✨ Success! The podcast_transcript_2026.md file is ready.")
else:
    print(f"❌ Error: '{audio_2026}' not found in Colab files.")

Step 1: Loading Whisper AI model (this may take a few seconds)...
Step 2: Starting transcription for podcast_2026.mp3...




Step 3: Saving the result to a Markdown file...
✨ Success! The podcast_transcript_2026.md file is ready.
