In [None]:
pip install SpeechRecognition pydub

In [None]:
pip install openai-whisper


In [None]:
import whisper

model = whisper.load_model("small")  # or "base", "medium"
result = model.transcribe("Recording.m4a")
print(result["text"])

In [1]:
import os
import re
import torch
import whisper
from transformers import pipeline

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [2]:
# Force GPU if available
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load summariser and sentiment analyser
summariser = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if DEVICE == "cuda" else -1
)
sentiment_analyser = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=0 if DEVICE == "cuda" else -1
)

In [3]:
def safe_filename(text: str, max_len: int = 50) -> str:
    """
    Convert summary text into a safe filename.
    Keep alphanumeric and underscores, truncate length.
    """
    raw = "_".join(text.strip().split())
    safe = re.sub(r'[^A-Za-z0-9_]+', '', raw)
    return (safe[:max_len] or "transcription").lower()

def truncate_text_for_model(text: str, max_chars: int) -> str:
    """
    Truncate text to roughly max_chars characters for safe model input.
    """
    return text[:max_chars]

def main(in_path):
    audio_path = in_path

    # Load Whisper model on GPU
    model = whisper.load_model("small", device=DEVICE)

    # Transcribe
    print(f"Transcribing {audio_path} on {DEVICE}...")
    result = model.transcribe(audio_path)
    transcript_text = result["text"]

    summary_text_input = truncate_text_for_model(transcript_text, max_chars=4000)


    # Concise summary (shorter max_length)
    summary = summariser(
        summary_text_input,
        max_length=15,
        min_length=5,
        do_sample=False
    )[0]["summary_text"]
    print(f"Summary: {summary}")

    # Sentiment analysis
    sentiment_result = sentiment_analyser(transcript_text[512:2*512])[0]  # limit to first 512 tokens
    sentiment = f"{sentiment_result['label']} ({sentiment_result['score']:.2f})"
    print(f"Sentiment: {sentiment}")

    # Build output filename from summary
    base_dir = os.path.dirname(audio_path) or "."
    fname = safe_filename(summary)
    output_path = os.path.join(base_dir, f"{fname}.txt")

    # Save summary, sentiment, and transcription
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("=== SUMMARY ===\n")
        f.write(summary + "\n\n")
        f.write("=== SENTIMENT ===\n")
        f.write(sentiment + "\n\n")
        f.write("=== TRANSCRIPT ===\n")
        f.write(transcript_text)

    print(f"Transcription, summary, and sentiment saved to {output_path}")

In [5]:
if __name__ == "__main__":
    for i in range(19,43):
        main(f"Recording ({i}).m4a")

Transcribing Recording (19).m4a on cuda...
Summary: April 2024. What did I do to level up today?
Sentiment: POSITIVE (1.00)
Transcription, summary, and sentiment saved to .\april_2024_what_did_i_do_to_level_up_today.txt
Transcribing Recording (20).m4a on cuda...
Summary: "I truly don't understand what it is about me that
Sentiment: NEGATIVE (1.00)
Transcription, summary, and sentiment saved to .\i_truly_dont_understand_what_it_is_about_me_that.txt
Transcribing Recording (21).m4a on cuda...
Summary: "I'm feeling resentment, I think, and this is
Sentiment: POSITIVE (1.00)
Transcription, summary, and sentiment saved to .\im_feeling_resentment_i_think_and_this_is.txt
Transcribing Recording (22).m4a on cuda...
Summary: 10th of May 2024. Time to talk with and to
Sentiment: POSITIVE (1.00)
Transcription, summary, and sentiment saved to .\10th_of_may_2024_time_to_talk_with_and_to.txt
Transcribing Recording (23).m4a on cuda...
Summary: We are overextending and overstimulating ourselves. We
Sent

In [None]:
conda install -c conda-forge ffmpeg