In [2]:
# Install necessary libraries
!pip install moviepy pydub openai-whisper torch librosa

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidi

In [3]:
import os
import pickle
import tempfile
import warnings

import joblib
import moviepy.editor as mp
import numpy as np
import torch
import whisper
from pydub import AudioSegment
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

In [16]:
import joblib

# Load the two serialized objects the sentiment step relies on:
# `vectorizer` is used for `.transform(...)` and `sentiment_model` for `.predict(...)`.
# Both files are read from the current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load files
# that come from a trusted source.
# NOTE(review): presumably these were saved with scikit-learn; confirm the
# installed version matches the one used to create them.
vectorizer = joblib.load("count_vectorizer.pkl")
sentiment_model = joblib.load("random_forest_model.pkl")

In [6]:
# Load Whisper model for transcription.
# Uses the "base" checkpoint; the progress bar in this cell's output shows the
# weights being downloaded when the cell ran.
whisper_model = whisper.load_model("base")

100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 129MiB/s]


In [17]:
# Utility: Detect file type and convert if needed
def convert_to_mp3(file_path):
    """Return the path of an audio file for ``file_path``, extracting audio from video if needed.

    Video inputs (.mp4, .mov, .avi, .mkv) have their audio track written to
    ``<stem>.mp3`` next to the source file. Audio inputs (.wav, .mp3, .m4a) are
    returned unchanged -- despite the function name, they are not re-encoded.

    Args:
        file_path: Path to the input media file. The extension is matched
            case-insensitively.

    Returns:
        The path of the audio file to use: the newly written ``.mp3`` for video
        inputs, or ``file_path`` itself for audio inputs.

    Raises:
        ValueError: If the extension is not supported, or if a video file has
            no audio track to extract.
    """
    stem, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext in (".wav", ".mp3", ".m4a"):
        return file_path
    if ext not in (".mp4", ".mov", ".avi", ".mkv"):
        raise ValueError("Unsupported file type")

    audio_path = stem + ".mp3"
    video = mp.VideoFileClip(file_path)
    try:
        # A clip without an audio stream exposes `audio` as None; report that
        # clearly instead of failing with an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {file_path!r}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Always close the clip so its underlying readers are released,
        # even when extraction fails.
        video.close()
    return audio_path

In [18]:
# Transcribe and segment speakers (basic)
def transcribe_audio(file_path):
    """Transcribe ``file_path`` with the module-level ``whisper_model``.

    Transcription is requested with ``word_timestamps=True``; only the
    ``'segments'`` entry of the model's result is returned.
    """
    return whisper_model.transcribe(file_path, word_timestamps=True)['segments']

In [19]:
# Dummy speaker diarization using word timestamps (can be improved)
def segment_by_speaker(segments):
    """Split segment texts between two placeholder speakers by position.

    This is not real diarization: segments at even positions (0, 2, ...) are
    attributed to "SPEAKER_1" and those at odd positions to "SPEAKER_2".

    Returns a dict mapping each speaker label to the list of its
    ``segment['text']`` values, in their original order.
    """
    texts = [seg['text'] for seg in segments]
    return {"SPEAKER_1": texts[0::2], "SPEAKER_2": texts[1::2]}

In [20]:
# Sentiment analysis per speaker
# Numeric value assigned to each label the classifier may predict.
sentiment_scores = {
    "positive": 1,
    "neutral": 0,
    "negative": -1
}


def analyze_sentiment(speaker_texts):
    """Score each speaker's texts with the module-level vectorizer and classifier.

    Each text is vectorized and classified; the predicted label is mapped to a
    number through ``sentiment_scores`` and the numbers are averaged per
    speaker. An average above 0.2 is reported as "Positive", below -0.2 as
    "Negative", and anything in between as "Neutral".

    Label lookup ignores case and surrounding whitespace. Labels that still do
    not match a key of ``sentiment_scores`` are scored as 0 and reported once
    per call through ``warnings.warn`` -- without that, a classifier whose
    classes are spelled or encoded differently would silently make every
    result neutral.

    Args:
        speaker_texts: Mapping of speaker label to a list of text strings.

    Returns:
        Dict mapping each speaker label to
        ``{"average_score": <mean score>, "overall_sentiment": <label>}``.
        A speaker with no texts gets an average of 0.0 ("Neutral").
    """
    results = {}
    unknown_labels = set()

    for speaker, texts in speaker_texts.items():
        texts = list(texts)
        if texts:
            # One batched transform/predict per speaker instead of one per text.
            predictions = sentiment_model.predict(vectorizer.transform(texts))
            scores = []
            for pred in predictions:
                label = str(pred).strip().lower()
                if label not in sentiment_scores:
                    unknown_labels.add(label)
                scores.append(sentiment_scores.get(label, 0))
            avg_score = np.mean(scores)
        else:
            # np.mean([]) would yield NaN (plus a RuntimeWarning); a speaker
            # with nothing to score is reported as neutral instead.
            avg_score = np.float64(0.0)

        if avg_score > 0.2:
            overall = "Positive"
        elif avg_score < -0.2:
            overall = "Negative"
        else:
            overall = "Neutral"

        results[speaker] = {
            "average_score": avg_score,
            "overall_sentiment": overall
        }

    if unknown_labels:
        warnings.warn(
            f"Unrecognized sentiment label(s) {sorted(unknown_labels)} were scored as 0; "
            f"expected one of {sorted(sentiment_scores)}",
            stacklevel=2,
        )
    return results


In [21]:
# Main function
def process_podcast(file_path):
    """Run the full pipeline on one media file and summarize its sentiment.

    Steps: convert the input to audio, transcribe it, split the segments
    between speakers, score each speaker, then average the per-speaker scores.

    Args:
        file_path: Path to an audio or video file accepted by ``convert_to_mp3``.

    Returns:
        Dict with three keys:
        "per_speaker" -- the result of ``analyze_sentiment``;
        "overall_score" -- mean of the per-speaker averages, taken over the
        speakers that have at least one text (0.0 if none do);
        "overall" -- "Positive" if that mean is above 0.2, "Negative" if it is
        below -0.2, otherwise "Neutral".
    """
    mp3_path = convert_to_mp3(file_path)
    segments = transcribe_audio(mp3_path)
    speaker_texts = segment_by_speaker(segments)
    sentiment_result = analyze_sentiment(speaker_texts)

    # Average only over speakers that actually have text. A speaker with no
    # segments carries no sentiment information (and its average may be NaN),
    # which would otherwise turn the overall score into NaN or dilute it.
    spoken_scores = [
        sentiment_result[speaker]['average_score']
        for speaker, texts in speaker_texts.items()
        if texts and speaker in sentiment_result
    ]
    total_score = np.mean(spoken_scores) if spoken_scores else np.float64(0.0)

    if total_score > 0.2:
        overall = "Positive"
    elif total_score < -0.2:
        overall = "Negative"
    else:
        overall = "Neutral"

    return {
        "per_speaker": sentiment_result,
        "overall": overall,
        "overall_score": total_score
    }

In [25]:
from google.colab import files
import shutil

# Upload an audio or video file from your device (Colab upload widget)
uploaded = files.upload()

# NOTE(review): files.upload() is expected to return an empty mapping when no
# file is chosen -- confirm. Fail with a clear message in that case instead of
# a bare StopIteration from next(iter(...)).
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose a file.")

# Name of the first uploaded file. The upload log shows the file being saved
# under this name in the current working directory, so it can be used as-is;
# the previous shutil.move(filename, f"./{filename}") moved the file onto itself.
filename = next(iter(uploaded))

# Now run the podcast analysis
result = process_podcast(filename)
print(result)

Saving MSSP - Matt's Video Game Rage.mp4 to MSSP - Matt's Video Game Rage.mp4
MoviePy - Writing audio in MSSP - Matt's Video Game Rage.mp3




MoviePy - Done.
{'per_speaker': {'SPEAKER_1': {'average_score': np.float64(0.0), 'overall_sentiment': 'Neutral'}, 'SPEAKER_2': {'average_score': np.float64(0.0), 'overall_sentiment': 'Neutral'}}, 'overall': 'Neutral', 'overall_score': np.float64(0.0)}
