In [1]:
import os
import librosa
import numpy as np
import pandas as pd

# Dataset root that is walked for .wav files, and the CSV written next to it.
root_folder = r"C:\Users\Adeel\Desktop\Fyp\Fyp Dataset"
output_csv = os.path.join(root_folder, "ravdess_all_actors_full_features.csv")

# Emotion code (third dash-separated field of a filename) -> emotion label.
emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised',
}

# Geometry of the per-file feature matrix.
max_pad_len = 130  # time frames kept per clip (shorter clips are zero-padded)
n_mfcc = 13        # MFCC coefficients computed per frame

def extract_info(filename):
    """Derive emotion, empathy label and actor id from a dash-separated filename.

    The name is split on "-". The third field is looked up in ``emotion_map``
    and the last field, with its extension removed, is parsed as the actor id.

    Args:
        filename: Base name of the audio file (no directory part).

    Returns:
        Tuple ``(emotion, empathy_label, actor_id)``. ``empathy_label`` is 1
        when the emotion is 'calm' or 'happy', otherwise 0. A name with fewer
        than seven fields, or whose last field is not an integer, yields
        ``("unknown", 0, -1)``.
    """
    fallback = ("unknown", 0, -1)

    parts = filename.split("-")
    if len(parts) < 7:
        return fallback

    # An arbitrary file can contain six dashes by coincidence. If its last
    # field is not a number the name does not follow the expected scheme, so
    # none of its fields are trusted and the same fallback is returned instead
    # of letting int() raise.
    try:
        actor_id = int(parts[-1].split(".")[0])
    except ValueError:
        return fallback

    emotion = emotion_map.get(parts[2], "unknown")
    empathy_label = 1 if emotion in ('calm', 'happy') else 0
    return emotion, empathy_label, actor_id

def extract_features(file_path):
    """Build a fixed-size, frame-level feature matrix for one audio file.

    The file is loaded at 16 kHz. MFCCs, chroma, spectral centroid,
    zero-crossing rate and RMS energy are stacked row-wise, the time axis is
    zero-padded or truncated to ``max_pad_len`` frames, and the result is
    transposed so that each row is one frame.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Array of shape ``(max_pad_len, n_mfcc + 15)``. If loading or feature
        extraction raises, the error is printed and an all-zero array of the
        same shape is returned.
        NOTE(review): callers cannot tell that fallback apart from real
        features, so failed files still end up as rows in the dataset.
    """
    n_chroma = 12
    # Rows per frame: MFCCs + chroma bins + centroid + ZCR + RMS. Deriving the
    # width here keeps the error fallback in step with n_mfcc rather than
    # relying on a hard-coded 28.
    n_features = n_mfcc + n_chroma + 3

    try:
        y, sr = librosa.load(file_path, sr=16000)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=n_chroma)
        spec_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        rmse = librosa.feature.rms(y=y)

        features = np.vstack([mfcc, chroma, spec_centroid, zcr, rmse])  # (n_features, time)

        # Pad with zeros on the right, or truncate, to exactly max_pad_len frames.
        n_frames = features.shape[1]
        if n_frames < max_pad_len:
            features = np.pad(
                features, ((0, 0), (0, max_pad_len - n_frames)), mode='constant'
            )
        else:
            features = features[:, :max_pad_len]

        return features.T  # (max_pad_len, n_features)

    except Exception as e:
        print(f"❌ Error extracting from {file_path}: {e}")
        return np.zeros((max_pad_len, n_features))

# Main loop: walk the dataset folder and build one flat feature row per .wav file.
data = []

# The directory list is named (not `_`) so the loop does not overwrite
# IPython's last-output variable.
for dirpath, dirnames, files in os.walk(root_folder):
    # os.walk yields entries in arbitrary order; sorting both lists (dirnames
    # in place, which also steers the walk) makes the CSV row order reproducible.
    dirnames.sort()
    for file in sorted(files):
        # Case-insensitive match so files named "*.WAV" are not silently skipped.
        if not file.lower().endswith(".wav"):
            continue

        file_path = os.path.join(dirpath, file)
        try:
            features = extract_features(file_path)
            emotion, empathy, actor_id = extract_info(file)

            row = {
                "filename": file,
                "actor_id": actor_id,
                "emotion": emotion,
                "empathy_label": empathy
            }

            # Flatten the matrix row by row and name the columns
            # feat_1 .. feat_N in that order.
            row.update(
                (f"feat_{i}", val)
                for i, val in enumerate(features.flatten(), start=1)
            )

            data.append(row)
            print(f"✅ Processed {file} - Emotion: {emotion}")

        except Exception as e:
            # Best effort: report the file and carry on with the rest.
            print(f"❌ Error processing {file}: {e}")

# Save
if data:
    df = pd.DataFrame(data)
    df.to_csv(output_csv, index=False)
    print(f"\n💾 Saved full feature dataset to: {output_csv}")
else:
    print("⚠️ No data extracted.")


✅ Processed yt_audio.wav - Emotion: unknown
✅ Processed 03-01-01-01-01-01-01.wav - Emotion: neutral
✅ Processed 03-01-01-01-01-02-01.wav - Emotion: neutral
✅ Processed 03-01-01-01-02-01-01.wav - Emotion: neutral
✅ Processed 03-01-01-01-02-02-01.wav - Emotion: neutral
✅ Processed 03-01-02-01-01-01-01.wav - Emotion: calm
✅ Processed 03-01-02-01-01-02-01.wav - Emotion: calm
✅ Processed 03-01-02-01-02-01-01.wav - Emotion: calm
✅ Processed 03-01-02-01-02-02-01.wav - Emotion: calm
✅ Processed 03-01-02-02-01-01-01.wav - Emotion: calm
✅ Processed 03-01-02-02-01-02-01.wav - Emotion: calm
✅ Processed 03-01-02-02-02-01-01.wav - Emotion: calm
✅ Processed 03-01-02-02-02-02-01.wav - Emotion: calm
✅ Processed 03-01-03-01-01-01-01.wav - Emotion: happy
✅ Processed 03-01-03-01-01-02-01.wav - Emotion: happy
✅ Processed 03-01-03-01-02-01-01.wav - Emotion: happy
✅ Processed 03-01-03-01-02-02-01.wav - Emotion: happy
✅ Processed 03-01-03-02-01-01-01.wav - Emotion: happy
✅ Processed 03-01-03-02-01-02-01.wav -

In [3]:
# Sanity check: reload the feature CSV from disk and print its first rows.
# The literal path repeats the folder and file name that the extraction cell
# joins into `output_csv`, so the two must be kept in sync by hand.
df = pd.read_csv(r"C:\Users\Adeel\Desktop\Fyp\Fyp Dataset\ravdess_all_actors_full_features.csv")
print(df.head())


                   filename  actor_id  emotion  empathy_label      feat_1  \
0              yt_audio.wav        -1  unknown              0 -551.247986   
1  03-01-01-01-01-01-01.wav         1  neutral              0 -875.163940   
2  03-01-01-01-01-02-01.wav         1  neutral              0 -873.403503   
3  03-01-01-01-02-01-01.wav         1  neutral              0 -847.362854   
4  03-01-01-01-02-02-01.wav         1  neutral              0 -840.580750   

     feat_2    feat_3    feat_4    feat_5    feat_6  ...  feat_3631  \
0  0.000000  0.000000  0.000000  0.000000  0.000000  ...   0.514816   
1  0.000000  0.000000  0.000000  0.000000  0.000000  ...   0.000000   
2  0.000000  0.000000  0.000000  0.000000  0.000000  ...   0.000000   
3  8.626196  8.449971  8.163250  7.776121  7.302010  ...   0.000000   
4  6.144870  0.699101  3.466658  7.742476  4.110916  ...   0.000000   

   feat_3632  feat_3633  feat_3634  feat_3635  feat_3636  feat_3637  \
0   0.476761   0.397562   0.460118   0.

In [9]:
import os
import librosa
import numpy as np
import yt_dlp
import json
import speech_recognition as sr
import joblib # or just `import joblib` if using normal pip install
from moviepy.editor import VideoFileClip

# ---------------------------- Step 1: Download YouTube Audio ----------------------------
def download_youtube_audio(url, output_wav="yt_audio1.wav"):
    """Download a video's best audio stream with yt-dlp and convert it to WAV.

    Args:
        url: Video URL handed to ``YoutubeDL.download``.
        output_wav: Path of the WAV file to produce. Only the part before the
            extension is used for the output template; the final extension is
            the ``wav`` codec requested from the FFmpegExtractAudio
            post-processor.
    """
    # Derive the template from the requested path so the parameter is honoured.
    # "%" is doubled because yt-dlp treats it as a template field marker.
    stem, _ext = os.path.splitext(output_wav)
    produced_path = stem + ".wav"

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': stem.replace('%', '%%') + '.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    # Report the path implied by the template, not merely the argument.
    print(f"✅ Downloaded and converted to {produced_path}")

# ---------------------------- Step 2: Transcribe Audio ----------------------------
def transcribe_audio(wav_file):
    """Transcribe a WAV file with the recognizer's ``recognize_google`` method.

    Args:
        wav_file: Path of the audio file opened through ``sr.AudioFile``.

    Returns:
        The text returned by ``recognize_google``.

    Any exception raised while opening, recording or recognising is not
    handled here and propagates to the caller.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_file) as audio_source:
        # record() is called without a duration or offset argument.
        recording = recognizer.record(audio_source)
        return recognizer.recognize_google(recording)

# ---------------------------- Step 3: Extract MFCC & Predict Emotion ----------------------------
def extract_features(file_path):
    """Summarise an audio file as a single row of time-averaged MFCCs.

    The file is loaded at 16 kHz, 13 MFCCs are computed, and each coefficient
    is averaged over the frame axis.

    NOTE: this reuses the name of the frame-level extractor defined in the
    dataset-building cell and replaces it once this cell has run. The two
    return differently shaped arrays.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        Array reshaped to ``(1, -1)``, i.e. one row holding the per-coefficient
        means, ready to pass to ``model.predict``.
    """
    signal, sample_rate = librosa.load(file_path, sr=16000)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    # Transpose so that axis 0 runs over frames, then average that axis away.
    per_coefficient_mean = np.mean(coefficients.T, axis=0)
    return per_coefficient_mean.reshape(1, -1)

def predict_emotion(model, encoder, features):
    """Predict a label for ``features`` and decode it back to its original form.

    Args:
        model: Object exposing ``predict(features)``.
        encoder: Object exposing ``inverse_transform(predictions)``.
        features: Input forwarded unchanged to ``model.predict``.

    Returns:
        The first element of the decoded predictions.
    """
    encoded_labels = model.predict(features)
    decoded_labels = encoder.inverse_transform(encoded_labels)
    return decoded_labels[0]

# ---------------------------- Step 4: Compute Empathy Score ----------------------------
def compute_empathy_score(audio_path):
    """Compute a heuristic score from pitch variability and loudness.

    The score is ``100 * (0.6 * var(pitch) + 0.4 * mean(rms))`` rounded to two
    decimals, where ``pitch`` is the ``librosa.yin`` estimate searched between
    75 and 300 and ``rms`` is the frame-wise RMS energy.

    NOTE(review): the two terms are not normalised, so the result has no fixed
    range. The run recorded in this notebook printed 227965.28. Confirm
    whether a bounded scale was intended.

    Args:
        audio_path: Path of the audio file, loaded with librosa's default
            sample rate.

    Returns:
        The rounded score.
    """
    signal, _ = librosa.load(audio_path)

    f0_track = librosa.yin(signal, fmin=75, fmax=300)
    rms_frames = librosa.feature.rms(y=signal)

    pitch_spread = np.var(f0_track)
    mean_energy = np.mean(rms_frames)

    weighted = pitch_spread * 0.6 + mean_energy * 0.4
    return round(weighted * 100, 2)

# ---------------------------- Step 5: Main Pipeline ----------------------------
def run_pipeline(youtube_url):
    """Download a video's audio, analyse it, and save the results as JSON.

    Steps: download and convert the audio to ``yt_audio.wav``, transcribe it,
    predict an emotion with the saved model and label encoder, compute the
    empathy score, print each result, and write them all to
    ``session_scores.json`` (opened in "w" mode, so any earlier file is
    overwritten).

    Args:
        youtube_url: URL of the video to analyse.
    """
    # Single definition of the working file shared by every step below.
    wav_path = "yt_audio.wav"

    download_youtube_audio(youtube_url, wav_path)

    transcription = transcribe_audio(wav_path)
    print("🗣️ Transcription:", transcription)

    features = extract_features(wav_path)

    # SECURITY: joblib.load unpickles its input and can execute arbitrary
    # code. Only load model files that come from a trusted source.
    model = joblib.load("emotion_model.pkl")
    label_encoder = joblib.load("emotion_label_encoder.pkl")

    emotion = predict_emotion(model, label_encoder, features)
    print("🎭 Predicted Emotion:", emotion)

    empathy = compute_empathy_score(wav_path)
    print("💓 Empathy Score:", empathy)

    # Convert to built-in types so json.dump does not depend on the model or
    # encoder returning numpy scalars that happen to subclass str/float.
    result = {
        "youtube_url": youtube_url,
        "transcription": transcription,
        "emotion": str(emotion),
        "empathy_score": float(empathy)
    }

    with open("session_scores.json", "w", encoding="utf-8") as f:
        json.dump(result, f, indent=4)
    print("📄 Result saved to session_scores.json")

# ---------------------------- Run Example ----------------------------
# Runs the whole pipeline for one video; replace the URL with the video to analyse.
# Executing this cell downloads audio over the network, loads emotion_model.pkl and
# emotion_label_encoder.pkl from the working directory, and overwrites session_scores.json.
run_pipeline("https://youtube.com/shorts/bUSh5oa-iuI?si=2BuJ8U3jBJ8XpRm2")


[youtube] Extracting URL: https://youtube.com/shorts/bUSh5oa-iuI?si=2BuJ8U3jBJ8XpRm2
[youtube] bUSh5oa-iuI: Downloading webpage
[youtube] bUSh5oa-iuI: Downloading tv client config
[youtube] bUSh5oa-iuI: Downloading tv player API JSON
[youtube] bUSh5oa-iuI: Downloading ios player API JSON
[youtube] bUSh5oa-iuI: Downloading m3u8 information
[info] bUSh5oa-iuI: Downloading 1 format(s): 251
[download] Destination: yt_audio.webm
[download] 100% of  539.83KiB in 00:00:02 at 193.72KiB/s 
[ExtractAudio] Destination: yt_audio.wav
Deleting original file yt_audio.webm (pass -k to keep)
✅ Downloaded and converted to yt_audio.wav
🗣️ Transcription: yes great your full name for me b a n s a l OK Google and struggling with the Hay Day for two days let's see thank you so much
🎭 Predicted Emotion: disgust
💓 Empathy Score: 227965.28
📄 Result saved to session_scores.json
