In [1]:
import joblib
from keras.models import load_model

# Load audio emotion model (already trained and saved)
audio_model = joblib.load("/kaggle/input/audio-emotion/other/default/1/emotion_model.pkl")

# Load facial emotion model (already trained and saved)
face_model = load_model("/kaggle/input/face-emotion/other/default/1/model_optimal.h5")


2025-05-01 04:03:08.651378: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746072189.064818      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746072189.184615      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1746072207.801551      31 gpu_device.cc:2022] Created device /job:localhost/r

In [2]:
import librosa
import numpy as np

def predict_audio_emotion(audio_path):
    x, sr = librosa.load(audio_path)
    mfcc = np.mean(librosa.feature.mfcc(y=x, sr=sr, n_mfcc=128), axis=1).reshape(1, -1)
    pred = audio_model.predict(mfcc)[0]
    return "happy" if pred == 0 else "sad"


In [3]:
from keras.preprocessing import image

label_dict = {0:'Angry',1:'Disgust',2:'Fear',3:'Happy',4:'Neutral',5:'Sad',6:'Surprise'}

def predict_face_emotion(image_path):
    img = image.load_img(image_path, target_size=(48, 48), color_mode="grayscale")
    img = np.array(img).reshape(1, 48, 48, 1) / 255.0
    result = face_model.predict(img)
    label_idx = np.argmax(result[0])
    return label_dict[label_idx]


In [4]:
def combine_emotions(audio_path, face_image_path):
    audio_emotion = predict_audio_emotion(audio_path)
    face_emotion = predict_face_emotion(face_image_path)
    
    print(f"Audio Emotion: {audio_emotion}")
    print(f"Facial Emotion: {face_emotion}")
    
    # Simple fusion logic (example: if both agree, use that; else prioritize face)
    if audio_emotion.lower() in face_emotion.lower():
        final = face_emotion
    elif face_emotion in ['Happy', 'Sad']:
        final = face_emotion
    else:
        final = audio_emotion.capitalize()
    
    print(f"Final Multimodal Emotion: {final}")
    return final


In [5]:
audio_path = "/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-01-01-01-01-01.wav"
face_image_path = "/kaggle/input/fer2013/train/happy/Training_10019449.jpg"

combine_emotions(audio_path, face_image_path)


I0000 00:00:1746072225.450901      92 service.cc:148] XLA service 0x787d30003910 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746072225.452533      92 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1746072225.452572      92 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1746072225.665214      92 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
Audio Emotion: sad
Facial Emotion: Happy
Final Multimodal Emotion: Happy


I0000 00:00:1746072227.830743      92 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


'Happy'

In [6]:
import pandas as pd

df = pd.read_csv("/kaggle/input/spotify-million-song-dataset/spotify_millsongdata.csv")
print(df.columns)


Index(['artist', 'song', 'link', 'text'], dtype='object')


In [7]:
df = df.dropna(subset=["text", "artist", "song"])


In [9]:
# Clean lyrics
import re
def clean_lyrics(text):
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'[^a-zA-Z ]', '', text)
    text = text.lower()
    return text

df["clean_lyrics"] = df["text"].apply(clean_lyrics)


In [12]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk

# Download NLTK stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load the dataset
df = pd.read_csv("/kaggle/input/spotify-million-song-dataset/spotify_millsongdata.csv")

# Show column names to verify structure (optional)
print("Columns:", df.columns)

# Drop rows with missing values
df = df.dropna(subset=["text", "artist", "song"])

# Clean lyrics/text
def clean_lyrics(text):
    text = re.sub(r'\[.*?\]', '', text)        # Remove [Verse], [Chorus], etc.
    text = re.sub(r'\n', ' ', text)            # Replace newlines with space
    text = re.sub(r'[^a-zA-Z ]', '', text)     # Remove punctuation/special chars
    text = text.lower()
    return text

df["clean_lyrics"] = df["text"].apply(clean_lyrics)

# Sample for performance
df = df.sample(5000, random_state=42).reset_index(drop=True)

# Create TF-IDF Matrix
tfidf = TfidfVectorizer(stop_words=stopwords.words('english'), max_features=5000)
tfidf_matrix = tfidf.fit_transform(df["clean_lyrics"])

# Compute cosine similarity matrix
cos_sim = cosine_similarity(tfidf_matrix)

# Recommend similar songs based on lyrics
def recommend_songs(song_title, top_n=20):
    idx = df[df['song'].str.lower() == song_title.lower()].index
    if len(idx) == 0:
        print(" Song not found in dataset!")
        return
    idx = idx[0]
    sim_scores = list(enumerate(cos_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]

    print(f"\n🎵 Recommendations based on '{df.iloc[idx]['song']}' by {df.iloc[idx]['artist']}:")
    for i, (song_idx, score) in enumerate(sim_scores):
        song = df.iloc[song_idx]
        print(f"{i+1}. {song['song']} by {song['artist']} (Similarity Score: {score:.2f})")

# Example usage
recommend_songs("Everything Must Change")  # Replace with a song title from the dataset



[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Columns: Index(['artist', 'song', 'link', 'text'], dtype='object')

🎵 Recommendations based on 'Everything Must Change' by Nina Simone:
1. Fly Into The Sun by Lou Reed (Similarity Score: 0.28)
2. Here Comes The Sun by Demi Lovato (Similarity Score: 0.27)
3. Silvery Rain by Olivia Newton-John (Similarity Score: 0.27)
4. It's Just The Sun by Don McLean (Similarity Score: 0.27)
5. No More Rain by Kylie Minogue (Similarity Score: 0.25)
6. The Surest Things Can Change by Gino Vannelli (Similarity Score: 0.25)
7. Sun Showers by Louis Armstrong (Similarity Score: 0.24)
8. Rain by Ian Hunter (Similarity Score: 0.23)
9. One Of The Mysteries Of Life by Tom T. Hall (Similarity Score: 0.23)
10. Lazy Old Sun by Kinks (Similarity Score: 0.21)
11. Blind Man by Aerosmith (Similarity Score: 0.21)
12. Night Lights by Nat King Cole (Similarity Score: 0.21)
13. Rain by Kiss (Similarity Score: 0.21)
14. Flashing Lights by Kanye West (Similarity Score: 0.20)
15. Red Lights by Uriah Heep (Similarity Score: 0

In [17]:
def recommend_by_mood(detected_mood, top_n=20):
    keywords = mood_keywords.get(detected_mood.lower(), [])
    if not keywords:
        print("😕 No matching keywords for mood:", detected_mood)
        return

    mask = df["clean_lyrics"].apply(lambda x: any(word in x for word in keywords))
    filtered_df = df[mask]
    if filtered_df.empty:
        print("🙁 No matching songs found for this mood.")
        return

    sampled = filtered_df.sample(min(top_n, len(filtered_df)))
    print(f"\n🎧 Mood: {detected_mood.upper()} — Top {len(sampled)} Song Recommendations:\n")
    for i, row in sampled.iterrows():
        print(f"{row['song']} by {row['artist']}")


In [18]:
mood_keywords = {
    "happy": ["happy", "joy", "smile", "sun", "celebrate", "dance", "love"],
    "sad": ["cry", "tears", "alone", "heartbreak", "miss", "lost", "pain"],
    "angry": ["fire", "rage", "fight", "scream", "hate", "revenge"],
    "calm": ["peace", "calm", "breeze", "slow", "relax", "soft"],
    "excited": ["party", "tonight", "wild", "jump", "high", "energy"],
    "neutral": ["life", "time", "way", "dream", "go", "day"]
}


In [19]:
def combine_emotions(audio_path, face_image_path):
    audio_emotion = predict_audio_emotion(audio_path)
    face_emotion = predict_face_emotion(face_image_path)
    
    print(f"Audio Emotion: {audio_emotion}")
    print(f"Facial Emotion: {face_emotion}")
    
    if audio_emotion.lower() in face_emotion.lower():
        final = face_emotion
    elif face_emotion in ['Happy', 'Sad']:
        final = face_emotion
    else:
        final = audio_emotion.capitalize()

    final = final.lower()  # Normalize for mood matching
    print(f"Final Multimodal Emotion: {final}")
    return final


In [20]:
# Paths to test audio and image
audio_path = "/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-01-01-01-01-01.wav"
face_image_path = "/kaggle/input/fer2013/train/happy/Training_10019449.jpg"

# Detect emotion
final_mood = combine_emotions(audio_path, face_image_path)

# Recommend songs based on detected mood
recommend_by_mood(final_mood)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Audio Emotion: sad
Facial Emotion: Happy
Final Multimodal Emotion: happy

🎧 Mood: HAPPY — Top 20 Song Recommendations:

That's The Way Of The World by Incognito
Snow by Bing Crosby
Never On Sunday by Andy Williams
I Love Rock 'n' Roll by Britney Spears
Deeper by Hillsong United
I'll Kiss You by Cyndi Lauper
I Don't Believe You by Air Supply
Face To The Highway by Tom Waits
Roc Me Out by Rihanna
While There's Still Time by Styx
Drugs Or Jesus by Tim McGraw
Till They Take My Heart Away by Kyla
I Found Love With You by Dusty Springfield
O Come All Ye Faithful by Mariah Carey
Something Better by Marianne Faithfull
Cross On The Highway by Hank Williams Jr.
I'm Waiting For The Day by Beach Boys
I Hurt You by Pretenders
Goodbye's by Celine Dion
Only Love Would Know by Gordon Lightfoot


In [21]:
import sounddevice as sd
import librosa
import numpy as np
import cv2
import threading
import time
from keras.models import load_model
import joblib
from keras.preprocessing import image

# Load your models (audio + face)
audio_model = joblib.load("/kaggle/input/audio-emotion/other/default/1/emotion_model.pkl")
face_model = load_model("/kaggle/input/face-emotion/other/default/1/model_optimal.h5")

# Define the mood keywords (for song recommendations)
mood_keywords = {
    "happy": ["happy", "joy", "smile", "sun", "celebrate", "dance", "love"],
    "sad": ["cry", "tears", "alone", "heartbreak", "miss", "lost", "pain"],
    "angry": ["fire", "rage", "fight", "scream", "hate", "revenge"],
    "calm": ["peace", "calm", "breeze", "slow", "relax", "soft"],
    "excited": ["party", "tonight", "wild", "jump", "high", "energy"],
    "neutral": ["life", "time", "way", "dream", "go", "day"]
}

# Define emotion labels for the face model
label_dict = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Neutral', 5: 'Sad', 6: 'Surprise'}

# Function to predict audio emotion
def predict_audio_emotion(audio_data):
    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=16000, n_mfcc=128), axis=1).reshape(1, -1)
    pred = audio_model.predict(mfcc)[0]
    return "happy" if pred == 0 else "sad"

# Function to predict facial emotion
def predict_face_emotion(image_data):
    img = cv2.resize(image_data, (48, 48))
    img = np.array(img).reshape(1, 48, 48, 1) / 255.0
    result = face_model.predict(img)
    label_idx = np.argmax(result[0])
    return label_dict[label_idx]

# Function to recommend songs based on the detected mood
def recommend_by_mood(detected_mood):
    keywords = mood_keywords.get(detected_mood.lower(), [])
    if not keywords:
        print("No matching keywords for mood:", detected_mood)
        return

    # Here, replace with your song recommendation logic (filter by mood)
    print(f"\n🎧 Mood: {detected_mood.upper()} — Top Song Recommendations:")

# Function to combine audio and facial emotions
def combine_emotions(audio_path, face_image):
    audio_emotion = predict_audio_emotion(audio_path)
    face_emotion = predict_face_emotion(face_image)
    
    if audio_emotion.lower() == face_emotion.lower():
        final = face_emotion
    elif face_emotion in ['Happy', 'Sad']:
        final = face_emotion
    else:
        final = audio_emotion.capitalize()

    return final

# Start webcam capture (facial emotion)
def capture_face_and_predict():
    cap = cv2.VideoCapture(0)  # Webcam
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        face_emotion = predict_face_emotion(frame)
        print("Facial Emotion:", face_emotion)
        
        # After predicting, pass this emotion to the recommender system
        recommend_by_mood(face_emotion)  # Or combine face and audio emotion

        # Show live webcam feed (optional)
        cv2.imshow("Face Emotion Detection", frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()

# Start audio capture (audio emotion)
def capture_audio_and_predict():
    # Use Sounddevice to capture live audio from microphone
    def audio_callback(indata, frames, time, status):
        if status:
            print(status)
        audio_emotion = predict_audio_emotion(indata)
        print("Audio Emotion:", audio_emotion)
        
        # Use audio emotion to recommend songs
        recommend_by_mood(audio_emotion)

    # Set up audio stream
    with sd.InputStream(callback=audio_callback, channels=1, samplerate=16000):
        while True:
            time.sleep(0.1)

# Run both captures in parallel (audio + video)
def run_live_system():
    # Run audio capture in a separate thread
    audio_thread = threading.Thread(target=capture_audio_and_predict)
    audio_thread.start()

    # Run facial emotion detection in main thread
    capture_face_and_predict()

# Start the live system
run_live_system()


ModuleNotFoundError: No module named 'sounddevice'