In [3]:
#I got clear output in saved output
import cv2
import numpy as np
import librosa
import joblib
from keras.models import load_model
from moviepy.editor import VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

# Load the trained model and label encoder
model = load_model('voice_emotion_recognition.h5')
label_encoder = joblib.load('label_encoder.pkl')

# Define emotions and initialize trends
emotions = ['anger', 'calm', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
emotion_trends = {emotion: [] for emotion in emotions}

# Function to extract features from an audio chunk
def extract_features(audio_chunk, rate):
    mfccs = librosa.feature.mfcc(y=audio_chunk, sr=rate, n_mfcc=40)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

# Function to predict emotion from audio
def predict_audio_emotion(audio_chunk, rate):
    features = extract_features(audio_chunk, rate)
    features = np.reshape(features, (1, -1, 1))
    prediction = model.predict(features)
    label = label_encoder.inverse_transform([np.argmax(prediction)])
    return label[0], prediction[0]

# Function to generate a simulated alpha wave signal (EEG-like waveform)
def generate_alpha_wave(length, amplitude=10, frequency=10):
    t = np.linspace(0, 1, length)
    alpha_wave = amplitude * np.sin(2 * np.pi * frequency * t)
    return alpha_wave

# Function to draw the dashboard with EEG-like waveform
def draw_dashboard(frame, emotion_probs, emotion_trends):
    # Dashboard settings
    dashboard_width = 800
    dashboard_height = frame.shape[0]
    dashboard = np.ones((dashboard_height, dashboard_width, 3), dtype=np.uint8) * 255  # White background

    # Bar settings
    bar_height = dashboard_height // 8
    bar_width = 300
    wave_width = 300
    shadow_height = 20  # Height of the shaded region
    gap = 10  # Gap between shaded region and the bottom of the rectangle
    vertical_padding = 10  # Padding to prevent waves from touching edges

    for i, emotion in enumerate(emotions):
        # Draw emotion labels
        cv2.putText(dashboard, emotion, (10, (i + 1) * bar_height - vertical_padding), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        # Draw emotion probability bars
        cv2.rectangle(dashboard, (110, i * bar_height), (110 + bar_width, (i + 1) * bar_height - vertical_padding), (0, 0, 0), 2)
        cv2.rectangle(dashboard, (110, i * bar_height), 
                      (110 + int(bar_width * emotion_probs[i]), (i + 1) * bar_height - vertical_padding), 
                      (0, 255, 0), -1)  # Green color for bars

        # Draw emotion trend waveforms centered vertically in the box
        wave_x = 450
        wave_y_start = i * bar_height + vertical_padding
        wave_y_end = (i + 1) * bar_height - vertical_padding

        # Calculate the center of the rectangle
        wave_y_center = (wave_y_start + wave_y_end) // 2

        # Draw the rectangle for the waveform
        cv2.rectangle(dashboard, (wave_x, i * bar_height), (wave_x + wave_width, (i + 1) * bar_height - vertical_padding), (0, 0, 0), 2)

        # Normalize trend data
        if len(emotion_trends[emotion]) > 1:
            # Add the new data point
            emotion_trends[emotion].append(emotion_probs[i])
            if len(emotion_trends[emotion]) > wave_width:
                emotion_trends[emotion].pop(0)

            norm_trend = (np.array(emotion_trends[emotion]) - np.min(emotion_trends[emotion])) / (np.max(emotion_trends[emotion]) - np.min(emotion_trends[emotion]) + 1e-6)
            norm_trend = (norm_trend * (wave_y_end - wave_y_start) // 2).astype(int)  # Scale with padding around the center

            # Create points for the shaded region
            points = []
            for j in range(len(norm_trend)):
                x = wave_x + j
                y = wave_y_center - norm_trend[j]
                points.append((x, y))
            
            # Define the bottom edge of the shaded region with a gap
            bottom_edge = min(wave_y_end, wave_y_center + shadow_height)
            points.append((wave_x + len(norm_trend) - 1, bottom_edge - gap))
            points.append((wave_x, bottom_edge - gap))
            points.append((wave_x, wave_y_center - norm_trend[0]))

            # Convert points to numpy array
            points = np.array(points, np.int32)
            points = points.reshape((-1, 1, 2))

            # Draw the shaded region in light black (gray-like color)
            cv2.fillPoly(dashboard, [points], (222, 218, 218))  # Light shaded black

            # Draw the actual waveform in black
            for j in range(1, len(norm_trend)):
                cv2.line(dashboard, (wave_x + j - 1, wave_y_center - norm_trend[j - 1]), 
                         (wave_x + j, wave_y_center - norm_trend[j]), (0, 0, 0), 2)  # Black color for waveform

    # Combine the video frame and the dashboard
    combined_frame = np.hstack((frame, dashboard))

    return combined_frame


# Function to process the video
import tempfile

def process_video_with_dashboard(video_path, output_path, box_width=500, box_height=500):
    # Open video clip using moviepy
    clip = VideoFileClip(video_path)

    # Extract audio from the clip
    audio = clip.audio

    # Cache to hold predictions for current second
    cached_emotion_probs = np.zeros(len(emotions))
    emotion_trends = {emotion: [] for emotion in emotions}  # Reset trends

    # VideoWriter to save the processed video
    temp_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    video_writer = None

    for t, frame in clip.iter_frames(with_times=True, fps=clip.fps):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # Convert from RGB (moviepy) to BGR (OpenCV)

        # Resize the frame to fit into the box
        frame_resized = cv2.resize(frame, (box_width, box_height))

        # Extract corresponding audio chunk for the current frame
        audio_chunk = audio.subclip(t, t + (1 / clip.fps)).to_soundarray()

        if len(audio_chunk) > 0:
            audio_chunk = audio_chunk.flatten()  # Flatten stereo to mono

            # Predict emotion at the current timestamp
            emotion_label, emotion_probs = predict_audio_emotion(audio_chunk, audio.fps)
            cached_emotion_probs[:] = emotion_probs  # Update cache with new probabilities

            # Update emotion trends
            for i, emotion in enumerate(emotions):
                emotion_trends[emotion].append(emotion_probs[i])
                if len(emotion_trends[emotion]) > 300:  # Limit length of trend data to wave_width
                    emotion_trends[emotion].pop(0)

        # Draw the dashboard on the frame with cached predictions
        combined_frame = draw_dashboard(frame_resized, cached_emotion_probs, emotion_trends)

        # Initialize the video writer once the first frame is processed
        if video_writer is None:
            height, width, _ = combined_frame.shape
            video_writer = cv2.VideoWriter(temp_video.name, cv2.VideoWriter_fourcc(*'mp4v'), clip.fps, (width, height))

        # Write the processed frame to the video file
        video_writer.write(combined_frame)

    # Release the video writer
    video_writer.release()

    # Combine the processed video with the original audio
    final_clip = VideoFileClip(temp_video.name).set_audio(audio)
    
    # Save the final output video
    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")

# Example usage
process_video_with_dashboard(r"C:\Users\User\Downloads\laugh.mp4", 'output_laugh.mp4')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step





[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Moviepy - Building video output_laugh.mp4.
MoviePy - Writing audio in output_laughTEMP_MPY_wvf_snd.mp4


                                                                                                                       

MoviePy - Done.
Moviepy - Writing video output_laugh.mp4



                                                                                                                       

Moviepy - Done !
Moviepy - video ready output_laugh.mp4


In [1]:
#done and dusted#################################
import cv2
import numpy as np
import librosa
import joblib
from keras.models import load_model
from moviepy.editor import VideoFileClip

# Load the trained model and label encoder
model = load_model('voice_emotion_recognition.h5')
label_encoder = joblib.load('label_encoder.pkl')

# Define emotions and initialize trends
emotions = ['anger', 'calm', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
emotion_trends = {emotion: [] for emotion in emotions}

# Function to extract features from an audio chunk
def extract_features(audio_chunk, rate):
    mfccs = librosa.feature.mfcc(y=audio_chunk, sr=rate, n_mfcc=40)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

# Function to predict emotion from audio
def predict_audio_emotion(audio_chunk, rate):
    features = extract_features(audio_chunk, rate)
    features = np.reshape(features, (1, -1, 1))
    prediction = model.predict(features)
    label = label_encoder.inverse_transform([np.argmax(prediction)])
    return label[0], prediction[0]

# Function to generate a simulated alpha wave signal (EEG-like waveform)
def generate_alpha_wave(length, amplitude=10, frequency=10):
    t = np.linspace(0, 1, length)
    alpha_wave = amplitude * np.sin(2 * np.pi * frequency * t)
    return alpha_wave

# Function to draw the dashboard with EEG-like waveform
def draw_dashboard(frame, emotion_probs, emotion_trends):
    # Dashboard settings
    dashboard_width = 800
    dashboard_height = frame.shape[0]
    dashboard = np.ones((dashboard_height, dashboard_width, 3), dtype=np.uint8) * 255  # White background

    # Bar settings
    bar_height = dashboard_height // 8
    bar_width = 300
    wave_width = 300
    shadow_height = 20  # Height of the shaded region
    gap = 10  # Gap between shaded region and the bottom of the rectangle
    vertical_padding = 10  # Padding to prevent waves from touching edges

    for i, emotion in enumerate(emotions):
        # Draw emotion labels
        cv2.putText(dashboard, emotion, (10, (i + 1) * bar_height - vertical_padding), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        # Draw emotion probability bars
        cv2.rectangle(dashboard, (110, i * bar_height), (110 + bar_width, (i + 1) * bar_height - vertical_padding), (0, 0, 0), 2)
        cv2.rectangle(dashboard, (110, i * bar_height), 
                      (110 + int(bar_width * emotion_probs[i]), (i + 1) * bar_height - vertical_padding), 
                      (0, 255, 0), -1)  # Green color for bars

        # Draw emotion trend waveforms centered vertically in the box
        wave_x = 450
        wave_y_start = i * bar_height + vertical_padding
        wave_y_end = (i + 1) * bar_height - vertical_padding

        # Calculate the center of the rectangle
        wave_y_center = (wave_y_start + wave_y_end) // 2

        # Draw the rectangle for the waveform
        cv2.rectangle(dashboard, (wave_x, i * bar_height), (wave_x + wave_width, (i + 1) * bar_height - vertical_padding), (0, 0, 0), 2)

        # Normalize trend data
        if len(emotion_trends[emotion]) > 1:
            # Add the new data point
            emotion_trends[emotion].append(emotion_probs[i])
            if len(emotion_trends[emotion]) > wave_width:
                emotion_trends[emotion].pop(0)

            norm_trend = (np.array(emotion_trends[emotion]) - np.min(emotion_trends[emotion])) / (np.max(emotion_trends[emotion]) - np.min(emotion_trends[emotion]) + 1e-6)
            norm_trend = (norm_trend * (wave_y_end - wave_y_start) // 2).astype(int)  # Scale with padding around the center

            # Create points for the shaded region
            points = []
            for j in range(len(norm_trend)):
                x = wave_x + j
                y = wave_y_center - norm_trend[j]
                points.append((x, y))
            
            # Define the bottom edge of the shaded region with a gap
            bottom_edge = min(wave_y_end, wave_y_center + shadow_height)
            points.append((wave_x + len(norm_trend) - 1, bottom_edge - gap))
            points.append((wave_x, bottom_edge - gap))
            points.append((wave_x, wave_y_center - norm_trend[0]))

            # Convert points to numpy array
            points = np.array(points, np.int32)
            points = points.reshape((-1, 1, 2))

            # Draw the shaded region in light black (gray-like color)
            cv2.fillPoly(dashboard, [points], (222, 218, 218))  # Light shaded black

            # Draw the actual waveform in black
            for j in range(1, len(norm_trend)):
                cv2.line(dashboard, (wave_x + j - 1, wave_y_center - norm_trend[j - 1]), 
                         (wave_x + j, wave_y_center - norm_trend[j]), (0, 0, 0), 2)  # Black color for waveform

    # Combine the video frame and the dashboard
    combined_frame = np.hstack((frame, dashboard))

    return combined_frame


# Function to process the video
def process_video_with_dashboard(video_path, box_width=500, box_height=500, frame_rate=30):
    # Open video capture
    cap = cv2.VideoCapture(video_path)

    # Get the video's frame rate (FPS)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_delay = int(1000 / frame_rate)  # Convert frame rate to milliseconds per frame

    # Extract audio using moviepy
    clip = VideoFileClip(video_path)
    audio = clip.audio
    audio_rate = audio.fps

    # Cache to hold predictions for current second
    cached_emotion_probs = np.zeros(len(emotions))

    current_frame = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Resize the frame to fit into the box
        frame_resized = cv2.resize(frame, (box_width, box_height))

        # Determine the time position of the current frame
        time_position = current_frame / fps

        # Extract corresponding audio chunk for the current frame
        audio_chunk = audio.subclip(time_position, time_position + (1/fps)).to_soundarray()

        # Predict emotion every N frames (e.g., once per second or so)
        if current_frame % int(fps) == 0:  # Predict once per second
            audio_chunk = audio_chunk.flatten()  # Flatten stereo to mono
            if len(audio_chunk) > 0:
                emotion_label, emotion_probs = predict_audio_emotion(audio_chunk, audio_rate)
                cached_emotion_probs = emotion_probs  # Update cache with new probabilities

                # Update emotion trends
                for i, emotion in enumerate(emotions):
                    emotion_trends[emotion].append(emotion_probs[i])
                    if len(emotion_trends[emotion]) > 300:  # Limit length of trend data to wave_width
                        emotion_trends[emotion].pop(0)

        # Draw the dashboard on the frame with cached predictions
        combined_frame = draw_dashboard(frame_resized, cached_emotion_probs, emotion_trends)

        # Display the frame with the dashboard
        cv2.imshow('Video with Emotion Detection Dashboard', combined_frame)

        if cv2.waitKey(frame_delay) & 0xFF == ord('q'):  # Use frame_delay for synchronized playback
            break

        current_frame += 1

    cap.release()
    cv2.destroyAllWindows()

# Example usage with a video file
process_video_with_dashboard('sample1.mp4')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2