In [None]:
# Depression Detection Audio-Video Fusion with Gradio GUI (Colab Ready)
import tensorflow as tf
import numpy as np
import librosa
import cv2
import joblib
import tempfile
import os
from moviepy.editor import VideoFileClip
import gradio as gr

# Directory holding the trained artifacts. Set the DEPRESSION_MODEL_DIR
# environment variable (e.g. to a mounted Drive folder on Colab) to run this
# notebook on another machine without editing the cell; the default is the
# original local Windows folder.
MODEL_DIR = os.environ.get("DEPRESSION_MODEL_DIR", r"C:\Users\hp\Desktop\New folder")

# Load models
audio_model = tf.keras.models.load_model(os.path.join(MODEL_DIR, "depression_model_finetuned.h5"))
video_model = tf.keras.models.load_model(os.path.join(MODEL_DIR, "densenet201_depression_model.keras"))
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load a scaler.pkl that comes from a trusted source.
scaler = joblib.load(os.path.join(MODEL_DIR, "scaler.pkl"))

IMG_HEIGHT, IMG_WIDTH = 224, 224  # size every video frame is resized to
FRAMES_PER_VIDEO = 20  # default number of frames sampled from each video
MAX_PAD_LEN = 216  # audio feature sequences are padded/truncated to this many steps
VIDEO_INFLUENCE_WEIGHT = 0.4  # scale of the video-based adjustment added to the audio score

def extract_frames(video_path, num_frames=FRAMES_PER_VIDEO):
    """Sample `num_frames` evenly spaced RGB frames from a video.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames to sample across the whole video.

    Returns:
        float32 array of shape (num_frames, IMG_HEIGHT, IMG_WIDTH, 3) with
        pixel values scaled to [0, 1]. A frame that cannot be decoded is
        replaced by an all-zero image so the output length stays fixed.

    Raises:
        OSError: If the video cannot be opened at all.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Without this check an unreadable file silently produced a batch
            # of black frames that was still fed to the model.
            raise OSError(f"Could not open video: {video_path}")

        # If the backend reports a non-positive frame count, clamp it so the
        # index range below never contains a negative frame position.
        total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)
        frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)

        frames = []
        for fid in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()
            if not ret:
                # Best effort: keep the batch size constant with a blank frame.
                frames.append(np.zeros((IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8))
                continue
            frame = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
            # OpenCV decodes to BGR; convert to RGB channel order.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()

    return np.array(frames).astype('float32') / 255.0

def extract_audio_features(audio_path, max_pad_len=MAX_PAD_LEN):
    """Turn an audio file into a fixed-length MFCC feature sequence.

    The audio is loaded at its native sample rate (sr=None). 26 MFCCs are
    stacked with their first- and second-order deltas (78 rows), the time axis
    is zero-padded on the right up to `max_pad_len` when shorter, and the
    result is transposed and cut to the first `max_pad_len` time steps.

    Args:
        audio_path: Path of the audio file to load.
        max_pad_len: Number of time steps in the returned sequence.

    Returns:
        Array of shape (max_pad_len, 78): one row per time step.
    """
    signal, sample_rate = librosa.load(audio_path, sr=None)
    base = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=26)
    first_order = librosa.feature.delta(base)
    second_order = librosa.feature.delta(base, order=2)

    # Rows: 26 MFCC + 26 delta + 26 delta-delta; columns: time frames.
    stacked = np.vstack((base, first_order, second_order))

    # Right-pad the time axis with zeros when the clip is too short.
    missing = max_pad_len - stacked.shape[1]
    if missing < 0:
        missing = 0
    padded = np.pad(stacked, ((0, 0), (0, missing)), mode='constant')

    # Time-major layout, truncated to the requested length.
    return padded.T[:max_pad_len]

def extract_audio_from_video(video_path):
    """Write the audio track of a video to a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the temporary .wav file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Reserve a temp path, then close our own handle immediately: the audio is
    # written to the path by name, and a still-open handle blocks that on
    # Windows (and would otherwise stay open for the life of the process).
    temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    temp_audio.close()

    clip = None
    try:
        clip = VideoFileClip(video_path)
        if clip.audio is None:
            raise ValueError("The video has no audio track.")
        clip.audio.write_audiofile(temp_audio.name, logger=None)
    except Exception:
        # Do not leave an orphaned temp file behind when extraction fails.
        if os.path.exists(temp_audio.name):
            os.unlink(temp_audio.name)
        raise
    finally:
        # Release the reader resources held by the clip.
        if clip is not None:
            clip.close()
    return temp_audio.name

def predict_fusion_gradio(video_file):
    """Predict a depression label for an uploaded video (Gradio callback).

    The audio model's score (element [0][0] of its output) is shifted by
    VIDEO_INFLUENCE_WEIGHT * (mean video prediction - 0.5), clipped to [0, 1],
    and thresholded at 0.5.

    Args:
        video_file: The uploaded file as delivered by gr.File: either a path
            string or a file-like object exposing the path as `.name`.

    Returns:
        A string of the form "Label: Depressed" or "Label: Not Depressed".

    Raises:
        ValueError: If no file was provided.
    """
    if video_file is None:
        raise ValueError("No video file was provided.")
    # Accept both payload styles (plain path or tempfile-like wrapper).
    video_path = getattr(video_file, "name", video_file)

    audio_path = extract_audio_from_video(video_path)
    try:
        audio_features = extract_audio_features(audio_path)
    finally:
        # The WAV is only needed for feature extraction; remove it even when
        # extraction fails so temp files do not pile up.
        os.unlink(audio_path)

    audio_features = audio_features[np.newaxis, ...]
    # The scaler works on 2-D (time steps, features) data, so flatten the
    # batch axis for the transform and restore the 3-D shape afterwards.
    audio_features_scaled = scaler.transform(
        audio_features.reshape(-1, audio_features.shape[2])
    ).reshape(audio_features.shape)
    audio_pred = audio_model.predict(audio_features_scaled)[0][0]

    frames = extract_frames(video_path)
    video_preds = video_model.predict(frames)
    video_emotion_mean = np.mean(video_preds)
    # Centered on 0.5 so a neutral video score leaves the audio score unchanged.
    video_adjustment = VIDEO_INFLUENCE_WEIGHT * (video_emotion_mean - 0.5)

    final_pred = audio_pred + video_adjustment
    final_pred = np.clip(final_pred, 0, 1)
    label = "Depressed" if final_pred >= 0.5 else "Not Depressed"
    return f"Label: {label}"

def launch_gradio():
    """Build the upload-a-video Gradio interface and launch it.

    The app is launched with share=True and debug=True.
    """
    video_input = gr.File(file_types=[".mp4", ".flv"], label="Upload MP4 or FLV Video")
    app = gr.Interface(
        fn=predict_fusion_gradio,
        inputs=video_input,
        outputs="text",
        title="Depression Detection",
        description="Upload a video (MP4/FLV) containing both audio and video. The model will predict depression status.",
    )
    app.launch(share=True, debug=True)

# Start the UI. Because launch() is called with debug=True, this cell keeps
# running (streaming logs) until it is interrupted.
launch_gradio()



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6ac939a055b561fdcc.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 462ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the model files loaded in the next
# cell are reachable; force_remount=True requests a fresh mount even if Drive
# is already mounted.
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
import tensorflow as tf
import numpy as np
import librosa
import cv2
import joblib
import tempfile
import os
from moviepy.editor import VideoFileClip
import gradio as gr

# Preprocessing and fusion settings
IMG_HEIGHT = 224
IMG_WIDTH = 224
FRAMES_PER_VIDEO = 20
MAX_PAD_LEN = 216
VIDEO_INFLUENCE_WEIGHT = 0.4
AUDIO_SR = 16000  # Fixed sample rate for audio consistency

# Trained artifacts, read from the mounted Google Drive
audio_model = tf.keras.models.load_model(
    "/content/drive/MyDrive/Depression detection System (Audio + Video)/depression_model_finetuned (2).h5"
)
video_model = tf.keras.models.load_model(
    "/content/drive/MyDrive/Depression Detection System (Video)/densenet201_depression_model.keras"
)
scaler = joblib.load(
    "/content/drive/MyDrive/Depression detection System (Audio + Video)/scaler.pkl"
)

def extract_frames(video_path, num_frames=FRAMES_PER_VIDEO):
    """Extract `num_frames` RGB frames from a video.

    When the video has fewer frames than requested, indices are spread evenly
    over the whole video (so frames repeat). Otherwise a contiguous run of
    `num_frames` frames centered on the middle of the video is used.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames to return.

    Returns:
        float32 array of shape (num_frames, IMG_HEIGHT, IMG_WIDTH, 3) with
        pixel values scaled to [0, 1]. A frame that cannot be decoded is
        replaced by an all-zero image so the output length stays fixed.

    Raises:
        OSError: If the video cannot be opened at all.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Without this check an unreadable file silently produced a batch
            # of black frames that was still fed to the model.
            raise OSError(f"Could not open video: {video_path}")

        # If the backend reports a non-positive frame count, clamp it so the
        # linspace branch never produces a negative frame position.
        total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)

        if total_frames < num_frames:
            frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
        else:
            start = max((total_frames - num_frames) // 2, 0)
            frame_indices = range(start, start + num_frames)

        frames = []
        for fid in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()
            if not ret:
                # Best effort: keep the batch size constant with a blank frame.
                frames.append(np.zeros((IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8))
                continue
            frame = cv2.resize(frame, (IMG_WIDTH, IMG_HEIGHT))
            # OpenCV decodes to BGR; convert to RGB channel order.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()

    return np.array(frames).astype('float32') / 255.0

def extract_audio_features(audio_path, max_pad_len=MAX_PAD_LEN):
    """Turn an audio file into a fixed-length MFCC feature sequence.

    The audio is loaded resampled to AUDIO_SR. 26 MFCCs are stacked with their
    first- and second-order deltas (78 rows), the time axis is zero-padded on
    the right up to `max_pad_len` when shorter, and the result is transposed
    and cut to the first `max_pad_len` time steps.

    Args:
        audio_path: Path of the audio file to load.
        max_pad_len: Number of time steps in the returned sequence.

    Returns:
        Array of shape (max_pad_len, 78): one row per time step.
    """
    signal, sample_rate = librosa.load(audio_path, sr=AUDIO_SR)
    base = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=26)
    first_order = librosa.feature.delta(base)
    second_order = librosa.feature.delta(base, order=2)

    # Rows: 26 MFCC + 26 delta + 26 delta-delta; columns: time frames.
    stacked = np.vstack((base, first_order, second_order))

    # Right-pad the time axis with zeros when the clip is too short.
    missing = max_pad_len - stacked.shape[1]
    if missing < 0:
        missing = 0
    padded = np.pad(stacked, ((0, 0), (0, missing)), mode='constant')

    # Time-major layout, truncated to the requested length.
    return padded.T[:max_pad_len]

def extract_audio_from_video(video_path):
    """Write the audio track of a video to a temporary WAV file at AUDIO_SR.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the temporary .wav file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Reserve a temp path, then close our own handle immediately: the audio is
    # written to the path by name, and a still-open handle blocks that on
    # Windows (and would otherwise stay open for the life of the process).
    temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    temp_audio.close()

    clip = None
    try:
        clip = VideoFileClip(video_path)
        if clip.audio is None:
            raise ValueError("The video has no audio track.")
        clip.audio.write_audiofile(temp_audio.name, fps=AUDIO_SR, logger=None)
    except Exception:
        # Do not leave an orphaned temp file behind when extraction fails.
        if os.path.exists(temp_audio.name):
            os.unlink(temp_audio.name)
        raise
    finally:
        # Release the reader resources held by the clip.
        if clip is not None:
            clip.close()
    return temp_audio.name

def predict_fusion_gradio(video_file):
    """Predict a depression label for an uploaded video (Gradio callback).

    The audio model's score (element [0][0] of its output) is shifted by
    VIDEO_INFLUENCE_WEIGHT * (mean video prediction - 0.5) and clipped to
    [0, 1]. Scores >= 0.55 are labeled "Depressed", scores <= 0.45
    "Not Depressed", and anything in between "Uncertain".

    Args:
        video_file: The uploaded file as delivered by gr.File: either a path
            string or a file-like object exposing the path as `.name`.

    Returns:
        A multi-line report with the label and the individual scores, or a
        string starting with "Error during prediction: " if anything fails.
    """
    try:
        if video_file is None:
            raise ValueError("No video file was provided.")
        # Accept both payload styles (plain path or tempfile-like wrapper).
        video_path = getattr(video_file, "name", video_file)

        audio_path = extract_audio_from_video(video_path)
        try:
            audio_features = extract_audio_features(audio_path)
        finally:
            # The WAV is only needed for feature extraction; remove it on the
            # failure path too, so errors do not leak temp files.
            os.unlink(audio_path)

        audio_features = audio_features[np.newaxis, ...]
        # The scaler works on 2-D (time steps, features) data, so flatten the
        # batch axis for the transform and restore the 3-D shape afterwards.
        audio_features_scaled = scaler.transform(
            audio_features.reshape(-1, audio_features.shape[2])
        ).reshape(audio_features.shape)

        # Audio model prediction
        audio_pred = audio_model.predict(audio_features_scaled, verbose=0)[0][0]

        # Video model prediction
        frames = extract_frames(video_path)
        video_preds = video_model.predict(frames, verbose=0)
        video_emotion_mean = np.mean(video_preds)
        # Centered on 0.5 so a neutral video score leaves the audio score unchanged.
        video_adjustment = VIDEO_INFLUENCE_WEIGHT * (video_emotion_mean - 0.5)

        # Final fusion
        final_pred = audio_pred + video_adjustment
        final_pred = np.clip(final_pred, 0, 1)

        # Label with buffer zone
        if final_pred >= 0.55:
            label = "Depressed"
        elif final_pred <= 0.45:
            label = "Not Depressed"
        else:
            label = "Uncertain"

        # Report the label together with the intermediate scores
        return (
            f"Label: {label}\n\n"
            f"Audio Prediction: {audio_pred:.3f}\n"
            f"Video Adjustment: {video_adjustment:.3f}\n"
            f"Final Prediction Score: {final_pred:.3f}"
        )

    except Exception as e:
        # Surface the failure in the UI text box instead of crashing the app.
        return f"Error during prediction: {str(e)}"

def launch_gradio():
    """Build the upload-a-video Gradio interface and launch it.

    Flagging is disabled, and the app is launched with share=True and
    debug=True.
    """
    video_input = gr.File(file_types=[".mp4", ".flv"], label="Upload MP4 or FLV Video")
    app = gr.Interface(
        fn=predict_fusion_gradio,
        inputs=video_input,
        outputs="text",
        title="Depression Detection System",
        description="Upload a video (MP4/FLV) with both audio and video. The model will classify as Depressed / Not Depressed / Uncertain.",
        allow_flagging="never",
    )
    app.launch(share=True, debug=True)

# Start the UI. Because launch() is called with debug=True, this cell keeps
# running (streaming logs) until it is interrupted.
launch_gradio()





Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://246b4b9ceb5530241d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://246b4b9ceb5530241d.gradio.live
