In [1]:
# Install required packages
!pip install gradio==3.50.2 librosa tensorflow scikit-learn soundfile pydub

import numpy as np
import librosa
import gradio as gr
import tempfile
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation

# Constants
SAMPLE_RATE = 22050  # Hz; rate used when loading/resampling audio with librosa
DURATION = 3  # seconds; clips are trimmed or zero-padded to this length
# Label order matters for both lists: predictions are mapped to a label by
# indexing with the argmax position of the corresponding model's output.
EMOTIONS = ['neutral', 'happy', 'sad', 'angry', 'fear', 'disgust', 'surprise']
GENDER_LABELS = ['female', 'male']

# Enhanced feature extraction
def extract_features(audio_data, sr=SAMPLE_RATE):
    """Build the feature vector fed to the gender and emotion models.

    The vector holds the per-coefficient mean of 13 MFCCs followed by the
    mean of the YIN pitch track (searched between 80 Hz and 400 Hz).

    Args:
        audio_data: Audio samples, handed to librosa unchanged as ``y``.
        sr: Sampling rate of ``audio_data``; defaults to ``SAMPLE_RATE``.

    Returns:
        A 1-D array of MFCC means with the mean pitch appended, or ``None``
        when any extraction step raises (the error is printed).
    """
    try:
        # One row per MFCC coefficient; averaging over axis 1 collapses the
        # frame axis and leaves a single mean per coefficient.
        mfcc_matrix = librosa.feature.mfcc(
            y=audio_data, sr=sr, n_mfcc=13, n_fft=2048, hop_length=512
        )
        coefficient_means = mfcc_matrix.mean(axis=1)

        # Mean fundamental-frequency estimate, added to help gender detection.
        pitch_track = librosa.yin(audio_data, fmin=80, fmax=400, sr=sr)
        mean_pitch = np.mean(pitch_track)

        return np.concatenate((coefficient_means, np.atleast_1d(mean_pitch)))
    except Exception as e:
        print(f"Feature extraction error: {e}")
        return None

# Improved mock gender model
def create_gender_model(input_shape):
    """Build and compile the (untrained) dense gender classifier.

    Args:
        input_shape: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model whose softmax output has one unit per
        entry in ``GENDER_LABELS``.
    """
    layer_stack = [
        Dense(128, input_shape=(input_shape,), activation='relu'),
        Dropout(0.4),
        Dense(64, activation='relu'),
        Dropout(0.3),
        # One probability per gender label.
        Dense(len(GENDER_LABELS), activation='softmax'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Emotion model
def create_emotion_model(input_shape):
    """Build and compile the (untrained) dense emotion classifier.

    Args:
        input_shape: Number of input features per sample.

    Returns:
        A compiled ``Sequential`` model whose softmax output has one unit per
        entry in ``EMOTIONS``.
    """
    layer_stack = [
        Dense(256, input_shape=(input_shape,), activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.4),
        # One probability per emotion label.
        Dense(len(EMOTIONS), activation='softmax'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Initialize models with proper feature size (14 = 13 MFCC + pitch)
# NOTE(review): the models are only constructed and compiled here; no call
# that trains them or loads weights is visible in this file, so predictions
# presumably come from freshly initialised layers - confirm whether trained
# weights are meant to be loaded before serving.
gender_model = create_gender_model(14)
emotion_model = create_emotion_model(14)

# Adjust gender detection threshold (higher means more strict female detection)
# Compared in predict_emotion against the softmax probability of the
# predicted gender class.
FEMALE_THRESHOLD = 0.7  # 70% confidence

def _load_audio(audio_input):
    """Return mono floating-point audio at SAMPLE_RATE from a path or (sr, samples) pair."""
    if isinstance(audio_input, str):
        # File upload: librosa.load downmixes to mono and resamples for us.
        audio_data, _ = librosa.load(audio_input, sr=SAMPLE_RATE)
        return audio_data

    # Microphone recording: Gradio's numpy format is (sample_rate, samples).
    sr, audio_data = audio_input
    audio_data = np.asarray(audio_data)

    if np.issubdtype(audio_data.dtype, np.integer):
        # Integer PCM -> floats in roughly [-1, 1]; librosa's feature and
        # resampling routines require floating-point input.
        full_scale = float(np.iinfo(audio_data.dtype).max)
        audio_data = audio_data.astype(np.float32) / full_scale
    else:
        audio_data = audio_data.astype(np.float32)

    if audio_data.ndim > 1:
        # Gradio lays multi-channel audio out as (samples, channels), so the
        # channel axis to average away is the last one. librosa.to_mono
        # assumes (channels, samples) and would reduce the wrong axis here.
        audio_data = audio_data.mean(axis=1)

    if sr != SAMPLE_RATE:
        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=SAMPLE_RATE)
    return audio_data


def _fit_duration(audio_data):
    """Trim or zero-pad audio to exactly SAMPLE_RATE * DURATION samples."""
    target_len = SAMPLE_RATE * DURATION
    if len(audio_data) > target_len:
        return audio_data[:target_len]
    return np.pad(audio_data, (0, target_len - len(audio_data)), mode='constant')


def predict_emotion(audio_input):
    """Classify the emotion of a voice clip after a female-voice gate.

    Args:
        audio_input: Either a path to an audio file (upload tab) or a
            ``(sample_rate, samples)`` tuple (microphone tab). ``None`` is
            accepted and reported as missing audio.

    Returns:
        A human-readable result string: the detected emotion with its
        confidence, a request for a (clearer) female voice when the gender
        gate rejects the clip, or an ``Error ...`` message. Exceptions are
        never propagated to the caller; they are turned into the error string
        so the UI always has text to display.
    """
    if audio_input is None:
        # Button pressed before anything was uploaded or recorded.
        return "Error: No audio provided - please upload or record a clip first"

    try:
        # Decode in memory. The previous temp-file round trip relied on
        # librosa.output.write_wav (removed in librosa 0.8) and never deleted
        # the file it created.
        audio_data = _load_audio(audio_input)
        if audio_data.size == 0:
            return "Error: No audio provided - please upload or record a clip first"
        audio_data = _fit_duration(audio_data)

        # Extract features
        features = extract_features(audio_data)
        if features is None or len(features) != 14:
            return "Error: Could not extract proper audio features"
        batch = features.reshape(1, -1)

        # Predict gender with confidence
        gender_pred = gender_model.predict(batch)
        gender_idx = np.argmax(gender_pred)
        gender_confidence = gender_pred[0][gender_idx]
        gender = GENDER_LABELS[gender_idx]

        # NOTE: with two classes the winning probability is always >= 0.5, so
        # for FEMALE_THRESHOLD = 0.7 the first check passes whenever the
        # prediction is 'male'. Kept as-is to preserve existing behavior.
        if gender == 'male' and gender_confidence > (1 - FEMALE_THRESHOLD):
            return f"Please upload female voice (detected {gender} with {gender_confidence:.1%} confidence)"
        elif gender == 'female' and gender_confidence < FEMALE_THRESHOLD:
            return "Voice gender unclear - please try again with clearer female voice"

        # Predict emotion
        emotion_pred = emotion_model.predict(batch)
        emotion_idx = np.argmax(emotion_pred)
        emotion = EMOTIONS[emotion_idx]
        confidence = emotion_pred[0][emotion_idx]

        return f"Detected emotion: {emotion} ({confidence:.1%} confidence)"

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error processing audio: {str(e)}"

# Create Gradio interface with Colab-compatible settings
def create_interface():
    """Assemble the two-tab Gradio UI (file upload and microphone recording).

    Each tab has its own audio input, result textbox and button; both buttons
    run ``predict_emotion`` on their tab's audio and write the returned text
    into that tab's textbox.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Female Voice Emotion Detection") as app:
        gr.Markdown("## Female Voice Emotion Detection")
        gr.Markdown("Please speak clearly for 3-5 seconds")

        with gr.Tab("Upload Audio File"):
            gr.Markdown("Upload a recorded voice note (WAV format preferred)")
            # "filepath" hands predict_emotion a path string.
            upload_audio = gr.Audio(type="filepath", label="Audio File")
            upload_result = gr.Textbox(label="Analysis Result")
            upload_trigger = gr.Button("Analyze Upload")

        with gr.Tab("Record Voice"):
            gr.Markdown("Record directly from your microphone")
            gr.Markdown("Note: In Colab, you need to click 'Allow' when prompted for microphone access")
            # "numpy" hands predict_emotion a (sample_rate, samples) tuple.
            mic_audio = gr.Audio(source="microphone", type="numpy", label="Recording")
            mic_result = gr.Textbox(label="Analysis Result")
            mic_trigger = gr.Button("Analyze Recording")

        # Wire each button to the shared handler, upload tab first.
        wiring = (
            (upload_trigger, upload_audio, upload_result),
            (mic_trigger, mic_audio, mic_result),
        )
        for trigger, audio_in, text_out in wiring:
            trigger.click(predict_emotion, inputs=audio_in, outputs=text_out)

    return app

# Launch the interface
if __name__ == "__main__":
    interface = create_interface()
    try:
        # Try launching with share=True first so a public URL is available.
        interface.launch(share=True)
    except Exception as e:
        # Top-level boundary: report why sharing failed (previously the
        # exception was bound but discarded), then fall back to a local launch.
        print(f"Failed to launch with share=True ({e}), trying without sharing...")
        interface.launch()



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
IMPORTANT: You are using gradio version 3.50.2, however version 4.44.1 is available, please upgrade.
--------
Running on public URL: https://e8762c8b238a86f10d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
