### Importation des Bibliothèques

In [1]:
import wave
import wavio
import joblib
import librosa
import pyaudio
import numpy as np
import pandas as pd
import soundfile as sf
import sounddevice as sd
from pydub import AudioSegment
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

2024-03-23 23:21:01.455689: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-23 23:21:06.227165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.8/lib64:/usr/local/cuda-11.8/lib64:/usr/local/cuda-11.8/lib64:/usr/local/cuda-11.8/lib64:/home/hassouni/miniconda3/envs/tf/lib/
2024-03-23 23:21:06.227397: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; 

### Importation des Modèles

In [69]:
# Load the saved Keras model plus the two preprocessing objects used by predict_emotion:
# `scaler` transforms the feature vector and `encoder` maps model output back to labels.
# All three paths are relative, so they resolve against the current working directory.
model = load_model('model_all95.h5')
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts from a trusted source.
encoder = joblib.load('encoder_all.pkl')
scaler = joblib.load('scaler.pkl')

### Feature Extraction Functions

In [70]:
def zcr(data, frame_length=2048, hop_length=512):
    """
    Compute the zero-crossing rate of a signal.

    Parameters:
    - data: The audio samples handed to librosa as ``y``.
    - frame_length (int): Frame size forwarded to librosa (default 2048).
    - hop_length (int): Hop size forwarded to librosa (default 512).

    Returns:
    - The librosa result with all length-1 axes removed.
    """
    crossing_rate = librosa.feature.zero_crossing_rate(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    flattened = np.squeeze(crossing_rate)
    return flattened

def rmse(data, frame_length=2048, hop_length=512):
    """
    Compute the RMS feature of a signal via ``librosa.feature.rms``.

    Parameters:
    - data: The audio samples handed to librosa as ``y``.
    - frame_length (int): Frame size forwarded to librosa (default 2048).
    - hop_length (int): Hop size forwarded to librosa (default 512).

    Returns:
    - The librosa result with all length-1 axes removed.
    """
    energy = librosa.feature.rms(
        y=data,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    flattened = np.squeeze(energy)
    return flattened
    
def mfcc(data, sr, frame_length=2048, hop_length=512, flatten: bool = True):
    """
    Compute MFCC features for a signal.

    Parameters:
    - data: The audio samples handed to librosa as ``y``.
    - sr (int): Sampling rate of ``data`` in Hz.
    - frame_length (int): Analysis frame size, forwarded to librosa as ``n_fft``
      (default 2048).
    - hop_length (int): Hop size forwarded to librosa (default 512).
    - flatten (bool): When True (default) return the transposed coefficients as a
      1-D array; otherwise return the transposed array with length-1 axes removed.

    Returns:
    - A NumPy array of MFCC values, laid out as described by ``flatten``.
    """
    # frame_length / hop_length used to be accepted but silently ignored; forward
    # them so the framing can be kept consistent with zcr() and rmse().
    # The defaults (2048 / 512) match librosa's documented defaults, so calls
    # that rely on the defaults should produce the same values as before.
    mfcc_feature = librosa.feature.mfcc(
        y=data,
        sr=sr,
        n_fft=frame_length,
        hop_length=hop_length,
    )
    return np.squeeze(mfcc_feature.T) if not flatten else np.ravel(mfcc_feature.T)

In [71]:
def extract_features(data, sr, frame_length=2048, hop_length=512):
    """
    Build one flat feature vector for a signal.

    The vector is the horizontal concatenation of, in order, the outputs of
    ``zcr``, ``rmse`` and ``mfcc`` for the same signal.

    Parameters:
    - data: The audio samples.
    - sr (int): Sampling rate of ``data`` in Hz.
    - frame_length (int): Frame size passed to each feature function (default 2048).
    - hop_length (int): Hop size passed to each feature function (default 512).

    Returns:
    - A 1-D NumPy array holding all features.
    """
    pieces = [
        np.array([]),  # empty float64 seed, kept so the stacked dtype is unchanged
        zcr(data, frame_length, hop_length),
        rmse(data, frame_length, hop_length),
        mfcc(data, sr, frame_length, hop_length),
    ]
    return np.hstack(pieces)

### Prediction function from path

In [72]:
def predict_emotion(data, sample_rate):
    """
    Predict a label for one audio clip.

    Clips with more than 50000 but fewer than 55125 samples are zero-padded at the
    end to exactly 55125 samples; any other length is passed through unchanged.
    The features are then scaled with ``scaler``, fed to ``model`` and decoded with
    ``encoder``.

    Parameters:
    - data: The audio samples of the clip.
    - sample_rate (int): Sampling rate of ``data`` in Hz.

    Returns:
    - The element at ``[0][0]`` of the decoded prediction.
    """
    n_samples = len(data)
    if 50000 < n_samples < 55125:
        missing = 55125 - n_samples
        data = np.pad(data, (0, missing), mode='constant')

    features = extract_features(data, sample_rate)
    scaled = scaler.transform(features.reshape(1, -1))
    raw_output = model.predict(scaled)
    decoded = encoder.inverse_transform(raw_output)
    return decoded[0][0]

In [73]:
def prediction(path):
    """
    Predict the emotion(s) for the audio file at ``path``.

    Files longer than 4 seconds are split with ``segment_recording`` and every
    returned segment is classified. Other files are loaded once
    (2.5 seconds, starting 0.6 seconds in) and classified as a single clip.

    Parameters:
    - path (str): The audio file to classify.

    Returns:
    - A list of labels for files longer than 4 seconds, otherwise a single label.
    """
    audio_duration = get_audio_duration(path)
    if audio_duration > 4:
        return [
            predict_emotion(samples, rate)
            for samples, rate in segment_recording(path)
        ]

    samples, rate = librosa.load(path, duration=2.5, offset=0.6)
    return predict_emotion(samples, rate)

### Audio recording Function

In [74]:
def record_audio(file_path, duration=4, sample_rate=22050):
    """
    Capture mono audio with sounddevice and write it to a WAV file with wavio.

    Parameters:
    - file_path (str): Destination path of the WAV file.
    - duration (float): Length of the recording in seconds (default 4).
    - sample_rate (int): Sampling rate in Hz (default 22050).
    """
    print("Start speaking...")
    # Request duration * sample_rate frames as float32, then wait before
    # the buffer is used.
    recording = sd.rec(
        frames=int(duration * sample_rate),
        samplerate=sample_rate,
        channels=1,
        dtype='float32',
    )
    sd.wait()
    print("Recording finished.")

    # sampwidth=4 asks wavio for 4-byte samples.
    # NOTE(review): how wavio converts the float32 buffer depends on its scaling rules — confirm.
    wavio.write(file_path, recording, sample_rate, sampwidth=4)

In [75]:
def get_audio_duration(file_path):
    """
    Return the duration of an audio file in seconds.

    The file is opened read-only with ``soundfile.SoundFile`` and the duration is
    its frame count divided by its sampling rate.

    Parameters:
    - file_path: The file to inspect, passed straight to ``sf.SoundFile``.

    Returns:
    - float: The duration in seconds.
    """
    with sf.SoundFile(file_path, 'r') as audio_file:
        return len(audio_file) / audio_file.samplerate

In [76]:
def segment_recording(path):
    """
    Split an audio file into consecutive 3.5-second pieces and load the usable ones.

    Every piece is written to ``segment_<i>.wav`` (1-based index) using a relative
    path, overwriting any existing file of that name. Pieces whose exported
    duration is 2.5 seconds or less are skipped; the others are loaded with
    ``librosa.load(..., duration=2.5, offset=0.6)``.

    Parameters:
    - path (str): The audio file to split.

    Returns:
    - list: One ``librosa.load`` result per kept piece, in file order.
    """
    segment_duration = 3500  # length of one piece, in milliseconds

    audio = AudioSegment.from_file(path)

    audios = []
    starts = range(0, len(audio), segment_duration)
    for index, start_time in enumerate(starts, start=1):
        file_name = f"segment_{index}.wav"
        piece = audio[start_time:start_time + segment_duration]
        # export() returns the file object it wrote to; close it immediately so
        # no handle is left open, and work with the path from here on.
        piece.export(file_name, format="wav").close()

        if get_audio_duration(file_name) > 2.5:
            audios.append(librosa.load(file_name, duration=2.5, offset=0.6))

    return audios

In [84]:
# Record 4 seconds from the microphone and save the clip as a WAV file.
output_file = "recorded_audio.wav"
record_audio(output_file, duration=4)

Start speaking...
Recording finished.


In [85]:
# Classify the clip stored in recorded_audio.wav.
prediction("recorded_audio.wav")





'angry'

In [79]:
# Classify a file from a local dataset folder.
# NOTE(review): absolute, machine-specific path — this cell only works where that path exists.
prediction('/home/hassouni/PFA/model/Ravdess/Actor_04/03-01-07-01-01-01-04.wav')





'disgust'

In [89]:
import pyaudio
import wave

def record_audio(file_path, duration=4, sample_rate=22050, chunk_size=2048, format=pyaudio.paInt16, channels=1):
    """
    Record audio from the microphone with PyAudio and save it to a WAV file.

    NOTE: an earlier cell defines a function with the same name; once this cell
    runs, this definition replaces it.

    Parameters:
    - file_path (str): The path to the WAV file to save.
    - duration (float): The duration of the recording in seconds (default 4 seconds).
    - sample_rate (int): The sampling rate in Hz (default 22050 Hz).
    - chunk_size (int): The number of frames to read at a time (default 2048).
    - format: The format of the audio data (default pyaudio.paInt16).
    - channels: The number of audio channels (default 1).
    """
    # Total frames to capture. Counting frames (rather than whole chunks) keeps
    # the trailing partial chunk, so the file covers the requested duration.
    total_frames = int(sample_rate * duration)

    audio = pyaudio.PyAudio()
    frames = []
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(format)

        # Open stream
        stream = audio.open(format=format,
                            channels=channels,
                            rate=sample_rate,
                            input=True,
                            frames_per_buffer=chunk_size)
        try:
            print("Recording...")

            # Record audio: full chunks first, then whatever remains.
            remaining = total_frames
            while remaining > 0:
                to_read = min(chunk_size, remaining)
                frames.append(stream.read(to_read))
                remaining -= to_read

            print("Recording finished.")
        finally:
            # Stop and close the stream even if a read failed.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(file_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))


In [92]:
# Record a new clip with the default settings of record_audio and save it as recorded_audio1.wav.
record_audio("recorded_audio1.wav")

Recording...
Recording finished.


In [93]:
# Classify the clip stored in recorded_audio1.wav.
prediction("recorded_audio1.wav")





'angry'