In [None]:
# the module to segment conversations

# Import the AudioSegment class for processing audio and the 
# split_on_silence function for separating out silent chunks.
from pydub import AudioSegment
from pydub.silence import split_on_silence
from  python_speech_features import mfcc

# Define a function to normalize a chunk to a target amplitude.
def match_target_amplitude(aChunk, target_dBFS):
    """Normalize an audio chunk to a target loudness.

    Args:
        aChunk: Audio segment exposing a ``dBFS`` attribute (current loudness)
            and an ``apply_gain(dB)`` method, e.g. a pydub ``AudioSegment``.
        target_dBFS: Desired loudness in dBFS.

    Returns:
        The result of ``aChunk.apply_gain(...)`` with the gain needed to reach
        ``target_dBFS``. A chunk whose loudness is ``-inf`` (pure digital
        silence) is returned unchanged, because the required gain would be
        infinite.
    """
    current_dBFS = aChunk.dBFS
    # Pure silence has no finite loudness; there is nothing to scale.
    if current_dBFS == float("-inf"):
        return aChunk
    return aChunk.apply_gain(target_dBFS - current_dBFS)

# Load the recording. The input is a WAV file, so use the WAV loader instead
# of from_mp3 so the requested format matches the file.
song = AudioSegment.from_wav("./Conversation_196.wav")

# Split the track wherever a sufficiently long, sufficiently quiet stretch
# occurs; the non-silent pieces are returned as a list of chunks.
chunks = split_on_silence(
    # Use the loaded audio.
    song,
    # A stretch must be at least 205 ms long to count as a silence.
    min_silence_len=205,
    # Audio quieter than -49 dBFS is considered silent.
    # (You may want to adjust this parameter.)
    silence_thresh=-49
)

# 100 ms of silence used to pad both ends of every chunk. It is identical for
# all chunks, so it is built once rather than on every loop iteration.
silence_chunk = AudioSegment.silent(duration=100)

# Pad, normalize and export each chunk.
for i, chunk in enumerate(chunks):
    # Add the padding to the beginning and the end of the chunk.
    audio_chunk = silence_chunk + chunk + silence_chunk

    # Normalize the padded chunk to -20 dBFS.
    normalized_chunk = match_target_amplitude(audio_chunk, -20.0)

    # Export the chunk as WAV. The extension was previously misspelled
    # ".vaw", and the progress message named a different file than the one
    # actually written; both now use the real output path.
    chunk_path = "./chunk{0}.wav".format(i)
    print("Exporting {0}".format(chunk_path))
    normalized_chunk.export(
        chunk_path,
        bitrate="192k",
        format="wav"
    )

In [None]:
#Spectrogram transformation

from scipy.io import wavfile # scipy library to read wav files
import numpy as np
import os
import glob

import matplotlib.pyplot as plt
from scipy import signal

path = os.getcwd()

# Glob pattern for the input files: every .wav file located one directory
# level below the current working directory.
paths = "./*/*.wav"
# os.listdir() does not expand wildcards, so the pattern is resolved with
# glob. Sorting keeps the numbering of the saved images stable between runs.
dirs = sorted(glob.glob(paths))

N = 512  # Window length and number of FFT points used for the spectrogram.


def power_spectrum(samples, fs):
    """Return the one-sided power spectrum of a real 1-D signal.

    Args:
        samples: 1-D array of real-valued audio samples (must be non-empty).
        fs: Sampling rate in Hz.

    Returns:
        (freqs, power): the frequency axis in Hz and the power at each
        frequency, both of length ``len(samples) // 2 + 1``.
    """
    n = len(samples)
    # rfft keeps only the non-negative frequencies of the spectrum.
    power = (np.abs(np.fft.rfft(samples)) / float(n)) ** 2
    # Fold the discarded negative frequencies back in. The DC bin has no
    # mirror image, and for an even length neither does the Nyquist bin.
    if n % 2 > 0:
        power[1:] *= 2
    else:
        power[1:-1] *= 2
    freqs = np.arange(len(power)) * (fs / n)
    return freqs, power


for i, file in enumerate(dirs):
    # `file` is already a full relative path produced by glob.
    fs, Audiodata = wavfile.read(file)

    # Multi-channel files are read as (samples, channels); mix down to mono
    # so the 1-D spectral analysis below operates along the time axis.
    if Audiodata.ndim > 1:
        Audiodata = Audiodata.mean(axis=1)

    # The spectrogram needs at least one full window of samples.
    if len(Audiodata) < N:
        print("Skipping {0}: fewer than {1} samples.".format(file, N))
        continue

    # One-sided power spectrum of the whole file. It is not plotted here; it
    # is kept available for inspection, e.g.
    # plt.plot(freqAxis / 1000.0, 10 * np.log10(MagFreq)).
    freqAxis, MagFreq = power_spectrum(Audiodata, fs)

    # Spectrogram with a Blackman window of N points.
    f, t, Sxx = signal.spectrogram(
        Audiodata, fs, window=signal.windows.blackman(N), nfft=N
    )

    fig, ax = plt.subplots()
    # Bins with exactly zero power (digital silence) become -inf in dB;
    # silence the divide warning, the plotted values are unchanged.
    with np.errstate(divide="ignore"):
        Sxx_dB = 10 * np.log10(Sxx)
    mesh = ax.pcolormesh(t, f, Sxx_dB)  # dB spectrogram
    ax.set_ylabel('Frequency [Hz]')
    ax.set_xlabel('Time [seg]')
    ax.set_title('Spectrogram with scipy.signal', size=16)
    fig.colorbar(mesh, ax=ax)
    fig.savefig('Spectogramme' + str(i) + '.png', dpi=72)
    plt.show()
    # Release the figure so a long list of files does not accumulate memory.
    plt.close(fig)

In [1]:
# Define the LSTM model
import tensorflow as tf

def build_model(num_classes):
    """Build the stacked-LSTM classifier for 350x350 RGB spectrogram images.

    Only the first colour channel of each image is kept, which turns an image
    into a (350, 350) array that the LSTM layers consume as a sequence.

    Args:
        num_classes: Number of output classes, i.e. the width of the final
            Dense layer.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The final layer has no
        activation, so the model outputs raw scores (logits).
    """
    model = tf.keras.Sequential([
        # Keep channel 0 only: (batch, 350, 350, 3) -> (batch, 350, 350).
        tf.keras.layers.Lambda(lambda x: x[:, :, :, 0], input_shape=(350, 350, 3)),
        tf.keras.layers.LSTM(units=256, return_sequences=True),
        tf.keras.layers.LSTM(128, return_sequences=True),
        tf.keras.layers.LSTM(64, return_sequences=True),
        # Last recurrent layer returns only its final state.
        tf.keras.layers.LSTM(32, return_sequences=False),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128),
        # No activation here: the loss this notebook compiles the model with
        # is configured with from_logits=True and therefore expects raw
        # scores, not sigmoid/softmax outputs.
        tf.keras.layers.Dense(num_classes),
    ])
    return model
model_lstm = build_model(num_classes=7)

model_lstm.compile(
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    # from_logits=True means the loss expects the model's last layer to emit
    # raw scores (no sigmoid/softmax applied).
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)
# summary() prints the table itself and returns None, so it must not be
# wrapped in print().
model_lstm.summary()

# NOTE(review): `train_generator` is not defined in this cell and must come
# from an earlier one. If it is a generator / Sequence that already yields
# batches, Keras expects `batch_size` to be omitted here - confirm.
history_lstm = model_lstm.fit(train_generator, batch_size=4, epochs=10, verbose=1)


In [None]:
# Evaluate the LSTM model: plot the training loss and accuracy curves

import matplotlib.pyplot as plt

# Per-epoch training metrics recorded by model.fit().
loss_curve = history_lstm.history["loss"]
acc_curve = history_lstm.history["accuracy"]


def plot_training_curve(values, label, title, filename):
    """Plot one per-epoch metric curve, save it to `filename` and display it.

    Args:
        values: Sequence with one metric value per epoch.
        label: Legend label for the curve.
        title: Title of the figure.
        filename: Path of the image file to write.
    """
    fig, ax = plt.subplots()
    ax.plot(values, label=label, marker=11)
    ax.legend(loc="upper left")
    ax.set_title(title)
    # Save before showing: the figure may be discarded once displayed.
    fig.savefig(filename)
    plt.show()
    plt.close(fig)


# The loss figure used to be written to 'acc_lstm.png' and was then
# overwritten by the accuracy figure; each curve now has its own file.
plot_training_curve(loss_curve, "lstm_loss", "loss", "loss_lstm.png")
plot_training_curve(acc_curve, "lstm_acc", "Accuracy", "acc_lstm.png")


