<h2>Imports</h2>

In [None]:
from common_language import _LANGUAGES
import processing as prlib

from scipy.fft import fft
from mutagen.wave import WAVE
from parselmouth.praat import call
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import librosa
import librosa.display
import parselmouth
import noisereduce as nr

<h2>Features Processing</h2>

Get data frames

In [None]:
# Load the dataset splits through the project helper. Four values are unpacked:
# the first is discarded, the other three are bound as the train, test and
# validation frames (each is read through its 'paths' column further down).
_, train_df, test_df, validation_df = prlib.get_dataframes()

Features matrix

In [None]:
# Build one feature row per audio clip and stack the rows into a single frame,
# then expose the stacked values as a plain array in `matrix`.
datasets = [('train', train_df), ('test', test_df), ('validation', validation_df)]

# Rows are collected in a list and concatenated once after the loop: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration.
# The list is seeded with an empty frame so the result matches the incremental
# build, including the case where no clip is processed at all.
feature_rows = [pd.DataFrame()]

# NOTE(review): the [2:] slice restricts this run to the 'validation' split;
# widen it if the 'train' and 'test' splits should be processed as well.
# The split label is called `split` (not `type`) so the builtin `type` is not
# shadowed for the rest of the kernel session.
for (split, df) in datasets[2:]:
    audios = df['paths'].tolist()
    for i, audio in enumerate(audios):
        path = prlib.get_path(split, audio)
        print(i, "/", len(audios), path)
        # NOTE(review): the loaded samples are not used inside this loop; the
        # call is kept in case prlib.get_data has a side effect that
        # get_data_features relies on -- confirm, then drop it if redundant.
        data = prlib.get_data(path)
        row = prlib.get_data_features(path, audio)
        feature_rows.append(row)

attributes_df = pd.concat(feature_rows, ignore_index=True)
matrix = attributes_df.values

<h2>Features visualisation</h2>

Initialisation

In [None]:
# Pick one validation clip (label 3929 of the 'paths' column) and load it; the
# resulting `data` / `data_path` are what the plotting cells below work on.
sample_audio = validation_df['paths'][3929]
data_path = prlib.get_path('validation', sample_audio)
data = prlib.get_data(data_path)
# Alternative sample kept from an earlier experiment:
#data = get_data(get_path('train', 'common_voice_en_19688238.wav'))

# Cell output: sample rate, resolved path and the feature row for this clip.
(
    prlib.sample_rate,
    data_path,
    prlib.get_data_features(data_path, sample_audio),
)

In [None]:
# Scatter of the loaded samples against their position in the array.
fig, ax = plt.subplots(figsize=(12, 8))
sample_positions = range(len(data))
ax.scatter(sample_positions, data, s=0.1)
ax.set_xlabel('Time')

In [None]:
# https://librosa.org/doc/main/generated/librosa.stft.html

# Log-frequency spectrogram (in dB) of the selected clip.
n_fft = 512
# Same hop librosa.stft falls back to when only n_fft is given (n_fft // 4);
# spelled out so the identical value can be handed to specshow below.
hop_length = n_fft // 4

# Take the magnitude explicitly: amplitude_to_db emits a warning when it is
# given the complex STFT and discards the phase anyway.
magnitude = np.abs(librosa.stft(data, n_fft=n_fft, hop_length=hop_length))

plt.figure(figsize=(12, 8))
# specshow otherwise falls back to sr=22050 and hop_length=512, which mislabels
# the time axis for an STFT computed with a hop of 128.
# Assumes prlib.sample_rate is the rate `data` was loaded at, as the other
# cells of this notebook do -- confirm.
librosa.display.specshow(
    librosa.amplitude_to_db(magnitude),
    sr=prlib.sample_rate,
    hop_length=hop_length,
    y_axis='log',
    x_axis='time',
)
plt.show()

In [None]:
# Scatter plot of the raw samples of the selected clip, one point per sample.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(range(len(data)), data, s=0.1)
ax.set_xlabel('Time')

MFCCs visualisation

In [None]:
# Compute the normalised MFCCs of the selected clip and report their shape.
normalized_mfccs = prlib.get_Normalized_Mfccs(data)
mfcc_shape = normalized_mfccs.shape
print(mfcc_shape)

# Draw the coefficients as a heat map with a colour scale next to it.
# The pyplot interface is kept here because plt.colorbar() picks up the image
# that specshow registers as the current one.
plt.figure(figsize=(12, 6))
librosa.display.specshow(normalized_mfccs, x_axis='time')
plt.colorbar()
plt.title('MFCCs')
plt.tight_layout()
plt.show()

Spectral measurements visualisation

In [None]:
# Spectral measurements of the selected clip; the first three entries of
# `specs` are drawn as centroid, rolloff and bandwidth respectively.
specs = prlib.get_spectral_measurements(data)
# NOTE(review): frames_to_time uses its default hop length here -- confirm it
# matches the hop used inside prlib.get_spectral_measurements.
t = librosa.frames_to_time(range(len(specs[0])), sr=prlib.sample_rate)

# Plotting the Spectral Features: (values, line colour, legend label)
curves = (
    (specs[0], 'red', 'Centroid'),
    (specs[1], 'blue', 'Rolloff'),
    (specs[2], 'green', 'Bandwidth'),
)

fig, ax = plt.subplots(figsize=(12, 4))
for series, line_color, line_label in curves:
    ax.plot(t, series, color=line_color, label=line_label)

# Not drawn at the moment (kept from the original notebook for reference):
# - spectral flatness: a ratio and typically small, so no dB conversion needed
#   plt.plot(t, spectral_flatness, color='orange', label='Flatness')
# - spectral contrast: has shape (n_bands, n_frames), so average over bands first
#   spectral_contrast_avg = np.mean(spectral_contrast, axis=0)
#   plt.plot(t, spectral_contrast_avg, color='black', label='Contrast')

ax.set_xlabel("Time (s)")
ax.set_ylabel("Spectral Feature Value")
ax.set_title("Spectral Features Over Time")
ax.legend(loc='best')
plt.show()

Pitches visualisation

In [None]:
# Pitch sequence of the selected clip, plotted against its own index.
pitch_track = prlib.get_pitch_sequences(data)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(pitch_track)
ax.set_xlabel('Time (frames)')
ax.set_ylabel('Frequency (Hz)')
ax.set_title('Pitch Track')
plt.show()

RMS Visualisation

In [None]:
# RMS energy of the selected clip; only the first entry of the helper's result
# is plotted.
rms_energy = prlib.get_rms_energy(data)
frames = range(len(rms_energy[0]))
# NOTE(review): frames_to_time uses its default hop length here -- confirm it
# matches the hop used inside prlib.get_rms_energy.
t = librosa.frames_to_time(frames, sr=prlib.sample_rate)

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(t, rms_energy[0], label='RMS Energy')
ax.set_xlabel("Time (s)")
ax.set_ylabel("Energy")
ax.set_title("RMS Energy Over Time")
ax.legend()
plt.show()