In [10]:
import librosa
import librosa.display
import numpy as np
import pandas as pd
import python_speech_features

from utils.instrument_data import DataPreprocessor


def envelope(y, rate, threshold, window_size = 1/10):
    """Build a boolean mask marking the samples whose local amplitude exceeds a threshold.

    The absolute value of the signal is smoothed with a centered rolling mean
    and every sample whose smoothed amplitude is strictly greater than
    ``threshold`` is flagged ``True``.

    Parameters
    ----------
    y : array-like
        One-dimensional audio signal.
    rate : int or float
        Sampling rate in samples per second.
    threshold : float
        Amplitude a sample's rolling mean must exceed to be kept.
    window_size : float, optional
        Length of the smoothing window in seconds (default: 1/10 of a second).

    Returns
    -------
    list of bool
        One entry per input sample; usable directly as a NumPy boolean index.
    """
    # int() truncates, so a very small rate * window_size would give a window
    # of 0 samples, which pandas rejects because min_periods=1 exceeds it.
    # Fall back to a single-sample window in that case.
    window = max(1, int(rate * window_size))

    magnitude = pd.Series(y).apply(np.abs)
    rolling_mean = magnitude.rolling(window=window, min_periods=1, center=True).mean()

    # Vectorised comparison instead of a per-sample Python loop; NaN compares
    # as False, exactly as the explicit `if mean > threshold` did.
    return (rolling_mean > threshold).tolist()

# Define parameters
# Request the sampling rate from the loader instead of overwriting the returned
# value afterwards, so `rate` always matches the audio that was actually loaded.
signal, rate = librosa.load("wavfiles/juice_wrld_guitar.wav", sr=22050, duration=3)  # Your audio signal
frame_size_sec = 0.092  # Frame size in seconds
frame_shift_sec = 0.023  # Frame shift in seconds
num_mfcc = 13  # Number of MFCC coefficients


# Drop the low-amplitude samples, then cap the remainder at 3 seconds of audio
mask = envelope(signal, rate, threshold=0.005, window_size=1/10)
signal = signal[mask]
signal = signal[:3*rate]

# Frame-level descriptors computed with librosa's default framing
rmse_metric_x = librosa.feature.rms(y=signal)
spectral_centroid_x = librosa.feature.spectral_centroid(y=signal, sr=rate)
spectral_bandwidth_x = librosa.feature.spectral_bandwidth(y=signal, sr=rate)
spectral_rolloff_x = librosa.feature.spectral_rolloff(y=signal, sr=rate)
zero_crossing_rate_x = librosa.feature.zero_crossing_rate(y=signal)
librosa_default_mfccs_x = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)

print("RMSE shape : ", rmse_metric_x.shape)
print("Spectral centroid shape : ", spectral_centroid_x.shape)
print("Spectral bandwidth shape : ", spectral_bandwidth_x.shape)
print("Spectral rolloff shape : ", spectral_rolloff_x.shape)
print("Zero crossing rate shape : ", zero_crossing_rate_x.shape)
print("Librosa default MFCC shape : ", librosa_default_mfccs_x.shape)

# Calculate frame size and hop length in samples
frame_size = int(frame_size_sec * rate)
frame_shift = int(frame_shift_sec * rate)

# The FFT must be at least as long as one analysis frame, otherwise
# python_speech_features truncates every frame to `nfft` samples. Use the
# smallest power of two that covers the (rounded-up) frame length.
frame_len_samples = int(np.ceil(frame_size_sec * rate))
nfft = 1 << (frame_len_samples - 1).bit_length()

# Calculate MFCCs using librosa
mel_lib = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=num_mfcc)

# Calculate MFCCs using python_speech_features
# (transposed so the layout is coefficients x frames, like librosa's output)
mel = python_speech_features.mfcc(signal, rate, winlen=frame_size_sec, winstep=frame_shift_sec, numcep=num_mfcc, nfilt=26, nfft=nfft).T

# Output shapes
print("Shape from librosa:", mel_lib.shape)
print("Shape from python_speech_features:", mel.shape)

print(list(zero_crossing_rate_x.flatten()))

RMSE shape :  (1, 130)
Spectral centroid shape :  (1, 130)
Spectral bandwidth shape :  (1, 130)
Spectral rolloff shape :  (1, 130)
Zero crossing rate shape :  (1, 130)
Librosa default MFCC shape :  (40, 130)


In [11]:
import tensorflow as tf

# Report how many GPUs TensorFlow can see. Uses the stable
# tf.config.list_physical_devices entry point rather than the older
# tf.config.experimental alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [12]:
# Shape demo: transposing an N-d array with .T reverses the order of its axes.
in_arr = np.array([[[1, 2, 3], [1, 2, 3]]])
print(in_arr.shape)

# Transpose an independent copy so `arr` does not share memory with `in_arr`.
arr = in_arr.copy().transpose()
print(arr.shape)

(1, 2, 3)
(3, 2, 1)


In [2]:
from scipy.io import wavfile

# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\L, \P, \p), which Python only tolerates with a warning.
# The resulting path is identical to the original one.
# NOTE(review): the file has an .mp3 extension but is read with the WAV reader —
# presumably it is WAV data under an .mp3 name; confirm.
rate, sig = wavfile.read(r"D:\LICENTA\Philarmonia_equalised\percussion\banana-shaker__long_forte_shaken.mp3")
print(rate)
print(sig)

22050
[-1.0186341e-10  5.0931703e-11 -2.9103830e-11 ...  0.0000000e+00
  0.0000000e+00  0.0000000e+00]


In [4]:
# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\L, \m), which Python only tolerates with a warning.
# The resulting path is identical to the original one.
rate, sig = wavfile.read(r"D:\LICENTA\marsyas_gtzan_genres\genres_original\blues\blues_00000.wav")
print(rate)
print(sig)

22050
[  240   544   250 ... -1822 -2001 -2103]


In [16]:
import joblib
from pprint import pprint

# Load the saved model from disk.
# NOTE(review): despite the `best_rf` name, the path points at a KNN artifact
# (gtzan_knn.joblib) — confirm whether the Random Forest file was intended.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
best_rf = joblib.load("genre_models/gtzan_models/gtzan_knn.joblib")

# Retrieve the loaded estimator's parameters as a dict
best_params = best_rf.get_params()

# Print the parameters in a nicely formatted way
pprint(best_params)

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 1,
 'weights': 'distance'}
