# Trabalho Prático 2

In [2]:
import librosa
import librosa.display
import sounddevice as sd
import warnings
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.stats as st

## Extração de features

In [18]:
def read_features(fileName, delim=','):
    return np.genfromtxt(fileName, delimiter=delim)

In [19]:
def clean_data(data):
    nl, nc = data.shape
    return data[1:, 1:(nc-1)]

In [22]:
def normalize_features(features):
    normalized = np.zeros(features.shape)
    nl, nc = features.shape

    for i in range(nc):
        vmax = features[:, i].max()
        vmin = features[:, i].min()
        normalized[:, i] = (features[:, i] - vmin) / (vmax - vmin)
    
    return normalized

In [23]:
top100 = read_features('./dataset/top100_features.csv')
top100 = clean_data(top100)
top100_normalized = normalize_features(top100)

np.savetxt('./dataset/top100_features_normalized.csv', top100_normalized, delimiter=',')

In [29]:
def get_musics():
    files = os.listdir('./dataset/musics/')
    print(f'{len(files)} musics found')

In [30]:
get_musics()

900 musics found


In [62]:
def get_statistics(feature):
    print(feature.shape)
    nl, nc = feature.shape
    feature_statistics = np.zeros((nl, 7))
    for i in range(nl):
        mean = feature[i, :].mean()
        std = feature[i, :].std()
        skew = st.skew(feature[i, :])
        kurtosis = st.kurtosis(feature[i, :])
        median = np.median(feature[i, :])
        mx = feature[i, :].max()
        mn = feature[i, :].min()
        feature_statistics[i, :] = np.array([mean, skew, std, kurtosis, median, mx, mn]) 
    return feature_statistics.flatten()

In [60]:
def extract_features(audioName):
    sample_rate = 22050
    use_mono = True
    warnings.filterwarnings('ignore')
    f0_min_freq = 20
    f0_max_freq = sample_rate//2
    mfcc_dim = 13

    music, _ = librosa.load(audioName, sr=sample_rate, mono=use_mono)

    mfcc = librosa.feature.mfcc(music, n_mfcc=mfcc_dim)
    sp_centroid = librosa.feature.spectral_centroid(music)
    sp_bandwidth = librosa.feature.spectral_bandwidth(music)
    sp_contrast = librosa.feature.spectral_contrast(music, n_bands=6)
    sp_flatness = librosa.feature.spectral_flatness(music)
    sp_rolloff = librosa.feature.spectral_rolloff(music)    
    f0 = librosa.yin(music, fmin=f0_min_freq, fmax=f0_max_freq)
    rms = librosa.feature.rms(music)
    zcr = librosa.feature.zero_crossing_rate(music)
    tempo = librosa.beat.tempo(music)
    
    features = [mfcc, sp_centroid, sp_bandwidth, sp_contrast, sp_flatness, sp_rolloff, f0, rms, zcr]

    statistics = np.zeros(190)
    prev = 0
    for f in features:
        stats = get_statistics(f)
        print(stats, len(stats))
        statistics[prev:prev+len(stats)] = stats
        prev += len(stats)
    
    print(statistics)

    

In [63]:
extract_features('./dataset/musics/MT0000004637.mp3')

(13, 1295)
[-2.30354630e+02 -7.10278009e-01  8.80596695e+01  1.78219293e+00
 -2.31613525e+02 -5.62219925e+01 -5.81123352e+02  1.00476913e+02
 -5.92350362e-01  1.98916683e+01  2.38946372e+00  1.00342712e+02
  1.54593323e+02  0.00000000e+00 -3.22840118e+01 -4.76053778e-01
  3.55913696e+01 -8.92932828e-01 -2.45100098e+01  3.11522980e+01
 -1.22276459e+02  2.87400341e+01  7.85626681e-01  1.94590759e+01
  3.50872563e-01  2.47215195e+01  9.88948669e+01 -1.49498472e+01
 -9.85208797e+00 -1.38887335e+00  1.31589537e+01  2.94671647e+00
 -7.51127052e+00  1.83777161e+01 -7.07550888e+01 -1.56655493e+01
 -5.88064407e-01  1.65415745e+01 -3.50207622e-01 -1.26428833e+01
  1.89461994e+01 -6.37574654e+01 -5.18498659e+00  6.29570232e-01
  1.07078934e+01  9.92998722e-01 -6.13803768e+00  3.63544731e+01
 -3.53819580e+01 -1.68022370e+00 -2.18032616e-03  8.89746094e+00
  1.94943861e-01 -1.63631082e+00  2.66682339e+01 -3.29146652e+01
 -1.43120260e+01 -2.79983162e-01  8.71228600e+00  1.44656712e-01
 -1.37812634e+

ValueError: not enough values to unpack (expected 2, got 1)