# Trabalho Prático 2

In [52]:
import librosa
import librosa.display
import sounddevice as sd
import warnings
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.stats as st
from sklearn import preprocessing

## Extração de features

In [53]:
def read_features(fileName, delim=','):
    """Load a delimited text file into a NumPy array.

    Args:
        fileName: Path of the file to read.
        delim: Field separator passed to ``np.genfromtxt``.

    Returns:
        The array produced by ``np.genfromtxt`` for the file.
    """
    table = np.genfromtxt(fileName, delimiter=delim)
    return table

In [54]:
def clean_data(data):
    """Drop the first row and the first and last columns of a 2-D array.

    Args:
        data: Two-dimensional array (unpacking ``data.shape`` into two
            values fails for any other rank).

    Returns:
        The slice ``data[1:, 1:n_cols - 1]``.
    """
    _, n_cols = data.shape
    kept_rows = slice(1, None)
    kept_cols = slice(1, n_cols - 1)
    return data[kept_rows, kept_cols]

In [55]:
def normalize(feature):
    """Min-max scale ``feature`` into the range [0, 1].

    Args:
        feature: Values to scale, passed to
            ``sklearn.preprocessing.minmax_scale``.

    Returns:
        The scaled values returned by ``minmax_scale``.
    """
    target_range = (0, 1)
    scaled = preprocessing.minmax_scale(feature, feature_range=target_range)
    return scaled

In [56]:
def normalize_features(features):
    """Apply ``normalize`` independently to every column of ``features``.

    Args:
        features: Array whose axis 0 slices (columns of a 2-D matrix) are
            each passed to ``normalize``.

    Returns:
        The array assembled by ``np.apply_along_axis`` from the scaled
        columns.
    """
    column_axis = 0
    return np.apply_along_axis(normalize, column_axis, features)

In [57]:
# Load the reference feature table, drop its first row and its first/last
# columns, scale every column to [0, 1] and store the result as CSV.
top100 = clean_data(read_features('./dataset/top100_features.csv'))
top100_normalized = normalize_features(top100)

np.savetxt('./dataset/top100_features_normalized.csv', top100_normalized, delimiter=',')

In [58]:
def get_musics(dir='./dataset/musics/'):
    """Return the entry names of a directory in sorted order.

    Args:
        dir: Directory to list.

    Returns:
        A sorted list with the names returned by ``os.listdir``.
    """
    return sorted(os.listdir(dir))

In [59]:
def get_statistics(feature):
    """Summarise every row of a feature matrix with seven statistics.

    For each row the values are, in order: mean, standard deviation,
    skewness, kurtosis, median, maximum and minimum.

    Args:
        feature: 1-D array (treated as a single row) or 2-D array with one
            feature series per row.

    Returns:
        A flat float array of length ``7 * number_of_rows`` with the
        statistics of row 0 first, then row 1, and so on.
    """
    if feature.ndim == 1:
        # A bare vector is handled as a matrix with a single row.
        feature = feature[np.newaxis, :]

    per_row = [
        (
            row.mean(),
            row.std(),
            st.skew(row),
            st.kurtosis(row),
            np.median(row),
            row.max(),
            row.min(),
        )
        for row in feature
    ]
    return np.array(per_row, dtype=np.float64).ravel()

In [60]:
def extract_features(audioName):
    """Extract a single-row statistical feature vector from an audio file.

    The file is loaded with librosa at 22050 Hz in mono. The seven
    statistics from ``get_statistics`` are computed for, in this order:
    MFCCs (13 coefficients), spectral centroid, spectral bandwidth,
    spectral contrast (``n_bands=6``), spectral flatness, spectral rolloff,
    YIN fundamental frequency, RMS and zero-crossing rate. The estimated
    tempo is appended as the last value.

    Warnings are suppressed only while this function runs; the global
    warning filters are restored on exit.

    Args:
        audioName: Path of the audio file, passed to ``librosa.load``.

    Returns:
        A 2-D float array of shape ``(1, n)``. With 13 MFCC rows, 7
        contrast rows and one row for each remaining feature this is
        expected to be ``n == 190`` (TODO confirm against the installed
        librosa version).
    """
    sample_rate = 22050
    use_mono = True
    f0_min_freq = 20
    f0_max_freq = sample_rate // 2
    mfcc_dim = 13

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        music, _ = librosa.load(audioName, sr=sample_rate, mono=use_mono)

        # The signal is passed as ``y=`` because recent librosa releases
        # make the arguments of these functions keyword-only.
        mfcc = librosa.feature.mfcc(y=music, sr=sample_rate, n_mfcc=mfcc_dim)
        sp_centroid = librosa.feature.spectral_centroid(y=music, sr=sample_rate)
        sp_bandwidth = librosa.feature.spectral_bandwidth(y=music, sr=sample_rate)
        sp_contrast = librosa.feature.spectral_contrast(y=music, sr=sample_rate, n_bands=6)
        sp_flatness = librosa.feature.spectral_flatness(y=music)
        sp_rolloff = librosa.feature.spectral_rolloff(y=music, sr=sample_rate)
        f0 = librosa.yin(music, fmin=f0_min_freq, fmax=f0_max_freq, sr=sample_rate)
        # Frames reported exactly at the upper search bound are zeroed.
        f0[f0 == f0_max_freq] = 0
        rms = librosa.feature.rms(y=music)
        zcr = librosa.feature.zero_crossing_rate(y=music)
        tempo = librosa.beat.tempo(y=music, sr=sample_rate)

        features = [mfcc, sp_centroid, sp_bandwidth, sp_contrast, sp_flatness, sp_rolloff, f0, rms, zcr]
        parts = [get_statistics(f) for f in features]

    # Take the first tempo estimate explicitly instead of relying on the
    # implicit (deprecated) conversion of a 1-element array to a scalar.
    tempo_value = float(np.ravel(tempo)[0])
    parts.append(np.array([tempo_value]))

    return np.concatenate(parts).reshape(1, -1)

    

In [61]:
# Run the extractor once on a single track (the return value is not stored).
extract_features('./dataset/musics/MT0000004637.mp3')
# Build the feature matrix with one row per track in sorted file-name order.
# NOTE(review): the 900x190 shape is hard-coded; presumably the dataset holds
# exactly 900 tracks -- confirm. Rows beyond len(musics) would stay all-zero.
musics = get_musics()
features = np.zeros((900, 190))
for i, music in enumerate (musics):
    # extract_features returns a (1, 190) array that is written into row i.
    features[i, :] = extract_features(f'./dataset/musics/{music}')
    

KeyboardInterrupt: 

In [62]:
# Min-max scale every column of the extracted feature matrix to [0, 1].
normalized_features = normalize_features(features)

In [63]:
# Persist both the scaled and the raw feature matrices as comma-separated
# text, written with six decimal places.
np.savetxt('./features/normalized_features.csv', normalized_features, delimiter=',', fmt='%.6f')
np.savetxt('./features/features.csv', features, delimiter=',', fmt='%.6f')

In [64]:
# Reload the scaled features from disk; this replaces the in-memory matrix
# with the values stored in the CSV file.
normalized_features = read_features('./features/normalized_features.csv')

## Métricas de similaridade

In [65]:
def euclidean_distance(featuresA, featuresB):
    """Return the Euclidean (L2) distance between two feature arrays.

    Args:
        featuresA: First NumPy array.
        featuresB: Second NumPy array, subtracted element-wise from the first.

    Returns:
        The square root of the sum of squared element differences.
    """
    delta = featuresA - featuresB
    squared_sum = np.sum(delta * delta)
    return np.sqrt(squared_sum)

In [66]:
def manhattan_distance(featuresA, featuresB):
    """Return the Manhattan (L1) distance between two feature arrays.

    Args:
        featuresA: First NumPy array.
        featuresB: Second NumPy array, subtracted element-wise from the first.

    Returns:
        The sum of the absolute element differences.
    """
    delta = featuresA - featuresB
    return np.abs(delta).sum()

In [67]:
def cos_distance(featuresA, featuresB):
    """Return the cosine distance (1 - cosine similarity) of two vectors.

    Args:
        featuresA: First feature vector.
        featuresB: Second feature vector.

    Returns:
        ``1 - (A @ B) / (||A|| * ||B||)``. No guard is applied for
        zero-norm inputs, so the division is performed as-is.
    """
    dot_product = featuresA @ featuresB
    norm_product = np.linalg.norm(featuresA) * np.linalg.norm(featuresB)
    return 1 - dot_product / norm_product

In [68]:
def calculate_distances(features):
    """Compute pairwise Euclidean, cosine and Manhattan distance matrices.

    The matrices are sized from the number of rows in ``features`` rather
    than a fixed count, so any number of tracks is supported.

    Args:
        features: 2-D array with one feature vector per row.

    Returns:
        A tuple ``(distance_euclidean, distance_cos, distance_manhattan)``
        of symmetric ``(n, n)`` arrays with zeros on the diagonal, where
        ``n`` is ``features.shape[0]``.
    """
    n = features.shape[0]
    distance_euclidean = np.zeros((n, n))
    distance_cos = np.zeros((n, n))
    distance_manhattan = np.zeros((n, n))
    for i in range(n):
        row_i = features[i, :]
        # Only the upper triangle is computed; each value is mirrored.
        for j in range(i + 1, n):
            row_j = features[j, :]
            distance_euclidean[i, j] = distance_euclidean[j, i] = euclidean_distance(row_i, row_j)
            distance_cos[i, j] = distance_cos[j, i] = cos_distance(row_i, row_j)
            distance_manhattan[i, j] = distance_manhattan[j, i] = manhattan_distance(row_i, row_j)
    return distance_euclidean, distance_cos, distance_manhattan

In [69]:
# Pairwise distance matrices (Euclidean, cosine, Manhattan) for the features
# extracted in this notebook and for the normalized top-100 reference table.
distance_euclidean, distance_cos, distance_manhattan = calculate_distances(normalized_features)
distance_euclidean100, distance_cos100, distance_manhattan100 = calculate_distances(top100_normalized)

In [70]:
# Write every distance matrix to its own CSV file (six decimal places).
for _csv_path, _matrix in (
    ('./features/distances/distance_euclidean.csv', distance_euclidean),
    ('./features/distances/distance_euclidean100.csv', distance_euclidean100),
    ('./features/distances/distance_cos.csv', distance_cos),
    ('./features/distances/distance_cos100.csv', distance_cos100),
    ('./features/distances/distance_manhattan.csv', distance_manhattan),
    ('./features/distances/distance_manhattan100.csv', distance_manhattan100),
):
    np.savetxt(_csv_path, _matrix, delimiter=',', fmt='%.6f')

In [71]:
# Sorted track file names and a lookup from file name to its position in
# that listing.
musics = get_musics()
music_dict = {name: position for position, name in enumerate(musics)}

In [72]:
def get_the_closest_musics(music, features_distances, number=20):
    """Return the tracks closest to ``music`` according to a distance matrix.

    The query track is removed from the ranking by index, so it never
    appears in its own results even when other tracks tie with it (or when
    the row contains NaN values that disturb the sort order).

    Args:
        music: File name of the query track; must be a key of the
            module-level ``music_dict``.
        features_distances: Square distance matrix indexed like ``musics``.
        number: How many neighbours to return.

    Returns:
        A list of up to ``number`` file names from the module-level
        ``musics`` list, ordered from smallest to largest distance.

    Raises:
        KeyError: If ``music`` is not present in ``music_dict``.
    """
    music_index = music_dict[music]
    # Existing behaviour: the index of the query track is echoed to stdout.
    print(music_index)
    distances = features_distances[music_index, :]
    ranking = np.argsort(distances)
    # Drop the query explicitly rather than assuming it sorts first.
    neighbours = ranking[ranking != music_index]

    return [musics[i] for i in neighbours[:number]]

In [73]:
# Show the normalized top-100 feature matrix.
print(top100_normalized)

[[0.40833396 0.55892695 0.08767653 ... 0.49339478 0.29677785 0.28512149]
 [0.17434797 0.81639588 0.09864405 ... 0.66779986 0.14344785 0.        ]
 [0.21986539 0.95836272 0.11175995 ... 0.73227772 0.19118833 0.        ]
 ...
 [0.22605274 0.8830899  0.22646862 ... 0.66775423 0.19149261 0.        ]
 [0.33731922 0.82533373 0.28343725 ... 0.6795957  0.26104038 0.53630904]
 [0.38793611 0.80516266 0.21350893 ... 0.54009902 0.13512837 0.26941305]]


In [75]:
# For every query track, print its nearest neighbours under each metric,
# first for the extracted features and then for the top-100 feature set.
for music in get_musics('./Queries/'):
    labelled_matrices = (
        ('Euclidean:', distance_euclidean),
        ('Cos:', distance_cos),
        ('Manhattan:', distance_manhattan),
        ('Euclidean100:', distance_euclidean100),
        ('Cos100:', distance_cos100),
        ('Manhattan100:', distance_manhattan100),
    )
    for label, matrix in labelled_matrices:
        print(label, music, get_the_closest_musics(music, matrix))
    print('\n'*5)

10
Euclidean: MT0000202045.mp3 ['MT0011612883.mp3', 'MT0011667212.mp3', 'MT0011697297.mp3', 'MT0011702301.mp3', 'MT0011739779.mp3', 'MT0011786799.mp3', 'MT0011821215.mp3', 'MT0011836290.mp3', 'MT0011862487.mp3', 'MT0011869625.mp3', 'MT0011886148.mp3', 'MT0011594970.mp3', 'MT0011894681.mp3', 'MT0011899302.mp3', 'MT0011906602.mp3', 'MT0011916674.mp3', 'MT0011922080.mp3', 'MT0011922905.mp3', 'MT0011930865.mp3', 'MT0011938737.mp3']
10
Cos: MT0000202045.mp3 ['MT0011612883.mp3', 'MT0011667212.mp3', 'MT0011697297.mp3', 'MT0011702301.mp3', 'MT0011739779.mp3', 'MT0011786799.mp3', 'MT0011821215.mp3', 'MT0011836290.mp3', 'MT0011862487.mp3', 'MT0011869625.mp3', 'MT0011886148.mp3', 'MT0011594970.mp3', 'MT0011894681.mp3', 'MT0011899302.mp3', 'MT0011906602.mp3', 'MT0011916674.mp3', 'MT0011922080.mp3', 'MT0011922905.mp3', 'MT0011930865.mp3', 'MT0011938737.mp3']
10
Manhattan: MT0000202045.mp3 ['MT0011612883.mp3', 'MT0011667212.mp3', 'MT0011697297.mp3', 'MT0011702301.mp3', 'MT0011739779.mp3', 'MT0011786