# Trabalho Prático 2

In [10]:
import librosa
import librosa.display
import sounddevice as sd
import warnings
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.stats as st
from sklearn import preprocessing
import pandas as pd

## Extração de features

In [11]:
def read_features(fileName, delim=','):
    """Parse a delimited numeric text source into a NumPy array.

    Args:
        fileName: Anything ``np.genfromtxt`` accepts (path, file object, ...).
        delim: Column separator; a comma by default.

    Returns:
        The array produced by ``np.genfromtxt``; cells that cannot be parsed
        as numbers come back as NaN.
    """
    table = np.genfromtxt(fileName, delimiter=delim)
    return table

In [12]:
def clean_data(data):
    """Strip the first row and the first and last columns of a 2-D array.

    Args:
        data: Two-dimensional array.

    Returns:
        The slice ``data[1:, 1:n_cols - 1]``.
    """
    _, n_cols = data.shape
    last_col = n_cols - 1
    return data[1:, 1:last_col]

In [13]:
def normalize(feature):
    """Min-max scale ``feature`` into the range [0, 1].

    Thin wrapper around ``sklearn.preprocessing.minmax_scale``.
    """
    target_range = (0, 1)
    return preprocessing.minmax_scale(feature, feature_range=target_range)

In [14]:
def normalize_features(features):
    """Apply ``normalize`` independently to every column of ``features``.

    Args:
        features: Array whose axis 0 slices (columns of a 2-D matrix) are
            scaled one at a time.

    Returns:
        The array returned by ``np.apply_along_axis``.
    """
    column_axis = 0  # each 1-D slice handed to `normalize` is one column
    return np.apply_along_axis(normalize, column_axis, features)

In [15]:
# Load the top-100 feature table, trim it with clean_data (first row, first and
# last columns removed), scale every column to [0, 1] and persist the result.
top100 = clean_data(read_features('./dataset/top100_features.csv'))
top100_normalized = normalize_features(top100)

np.savetxt(
    './dataset/top100_features_normalized.csv',
    top100_normalized,
    delimiter=',',
)

In [16]:
def get_musics(dir='./dataset/musics/'):
    """Return the entries of ``dir`` as an alphabetically sorted list.

    Args:
        dir: Directory to list; defaults to the dataset's music folder.

    Returns:
        Sorted list of entry names exactly as ``os.listdir`` reports them.
    """
    return sorted(os.listdir(dir))

In [17]:
def get_statistics(feature):
    """Summarise every row of ``feature`` with seven descriptive statistics.

    A 1-D input is treated as a single row. For each row the statistics are,
    in this order: mean, standard deviation, skewness, kurtosis, median,
    maximum and minimum.

    Args:
        feature: 1-D or 2-D numeric array.

    Returns:
        1-D array of length ``7 * n_rows`` with the per-row statistics laid
        out row after row.
    """
    rows = feature.reshape(1, -1) if len(feature.shape) == 1 else feature

    summary = np.empty((rows.shape[0], 7))
    for idx, row in enumerate(rows):
        summary[idx] = np.array([
            row.mean(),
            row.std(),
            st.skew(row),
            st.kurtosis(row),
            np.median(row),
            row.max(),
            row.min(),
        ])

    return summary.reshape(-1)

In [18]:
def extract_features(audioName):
    """Extract the statistical feature vector of one audio file.

    The file is loaded as mono at 22050 Hz. MFCCs (13 coefficients), spectral
    centroid, bandwidth, contrast (6 bands), flatness and roll-off, the YIN
    fundamental frequency, RMS energy and zero-crossing rate are each
    summarised with ``get_statistics``; the estimated tempo is appended as the
    last value.

    Args:
        audioName: Path of the audio file to analyse.

    Returns:
        Array of shape ``(1, n)`` holding every per-feature statistic followed
        by the tempo. With the settings used here ``n`` is 190.
    """
    sample_rate = 22050
    use_mono = True
    # NOTE: this silences warnings for the whole process, not just this call.
    warnings.filterwarnings('ignore')
    f0_min_freq = 20
    f0_max_freq = sample_rate // 2
    mfcc_dim = 13

    music, _ = librosa.load(audioName, sr=sample_rate, mono=use_mono)

    # The signal and sample rate are passed by keyword: recent librosa
    # releases reject them as positional arguments, and older releases accept
    # the keyword form as well.
    mfcc = librosa.feature.mfcc(y=music, sr=sample_rate, n_mfcc=mfcc_dim)
    sp_centroid = librosa.feature.spectral_centroid(y=music, sr=sample_rate)
    sp_bandwidth = librosa.feature.spectral_bandwidth(y=music, sr=sample_rate)
    sp_contrast = librosa.feature.spectral_contrast(y=music, sr=sample_rate, n_bands=6)
    sp_flatness = librosa.feature.spectral_flatness(y=music)
    sp_rolloff = librosa.feature.spectral_rolloff(y=music, sr=sample_rate)
    f0 = librosa.yin(music, fmin=f0_min_freq, fmax=f0_max_freq, sr=sample_rate)
    # Frames where YIN returned the upper bound are zeroed.
    f0[f0 == f0_max_freq] = 0
    rms = librosa.feature.rms(y=music)
    zcr = librosa.feature.zero_crossing_rate(y=music)
    # Prefer librosa.feature.tempo when the installed release has it and fall
    # back to the older librosa.beat.tempo otherwise.
    estimate_tempo = getattr(librosa.feature, 'tempo', None) or librosa.beat.tempo
    tempo = estimate_tempo(y=music, sr=sample_rate)

    features = [mfcc, sp_centroid, sp_bandwidth, sp_contrast, sp_flatness, sp_rolloff, f0, rms, zcr]

    per_feature = [get_statistics(f) for f in features]
    # The tempo estimate may come back as a one-element array; take the scalar
    # explicitly instead of relying on implicit array-to-scalar conversion.
    tempo_value = np.ravel(tempo)[0]

    statistics = np.concatenate(per_feature + [[tempo_value]])
    return statistics.reshape(1, -1)

    

In [19]:
# Sorted listing of the entries in the default music directory.
musics = get_musics()

In [20]:
"""extract_features('./dataset/musics/MT0000004637.mp3')
musics = get_musics()
features = np.zeros((900, 190))
for i, music in enumerate (musics):
    features[i, :] = extract_features(f'./dataset/musics/{music}')
""" 

"extract_features('./dataset/musics/MT0000004637.mp3')\nmusics = get_musics()\nfeatures = np.zeros((900, 190))\nfor i, music in enumerate (musics):\n    features[i, :] = extract_features(f'./dataset/musics/{music}')\n"

In [21]:
# normalized_features = normalize_features(features)

In [22]:
"""np.savetxt('./features/normalized_features.csv', normalized_features, delimiter=',', fmt='%.6f')
np.savetxt('./features/features.csv', features, delimiter=',', fmt='%.6f')"""

"np.savetxt('./features/normalized_features.csv', normalized_features, delimiter=',', fmt='%.6f')\nnp.savetxt('./features/features.csv', features, delimiter=',', fmt='%.6f')"

In [23]:
# Load the normalized feature matrix from its CSV file on disk.
normalized_features = read_features('./features/normalized_features.csv')

## Métricas de similaridade

In [24]:
def euclidean_distance(featuresA, featuresB):
    """Return the Euclidean (L2) distance between two feature vectors."""
    delta = featuresA - featuresB
    squared_sum = np.sum(np.square(delta))
    return np.sqrt(squared_sum)

In [25]:
def manhattan_distance(featuresA, featuresB):
    """Return the Manhattan (L1) distance between two feature vectors."""
    delta = featuresA - featuresB
    return np.sum(np.abs(delta))

In [26]:
def cos_distance(featuresA, featuresB):
    """Return the cosine distance (1 - cosine similarity) of two vectors.

    A zero-norm vector makes the denominator zero; nothing guards against it.
    """
    dot = featuresA @ featuresB
    norm_product = np.linalg.norm(featuresA) * np.linalg.norm(featuresB)
    return 1 - dot / norm_product

In [27]:
def calculate_distances(features):
    """Build the pairwise distance matrices between all rows of ``features``.

    The matrix size is taken from ``features`` itself rather than a fixed
    900, so the function works for any number of rows.

    Args:
        features: 2-D array with one feature vector per row.

    Returns:
        Tuple ``(distance_euclidean, distance_cos, distance_manhattan)`` of
        symmetric ``(n, n)`` arrays with a zero diagonal, where ``n`` is the
        number of rows in ``features``.
    """
    n = features.shape[0]
    distance_euclidean = np.zeros((n, n))
    distance_cos = np.zeros((n, n))
    distance_manhattan = np.zeros((n, n))
    for i in range(n):
        row_i = features[i, :]
        # Only the upper triangle is computed; each value is mirrored.
        for j in range(i + 1, n):
            row_j = features[j, :]
            distance_euclidean[i, j] = distance_euclidean[j, i] = euclidean_distance(row_i, row_j)
            distance_cos[i, j] = distance_cos[j, i] = cos_distance(row_i, row_j)
            distance_manhattan[i, j] = distance_manhattan[j, i] = manhattan_distance(row_i, row_j)
    return distance_euclidean, distance_cos, distance_manhattan

In [28]:
# Pairwise distance matrices for the extracted features and, separately, for
# the top-100 feature set (names suffixed with "100").
distance_euclidean, distance_cos, distance_manhattan = calculate_distances(normalized_features)
distance_euclidean100, distance_cos100, distance_manhattan100 = calculate_distances(top100_normalized)

In [29]:
# Persist all six distance matrices as comma-separated text, six decimals each.
np.savetxt('./features/distances/distance_euclidean.csv', distance_euclidean, delimiter=',', fmt='%.6f')
np.savetxt('./features/distances/distance_euclidean100.csv', distance_euclidean100, delimiter=',', fmt='%.6f')
np.savetxt('./features/distances/distance_cos.csv', distance_cos, delimiter=',', fmt='%.6f')
np.savetxt('./features/distances/distance_cos100.csv', distance_cos100, delimiter=',', fmt='%.6f')
np.savetxt('./features/distances/distance_manhattan.csv', distance_manhattan, delimiter=',', fmt='%.6f')
np.savetxt('./features/distances/distance_manhattan100.csv', distance_manhattan100, delimiter=',', fmt='%.6f')

In [30]:
# Map every file name to its position in the sorted listing, so a name can be
# turned into a row/column index of the distance and score matrices.
musics = get_musics()
music_dict = {music: i for i, music in enumerate(musics)}

In [31]:
def get_the_closest_musics(music, features_distances, number=20):
    """Return the ``number`` tracks closest to ``music``.

    Args:
        music: File name of the query track; must be a key of ``music_dict``.
        features_distances: Square distance matrix indexed like ``musics``.
        number: How many neighbours to return.

    Returns:
        List of file names ordered from nearest to farthest, never containing
        the query itself.

    Raises:
        KeyError: If ``music`` is not in ``music_dict``.
    """
    music_index = music_dict[music]
    distances = features_distances[music_index, :]
    ranking = np.argsort(distances)

    # Remove the query by index instead of assuming it sorts first: another
    # track at distance 0, or a slightly negative cosine distance caused by
    # rounding, can place a different track ahead of it.
    neighbours = ranking[ranking != music_index][:number]

    return [musics[i] for i in neighbours]

In [32]:
# Show the normalized top-100 feature matrix for a quick visual check.
print(top100_normalized)

[[0.40833396 0.55892695 0.08767653 ... 0.49339478 0.29677785 0.28512149]
 [0.17434797 0.81639588 0.09864405 ... 0.66779986 0.14344785 0.        ]
 [0.21986539 0.95836272 0.11175995 ... 0.73227772 0.19118833 0.        ]
 ...
 [0.22605274 0.8830899  0.22646862 ... 0.66775423 0.19149261 0.        ]
 [0.33731922 0.82533373 0.28343725 ... 0.6795957  0.26104038 0.53630904]
 [0.38793611 0.80516266 0.21350893 ... 0.54009902 0.13512837 0.26941305]]


In [33]:
# For every file in the queries folder, print its nearest tracks under each
# distance metric, first for the extracted features and then for the top-100
# feature set. Each query name is looked up in music_dict by
# get_the_closest_musics.
for music in get_musics('./Queries/'):
    print('Euclidean:', music, get_the_closest_musics(music, distance_euclidean))
    print('Cos:', music, get_the_closest_musics(music, distance_cos))
    print('Manhattan:', music, get_the_closest_musics(music, distance_manhattan))
    print('Euclidean100:', music, get_the_closest_musics(music, distance_euclidean100))
    print('Cos100:', music, get_the_closest_musics(music, distance_cos100))
    print('Manhattan100:', music, get_the_closest_musics(music, distance_manhattan100))
    # Blank lines to separate one query's output from the next.
    print('\n'*5)

Euclidean: MT0000202045.mp3 ['MT0005129157.mp3', 'MT0011899302.mp3', 'MT0012001409.mp3', 'MT0002233402.mp3', 'MT0007043504.mp3', 'MT0007799677.mp3', 'MT0004428604.mp3', 'MT0002161109.mp3', 'MT0011975274.mp3', 'MT0010624346.mp3', 'MT0009217411.mp3', 'MT0000092267.mp3', 'MT0007766156.mp3', 'MT0001676671.mp3', 'MT0005270263.mp3', 'MT0003787478.mp3', 'MT0011376343.mp3', 'MT0005213723.mp3', 'MT0000732821.mp3', 'MT0002634024.mp3']
Cos: MT0000202045.mp3 ['MT0005129157.mp3', 'MT0012001409.mp3', 'MT0011899302.mp3', 'MT0002233402.mp3', 'MT0007043504.mp3', 'MT0007799677.mp3', 'MT0004428604.mp3', 'MT0011975274.mp3', 'MT0002161109.mp3', 'MT0000092267.mp3', 'MT0010624346.mp3', 'MT0009217411.mp3', 'MT0007766156.mp3', 'MT0003787478.mp3', 'MT0010085729.mp3', 'MT0005270263.mp3', 'MT0001676671.mp3', 'MT0000732821.mp3', 'MT0005213723.mp3', 'MT0026158301.mp3']
Manhattan: MT0000202045.mp3 ['MT0005129157.mp3', 'MT0011899302.mp3', 'MT0012001409.mp3', 'MT0007799677.mp3', 'MT0002233402.mp3', 'MT0007043504.mp3',

In [34]:
def get_metadata(file='./dataset/panda_dataset_taffc_metadata.csv'):
    """Read the metadata CSV as an array of strings.

    Cells are split on every comma and kept verbatim, so surrounding double
    quotes stay part of each value.

    Args:
        file: Source accepted by ``np.genfromtxt``; defaults to the dataset's
            metadata file.

    Returns:
        String array with one row per line of the source.
    """
    return np.genfromtxt(file, delimiter=',', dtype="str")

In [35]:
metadata_raw = get_metadata()
# Skip the first row and keep columns 1, 3, 9 and 11 only.
# NOTE(review): judging by the printed values these look like artist,
# quadrant, mood tags and genre tags — confirm against the CSV header.
metadata = metadata_raw[1:, [1, 3, 9, 11]]
print(metadata)

[['"Charlie Poole"' '"Q3"' '"Circular; Greasy; Messy"'
  '"Country; International"']
 ['"Dismember"' '"Q2"' '"Negative; Nervous; Jittery"'
  '"Electronic; International; Pop/Rock"']
 ['"Curse of the Golden Vampire"' '"Q2"'
  '"Fierce; Harsh; Hostile; Menacing; Outrageous; Unsettling"'
  '"Electronic"']
 ...
 ['"Subhumans"' '"Q2"' '"Bitter; Harsh; Outraged"' '"Pop/Rock"']
 ['"Taio Cruz"' '"Q1"'
  '"Bright; Carefree; Energetic; Euphoric; Exciting; Joyous; Slick; Thrilling; Urgent"'
  '"Pop/Rock"']
 ['"Product"' '"Q3"' '"Bitter; Bleak; Snide; Somber; Thuggish"' '"Rap"']]


In [36]:
def metadata_score(metadata):
    """Score how much metadata every pair of tracks has in common.

    Each cell is stripped of its first and last character (the surrounding
    quotes) and split on ``'; '``. For a pair ``(i, j)`` with ``i < j`` the
    score is the number of items of row ``j`` that also appear in the same
    column of row ``i``, summed over all columns.

    The matrix is sized from ``metadata`` itself, so any number of tracks is
    supported.

    Args:
        metadata: 2-D string array, one row per track.

    Returns:
        Symmetric ``(n, n)`` float array of scores with -1 on the diagonal.
    """
    n_tracks, n_fields = metadata.shape

    # Split every cell once instead of once per compared pair.
    # NOTE: an empty cell becomes [''] and therefore matches another empty
    # cell in the same column.
    tokens = [
        [metadata[i, k][1:-1].split('; ') for k in range(n_fields)]
        for i in range(n_tracks)
    ]
    token_sets = [[set(cell) for cell in row] for row in tokens]

    scores = np.zeros((n_tracks, n_tracks))
    np.fill_diagonal(scores, -1)
    for i in range(n_tracks):
        sets_i = token_sets[i]
        for j in range(i + 1, n_tracks):
            # Items repeated in row j count once per occurrence.
            shared = sum(
                elem in known
                for known, cell in zip(sets_i, tokens[j])
                for elem in cell
            )
            scores[i, j] = scores[j, i] = shared
    return scores

In [37]:
# Pairwise metadata-overlap scores between all tracks.
score_matrix = metadata_score(metadata)

In [38]:
# Persist the score matrix as integers in comma-separated text.
np.savetxt('./dataset/score_matrix.csv', score_matrix, delimiter=',', fmt='%d')

In [39]:
def get_top_metadata_match(music, score_matrix, top=20):
    """Return the ``top`` tracks with the highest metadata score for ``music``.

    Args:
        music: File name of the query track; must be a key of ``music_dict``.
        score_matrix: Square score matrix indexed like ``musics``.
        top: How many matches to return.

    Returns:
        List of file names ordered from highest to lowest score.

    Raises:
        KeyError: If ``music`` is not in ``music_dict``.
    """
    music_index = music_dict[music]
    scores = score_matrix[music_index, :]
    # np.argsort orders ascending, so the best matches are the last `top`
    # positions; reversing them yields a descending ranking. The indices keep
    # argsort's native integer type: narrowing them to int16 would wrap for
    # collections with more than 32767 tracks.
    best_first = np.argsort(scores)[-top:][::-1]
    # The extra slice keeps top=0 returning an empty list ([-0:] is the whole array).
    return [musics[i] for i in best_first[:top]]

In [40]:
# Print the best metadata matches for every file in the queries folder.
for music in get_musics('./Queries/'):
    print('Metadata:', music, get_top_metadata_match(music, score_matrix))
    # Blank lines to separate one query's output from the next.
    print('\n'*5)

Metadata: MT0000202045.mp3 ['MT0014475915.mp3', 'MT0012862507.mp3', 'MT0000888329.mp3', 'MT0007556029.mp3', 'MT0031898123.mp3', 'MT0004867564.mp3', 'MT0001494812.mp3', 'MT0003022328.mp3', 'MT0011922905.mp3', 'MT0030369896.mp3', 'MT0007453719.mp3', 'MT0034186620.mp3', 'MT0004850690.mp3', 'MT0011938737.mp3', 'MT0034577404.mp3', 'MT0003025046.mp3', 'MT0005285696.mp3', 'MT0002846256.mp3', 'MT0001058887.mp3', 'MT0007766156.mp3']






Metadata: MT0000379144.mp3 ['MT0031951901.mp3', 'MT0014584473.mp3', 'MT0013080259.mp3', 'MT0013416300.mp3', 'MT0011032905.mp3', 'MT0005157391.mp3', 'MT0005253065.mp3', 'MT0008170600.mp3', 'MT0007652281.mp3', 'MT0007349999.mp3', 'MT0007338724.mp3', 'MT0001526386.mp3', 'MT0004287283.mp3', 'MT0005115042.mp3', 'MT0004131058.mp3', 'MT0001929641.mp3', 'MT0001934726.mp3', 'MT0003863509.mp3', 'MT0029877658.mp3', 'MT0003114552.mp3']






Metadata: MT0000414517.mp3 ['MT0010489498.mp3', 'MT0000040632.mp3', 'MT0010487769.mp3', 'MT0012331779.mp3', 'MT0027048677.mp3', 'MT0

In [41]:
def precision(music, score_matrix, distance, top=20):
    """Measure the overlap between the metadata ranking and a distance ranking.

    Args:
        music: File name of the query track.
        score_matrix: Metadata score matrix passed to ``get_top_metadata_match``.
        distance: Distance matrix passed to ``get_the_closest_musics``.
        top: Size of both rankings.

    Returns:
        Tuple ``(p, intersect)``: the percentage of ``top`` covered by tracks
        present in both rankings, and the sorted array of those tracks. The
        same two values are also printed.
    """
    by_metadata = get_top_metadata_match(music, score_matrix, top)
    by_distance = get_the_closest_musics(music, distance, top)
    intersect = np.intersect1d(by_metadata, by_distance)
    shared_count = len(intersect)
    p = shared_count / top * 100
    print('%.2f%%' % p, intersect)
    return p, intersect


In [42]:
def metric_precision():
    """Print, for every query track, the precision of each distance ranking.

    Each of the six module-level distance matrices is compared against the
    metadata ranking through ``precision``, which prints the value next to
    the label written here.
    """
    rankings = (
        ('\tEuclidean: ', distance_euclidean),
        ('\tManhattan: ', distance_manhattan),
        ('\tCos: ', distance_cos),
        ('\tEuclidean TOP 100: ', distance_euclidean100),
        ('\tManhattan TOP 100: ', distance_manhattan100),
        ('\tCos TOP 100: ', distance_cos100),
    )
    for music in get_musics('./Queries/'):
        print('MUSIC:', music)
        for label, matrix in rankings:
            # No newline: precision() completes the line with its own print.
            print(label, end='')
            precision(music, score_matrix, matrix)

In [43]:
# Print the precision report for all query tracks.
metric_precision()

MUSIC: MT0000202045.mp3
	Euclidean: 5.00% ['MT0007766156.mp3']
	Manhattan: 0.00% []
	Cos: 5.00% ['MT0007766156.mp3']
	Euclidean TOP 100: 0.00% []
	Manhattan TOP 100: 0.00% []
	Cos TOP 100: 0.00% []
MUSIC: MT0000379144.mp3
	Euclidean: 0.00% []
	Manhattan: 0.00% []
	Cos: 0.00% []
	Euclidean TOP 100: 0.00% []
	Manhattan TOP 100: 0.00% []
	Cos TOP 100: 0.00% []
MUSIC: MT0000414517.mp3
	Euclidean: 15.00% ['MT0000040632.mp3' 'MT0003243311.mp3' 'MT0003949060.mp3']
	Manhattan: 10.00% ['MT0000040632.mp3' 'MT0003949060.mp3']
	Cos: 10.00% ['MT0000040632.mp3' 'MT0003949060.mp3']
	Euclidean TOP 100: 5.00% ['MT0012331779.mp3']
	Manhattan TOP 100: 10.00% ['MT0000040632.mp3' 'MT0003949060.mp3']
	Cos TOP 100: 10.00% ['MT0000040632.mp3' 'MT0012331779.mp3']
MUSIC: MT0000956340.mp3
	Euclidean: 0.00% []
	Manhattan: 0.00% []
	Cos: 0.00% []
	Euclidean TOP 100: 15.00% ['MT0002372242.mp3' 'MT0004293364.mp3' 'MT0014615863.mp3']
	Manhattan TOP 100: 10.00% ['MT0002372242.mp3' 'MT0014615863.mp3']
	Cos TOP 100: 15.

In [44]:
def add_relevant(df, threshold):
    """Add a boolean ``relevant`` column to ``df`` in place.

    A row is relevant when its ``mean`` value is strictly greater than
    ``threshold``.
    """
    above_threshold = df['mean'] > threshold
    df['relevant'] = above_threshold.to_numpy()

In [45]:
def add_mean(df, cols):
    """Add a ``mean`` column to ``df`` in place: the row-wise mean of ``cols``."""
    row_means = df[cols].mean(axis='columns')
    df['mean'] = row_means

In [46]:
def add_std(df, cols):
    """Add a ``std`` column to ``df`` in place: the row-wise standard deviation of ``cols``."""
    row_spread = df[cols].std(axis='columns')
    df['std'] = row_spread

In [47]:
def calculate_precision(ds):
    """Return the fraction of non-null entries of ``ds`` that are ``True``.

    Args:
        ds: pandas Series of relevance flags.

    Returns:
        Value between 0 and 1. A series without any ``True`` yields 0.0
        instead of raising ``KeyError``, and so does a series with no
        non-null entries.
    """
    total = ds.count()
    if total == 0:
        # Nothing was rated: report zero rather than dividing by zero.
        return 0.0
    # Counting matches directly works even when True never occurs, unlike
    # looking up the True label in value_counts().
    relevant = ds.eq(True).sum()
    return relevant / total

In [57]:
def get_likert_scores():
    """Print the precision of the rated results for every query track.

    For each query a CSV of ratings is read. Columns ``T1``-``T3`` and
    ``M1``-``M3`` are each averaged per row, and a row counts as relevant
    when its mean is above 2.5. The share of relevant rows is printed for
    both column groups.
    """
    threshold = 2.5
    top_cols = ['T1', 'T2', 'T3']
    metadata_cols = ['M1', 'M2', 'M3']

    for music in get_musics('./Queries/'):
        ratings = pd.read_csv(f'./dataset/results/{music}.csv')
        top = ratings[top_cols].copy()
        metadata = ratings[metadata_cols].copy()

        # Both rating groups go through the same mean / std / relevance steps.
        for frame, cols in ((top, top_cols), (metadata, metadata_cols)):
            add_mean(frame, cols)
            add_std(frame, cols)
            add_relevant(frame, threshold)

        top_precision = calculate_precision(top['relevant'])
        metadata_precision = calculate_precision(metadata['relevant'])
        print(music, 'TOP 100:', top_precision, 'METADATA:', metadata_precision)

In [58]:
# Print the rating-based precision for all query tracks.
get_likert_scores()

MT0000202045.mp3 TOP 100: 0.2 METADATA: 0.4
MT0000379144.mp3 TOP 100: 0.35 METADATA: 0.25
MT0000414517.mp3 TOP 100: 0.1 METADATA: 0.25
MT0000956340.mp3 TOP 100: 0.65 METADATA: 0.55
