In [1]:
%pylab inline
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
from audiofile_read import *  
from rp_extract import rp_extract
#from rp_plot import *  
import librosa
import os
import pandas as pd
import scipy

Populating the interactive namespace from numpy and matplotlib


In [2]:
def features_extraction_accompaniment(track_path):
    # adapt the fext array to your needs:
    fext = ['rp','ssd','rh','mvd'] # sh, tssd, trh
    samplerate, samplewidth, wavedata = audiofile_read(track_path, normalize=False)
    features = rp_extract(wavedata,
                      samplerate,
                      extract_rp   = ('rp' in fext),          # extract Rhythm Patterns features
                      extract_ssd  = ('ssd' in fext),           # extract Statistical Spectrum Descriptor
                      #extract_sh   = ('sh' in fext),          # extract Statistical Histograms
                      extract_tssd = ('tssd' in fext),          # extract temporal Statistical Spectrum Descriptor
                      extract_rh   = ('rh' in fext),           # extract Rhythm Histogram features
                      extract_trh  = ('trh' in fext),          # extract temporal Rhythm Histogram features
                      extract_mvd  = ('mvd' in fext),        # extract Modulation Frequency Variance Descriptor
                      spectral_masking=True,
                      transform_db=True,
                      transform_phon=True,
                      transform_sone=True,
                      fluctuation_strength_weighting=True,
                      skip_leadin_fadeout=1,
                      step_width=1)
    res = []
    for key in fext:
        res.extend(features[key])
    return res

In [4]:
def features_extraction_voice(track_path):
#     mfccs = librosa.feature.mfcc(wavedata, sr=sr,n_mfcc=26)
#     deltas = librosa.feature.delta(mfccs)
    wavedata, sr = librosa.load(track_path)
    discretization = np.linspace(0, len(wavedata),4,dtype=int)
    mfccs = []
    deltas = []
    for i in range(len(discretization) - 1):
        mfccs_tmp = librosa.feature.mfcc(wavedata[discretization[i]:discretization[i+1]], sr=sr,n_mfcc=26)
        mfccs.extend(np.median(mfccs_tmp,axis=1)[:12])
        deltas.extend(np.median(mfccs_tmp,axis=1)[:12])
    return mfccs + deltas
    

In [5]:
b = features_extraction_voice('../audio/electro/01-max_cooper_tom_hodge-symmetry.mp3')

KeyboardInterrupt: 

In [None]:
wavedata, sr = librosa.load('../audio/electro/01-max_cooper_tom_hodge-symmetry.mp3')

In [None]:
len(wavedata)

In [None]:
librosa.core.fft_frequencies()

In [None]:
def track_preprocessing(track_path):
    return features_extraction_accompaniment(track_path) + features_extraction_voice(track_path)

In [None]:
c = track_preprocessing('../audio/electro/01-max_cooper_tom_hodge-symmetry.mp3')

In [None]:
import datetime

In [None]:
datetime.datetime.now()

In [None]:
# calculating distance between tracks
print(datetime.datetime.now())
for i in os.listdir('../audio/')[1:2]:
    for j in os.listdir('../audio/'+i+'/')[1:4]:
        tmp = track_preprocessing('../audio/'+i+'/'+j)
        #pd.DataFrame(tmp).to_csv('features_final/'+j[:-4]+'.csv',header=None,index=None)
print(datetime.datetime.now())
        

In [31]:
d = {}
for file in os.listdir('features_final/'):
    d[file] = pd.read_csv('features_final/'+file,header=None)[0].values

In [32]:
distances = pd.DataFrame(columns=d.keys(),index=d.keys())

In [33]:
for basic_track in d:
    for compared_track in d:
        distances.loc[basic_track,compared_track] = scipy.spatial.distance.cosine(d[basic_track],d[compared_track])

In [None]:
distances.to_excel('tracks_similarity_matrix.xlsx')

In [84]:
recommendations = {}
for col in distances.columns:
    recommendations[col] = distances[col].sort_values()[:20].index

In [88]:
pd.DataFrame(recommendations).to_excel('recommendations_for_tracks.xlsx',index=None)

In [6]:
def recommendation_for_new_track(tracks_path):
    tp = track_preprocessing(tracks_path)
    d = {}
    for file in os.listdir('features_final/'):
        d[file] = pd.read_csv('features_final/'+file,header=None)[0].values
    dist = pd.DataFrame(columns=['dist'],index=d.keys())
    for col in d.keys():
        dist.loc[col,'dist'] = scipy.spatial.distance.cosine(d[col],tp)
    return dist['dist'].sort_values()[:20].index

In [8]:
recommendation_for_new_track('../audio/electro/01-max_cooper_tom_hodge-symmetry.mp3')

Decoded .mp3 with: ffmpeg -v 1 -y -i ../audio/electro/01-max_cooper_tom_hodge-symmetry.mp3 C:\Users\Admin\AppData\Local\Temp\6b3f7031-e37a-4fa8-b896-971985f06235.wav


Index(['01-max_cooper_tom_hodge-symmetry.csv',
       '10. Crucify Me (Feat. Lights).csv', '05. Chelsea Smile.csv',
       '06. The Comedown.csv',
       '04. The Sadness Will Never End (Feat. Sam Carter).csv',
       '02. Pray For Plagues.csv', '03 - You Kill Me with Sirence.csv',
       '01. Traitors Never Play Hang-Man.csv', '11 - Only In Dreams.csv',
       '16 - Northern Lights.csv', '03. Diamonds Aren't Forever.csv',
       '08. It Never Ends.csv', '07. Blessed With A Curse.csv',
       '05 - Face For Today.csv', '09 - Change The Skyline.csv',
       '02 - Last Night In The City.csv', '06-max_cooper-order_from_chaos.csv',
       '10 - Butterfly Girl.csv',
       '04. Hans Zimmer - Now We Are Free (Gladiator).csv',
       '13 - Planet Roaring.csv'],
      dtype='object')

In [None]:
rec = pd.read_excel('recommendations_for_tracks.xlsx')
janres = {}
for i in os.listdir('../audio/')[1:]:
    for j in os.listdir('../audio/'+i+'/')[1:]:
        janres[j[:-3]+'csv'] = i
        
rec_janres = rec.copy()
for col in rec_janres:
    rec_janres[col] = rec_janres[col].map(janres)
rec_janres.to_excel('recommendations_for_tracks_janres_ed.xlsx')

In [2]:
rec = pd.read_excel('recommendations_for_tracks.xlsx')

In [18]:
janres = {}
for i in os.listdir('../audio/')[1:]:
    for j in os.listdir('../audio/'+i+'/')[1:]:
        janres[j[:-3]+'csv'] = i

In [21]:
rec_janres = rec.copy()
for col in rec_janres:
    rec_janres[col] = rec_janres[col].map(janres)

In [23]:
rec_janres.to_excel('recommendations_for_tracks_janres_ed.xlsx')

In [12]:
rec_janres = pd.read_excel('recommendations_for_tracks_janres_ed.xlsx')

In [13]:
recomm = pd.read_excel('recommendations_for_tracks.xlsx')

In [14]:
for col in recomm.columns:
    recomm[col] = recomm[col] + rec_janres[col].map(lambda x: ' ('+x+')')

In [16]:
recomm.to_excel('recommendations_for_tracks_with_janres.xlsx')