In [7]:
from __future__ import print_function

import librosa
import librosa.display
import IPython.display
import numpy as np
import pandas as pd
import math
import glob
import csv
from numpy.fft import fft, ifft, fft2, ifft2, fftshift

In [8]:
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline

In [9]:
collection = "../data/audio_data_collection.csv"
audio_data_folder = './../data/20_sec_embed/'

In [10]:
def read_from_files(file):
    with open(file, 'r') as f:
        reader = csv.reader(f)
        audio_track_data = list(reader)
    
    for i in range(len(audio_track_data)):
        # audio_track_data[i] = [float(j) for j in audio_track_data[i]]
        audio_track_data[i] = np.array(audio_track_data[i]).astype(np.float)
    
    return audio_track_data

In [11]:
def resizer(line1, line2):
    len_line1 = len(line1)
    len_line2 = len(line2)
    
    new_line1 = []
    new_line2 = []
    
    if(len_line1>len_line2):
        new_line1 = line1[0:len_line2]
        new_line2 = line2
    else:
        new_line1 = line1
        new_line2 = line2[0:len_line1]
    
    return new_line1, new_line2

In [12]:
def cross_correlation_using_fft(x, y):
    f1 = fft(x)
    f2 = fft(np.flipud(y))
    cc = np.real(ifft(f1 * f2))
    return fftshift(cc)

In [17]:
# shift 0 means that y starts 'shift' time steps before x # shift 0 means that y starts 'shift' time steps after x
def compute_shift(x, y):
    assert len(x) == len(y)
    c = cross_correlation_using_fft(x, y)
    assert len(c) == len(x)
    zero_index = int(len(x) / 2) - 1
    shift = zero_index - np.argmax(c)
    return shift

In [18]:
def abs_mean(anylist):
    coeff = [abs(number) for number in anylist]
    result = np.mean(coeff)
    
    return result

In [52]:
#other correlation calculation approache
def compute_tracks_correlation(track_1, track_2):
    track_correlations = []

    for i in range(len(track_1)):
        line1, line2 = resizer(track_1[i], track_2[i])
        #coefs = abs_mean(np.correlate(line1, line2, mode='full'))
        coefs = abs_mean(cross_correlation_using_fft(line1, line2))
        
        track_correlations.append(coefs)
        #print(coefs)
        
    return track_correlations

In [20]:
def calculate_similarity(track_comparation_coeffs):
    coeffs = []
    
    for coeff in track_comparation_coeffs:
        coeff = [abs(number) for number in coeff]
        result = np.mean(coeff) # np.mean(coeff)
        
        coeffs.append(result)
    
    return coeffs

In [21]:
def get_top(l, n):
    result = []
    tl = sorted(l, reverse=True)[:n]

    for t in tl:
        result.append(l.index(t))
        
    return result

In [23]:
df = pd.read_csv(collection, sep='\t')
df = df.drop(['Unnamed: 0'], axis=1)

files = glob.glob(audio_data_folder + '/*.csv', recursive=True)

### Next step will take a while, it's reading data into memory

In [24]:
tracks_data = []

for file in files:
    track_data = read_from_files(file)
    tracks_data.append(track_data)

In [33]:
len(tracks_data)

101

In [25]:
# track_1 = read_from_files(files[0])
# track_2 = read_from_files(files[1])

# track_correlations2 = compute_tracks_correlation(track_1, track_2)
# track_correlations2

### Algorithm will find recommendation for composition below

In [43]:
q_index = 90
track_q = tracks_data[90]

df[q_index:q_index+1]

Unnamed: 0,i,id,name,filepath,genre
90,90,91eedc9117,05. Big For Your Boots.mp3,../data/audio/rap/05. Big For Your Boots.mp3,rap


#### Next step, again, will take a while, it compares correlation for track frequencies

In [53]:
track_comparation_coeffs = []
h = 0 

for track in tracks_data:
    # print(h)
    
    track_comparation_coeff = compute_tracks_correlation(track_q, track)
    track_comparation_coeffs.append(track_comparation_coeff)
    h +=1

In [45]:
comparation_result = calculate_similarity(track_comparation_coeffs)

In [47]:
result = get_top(comparation_result, 10)

In [50]:
#rec_music = []
k = 1

print("For track")
print(df.iloc[q_index]['name'],"  [", df.iloc[q_index]['genre'], "]", sep="")
print("")


print("We recommend you to listen:")
print("")

for i in result:
    #a = get_row(df, i)
    
    print(str(k),") ", df.iloc[i]['name'],"  [", df.iloc[i]['genre'], "]", sep="")
    # print("")
    k += 1

For track
05. Big For Your Boots.mp3  [rap]

We recommend you to listen:

1) 05. Big For Your Boots.mp3  [rap]
2) 09. Don't Go (Feat. Lights).mp3  [metal]
3) 11. Return Of The Rucksack.mp3  [rap]
4) 14. Sleepwalking.mp3  [metal]
5) 06. The Comedown.mp3  [metal]
6) 08-max_cooper_feat_kathrin_deboer-impermanence.mp3  [electro]
7) 02. Russell Watson - Mattinata.mp3  [classics]
8) 04 - Pressure Off.mp3  [pop]
9) 11. Shadow Moses.mp3  [metal]
10) 08. It Never Ends.mp3  [metal]
