In [None]:
import pydub as pdb
import os 
import tqdm
import librosa
import numpy as np

import skfda
import math 

from scipy.optimize import minimize
from matplotlib import pyplot as plt

In [4]:
# File names found in the `processed-recs` and `processed-recs-paper` folders.
# os.listdir returns bare names (no directory prefix), in no guaranteed order.
our_recs_dir = "C:/Users/Leonardo/Desktop/UNIVERSITA/MS/I anno/II semestre/SL/FINAL_PROJECT/Nuova cartella/processed-recs"
paper_recs_dir = "C:/Users/Leonardo/Desktop/UNIVERSITA/MS/I anno/II semestre/SL/FINAL_PROJECT/Nuova cartella/processed-recs-paper"

our_files = os.listdir(our_recs_dir)
paper_files = os.listdir(paper_recs_dir)

In [5]:
# One record per file name; each record is its own dict whose fields start as
# None and are filled in by the feature-extraction loops.
dic_1 = {name: dict(Label=None, Author=None, MFCC=None) for name in our_files}

dic_2 = {name: dict(Label=None, MFCC=None) for name in paper_files}

In [8]:
# Extract 12 MFCCs per recording and parse the metadata encoded in the file name
# (fields separated by '_': index 2 is stored as Author, index 3 minus its
# 4-character extension is stored as the numeric Label).
#
# librosa.load(..., sr=None) keeps each file's native sampling rate, so that
# same rate is handed to librosa.feature.mfcc instead of assuming 44100 Hz.
for file in tqdm.tqdm(our_files):
    path = 'C:/Users/Leonardo/Desktop/UNIVERSITA/MS/I anno/II semestre/SL/FINAL_PROJECT/Nuova cartella/processed-recs/' + file
    audio, sample_rate = librosa.load(path, sr=None)
    dic_1[file]['MFCC'] = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=12)
    dic_1[file]['Author'] = file.split('_')[2]
    dic_1[file]['Label']  = float(file.split('_')[3][:-4])

for file in tqdm.tqdm(paper_files):
    path = 'C:/Users/Leonardo/Desktop/UNIVERSITA/MS/I anno/II semestre/SL/FINAL_PROJECT/Nuova cartella/processed-recs-paper/' + file
    audio, sample_rate = librosa.load(path, sr=None)
    dic_2[file]['MFCC'] = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=12)
    dic_2[file]['Label'] = float(file.split('_')[3][:-4])

100%|██████████| 253/253 [00:07<00:00, 32.25it/s]
100%|██████████| 333/333 [00:06<00:00, 54.29it/s]


In [9]:
# Functional tool kit
#
# Fourier basis shared by the basis projections below: `processing` receives it
# as its `basis` argument and sizes its output from `basis.n_basis`.
# NOTE(review): 80 basis functions are requested here, yet the design tensor
# further down is allocated with 81 columns and filled from `processing`
# output -- presumably skfda rounds an even n_basis up to the next odd number;
# confirm against the skfda documentation.

basis = skfda.representation.basis.Fourier(n_basis=80)

def FDA_generator(array):
    """Wrap one sequence of samples as a single-observation skfda.FDataGrid.

    The samples are placed on len(array) evenly spaced grid points spanning
    [0, 1] (both ends included).
    """
    n_samples = len(array)
    grid = np.linspace(0, 1, n_samples)
    return skfda.FDataGrid(data_matrix=[array], grid_points=grid)

# First transform pipeline: retrieve eGFC from the extracted features

def processing(matrix, basis):
    """Project every row of `matrix` onto `basis` and stack the coefficients.

    Each row is turned into a functional object with FDA_generator, converted
    with `.to_basis(basis)`, and its `.coefficients` are written into the
    matching row of the result.

    Returns an array of shape (number of rows of `matrix`, basis.n_basis).
    """
    n_rows = np.shape(matrix)[0]
    coefficients = np.zeros((n_rows, basis.n_basis))
    for row_idx, row in enumerate(matrix):
        curve = FDA_generator(row)
        coefficients[row_idx, :] = curve.to_basis(basis).coefficients
    return coefficients

In [10]:
# Design tensor: one (12 x basis.n_basis) coefficient matrix per recording in
# dic_2, stacked along the last axis. The second dimension is read from the
# basis itself because `processing` returns basis.n_basis columns, so the two
# can never drift apart.
design_tensor = np.zeros((12, basis.n_basis, len(dic_2)))

# Iterate the dict directly (insertion order) instead of rebuilding the key
# list on every iteration.
for i, key in enumerate(tqdm.tqdm(dic_2)):
    design_tensor[:,:,i] = processing(dic_2[key]['MFCC'],basis)

# Labels in the same (insertion) order as the last axis of design_tensor.
y_true = np.array([dic_2[key]['Label'] for key in dic_2])

100%|██████████| 333/333 [00:13<00:00, 25.41it/s]


In [26]:
# Non-parametric regression on a vectorial functional space 

def K(t):
    """Gaussian-shaped kernel 0.5 * exp(-t**2 / 2); works on scalars and arrays."""
    exponent = -0.5 * t**2
    return 0.5 * np.exp(exponent)

def L2(x1,x2):
    """Norm of the difference x1 - x2, as computed by np.linalg.norm with defaults.

    For 1-D inputs this is the Euclidean distance; for scalars it is |x1 - x2|.
    """
    difference = x1 - x2
    return np.linalg.norm(difference)

def mNorm(X):
    """Square root of the largest absolute eigenvalue of X^T X."""
    gram = np.matmul(np.transpose(X), X)
    spectrum = np.abs(np.linalg.eigvals(gram))
    return np.sqrt(np.max(spectrum))

def VF_dist(X1,X2):
    """Matrix-norm distance built from all pairwise row distances.

    Builds the L x L matrix D with D[i, k] = L2(X1[i, :], X2[k, :]), where L is
    the number of rows of X1, and returns mNorm(D).

    Parameters
    ----------
    X1, X2 : 2-D arrays; X2 must have at least as many rows as X1.

    Returns
    -------
    The value of mNorm applied to the pairwise-distance matrix.

    Note: because D holds cross-row distances, VF_dist(X, X) is in general not
    zero (only the diagonal of D vanishes).
    """
    L = np.shape(X1)[0]
    D = np.zeros((L,L))

    for i in range(L):
        for k in range(L):
            # Row i of X1 against row k of X2; indexing X2 by i here would make
            # every row of D constant and leave k unused.
            D[i,k] = L2(X1[i,:],X2[k,:])

    return mNorm(D)

def supDist(X1,X2):
    """Largest row-wise L2 distance between X1 and X2.

    Row r of X1 is compared with row r of X2 for every row of X1; the maximum
    of those distances is returned.
    """
    n_components = np.shape(X1)[0]
    row_gaps = np.fromiter(
        (L2(X1[row, :], X2[row, :]) for row in range(n_components)),
        dtype=float,
        count=n_components,
    )
    return np.max(row_gaps)

def weightedCompWiseDist(X1,X2,omega):
    """Weighted sum of the row-wise L2 distances between X1 and X2.

    Row c of X1 is compared with row c of X2; the distance for that row is
    multiplied by omega[c] and the products are summed.
    """
    n_components = np.shape(X1)[0]
    per_component = np.array(
        [L2(X1[c, :], X2[c, :]) for c in range(n_components)],
        dtype=float,
    )
    weighted = per_component * omega
    return np.sum(weighted)
   
def distDist(X1,X2):
    """Euclidean norm of the vector of row-wise L2 distances between X1 and X2."""
    n_components = np.shape(X1)[0]
    row_distances = np.zeros(n_components)
    for idx in range(n_components):
        first_row, second_row = X1[idx, :], X2[idx, :]
        row_distances[idx] = L2(first_row, second_row)
    return np.linalg.norm(row_distances)

def KR_estimator(h,x,X,Y,omega):
    """Kernel-weighted average of Y evaluated at the query point x.

    Observation j is the slice X[:, :, j]; its weight is
    K(weightedCompWiseDist(x, X[:, :, j], omega) / h). The estimate is the sum
    of weight * Y divided by the sum of the weights.
    """
    n_obs = len(Y)
    weights = np.fromiter(
        (K(weightedCompWiseDist(x, X[:, :, j], omega) / h) for j in range(n_obs)),
        dtype=float,
        count=n_obs,
    )
    numerator = np.sum(weights * Y)
    denominator = np.sum(weights)
    return numerator / denominator

# Validation functions

def LOOCV(X,Y,h,omega=None):
    """Leave-one-out cross-validation error of KR_estimator for bandwidth h.

    For every observation i, the slice X[:, :, i] is predicted from all the
    other observations and the error L2(prediction, Y[i]) is recorded.

    Parameters
    ----------
    X : 3-D array whose last axis indexes observations.
    Y : 1-D sequence of responses, one per observation.
    h : bandwidth forwarded to KR_estimator.
    omega : component weights forwarded to KR_estimator, which requires them.
        When None, uniform weights (one per row of X) are used.

    Returns
    -------
    Mean of the per-observation errors.
    """
    Y = np.asarray(Y)
    if omega is None:
        omega = np.ones(np.shape(X)[0])

    err = np.zeros(len(Y))
    for i in range(len(Y)):
        # Hold out observation i from both the design tensor and the responses.
        _X = np.delete(X, i, axis=2)
        _Y = np.delete(Y, i)
        pred = KR_estimator(h, X[:, :, i], _X, _Y, omega)
        err[i] = L2(pred, Y[i])
    return np.mean(err)

def LOOCV_fit(X,Y,grid):
    """Return the entry of `grid` with the smallest LOOCV(X, Y, entry) value.

    Ties are resolved in favour of the earliest entry (np.argmin behaviour).
    """
    n_candidates = len(grid)
    cv_errors = np.fromiter(
        (LOOCV(X, Y, grid[c]) for c in range(n_candidates)),
        dtype=float,
        count=n_candidates,
    )
    best = np.argmin(cv_errors)
    return grid[best]

# Weighted component-wise distance for kernel regression

def objective(params):
    """Sum of squared leave-one-out prediction errors of KR_estimator.

    `params[0:12]` are the component weights omega and `params[12]` is the
    bandwidth h. The data are read from the module-level `design_tensor`
    (observations along the last axis) and `y_true`.
    """
    omega, h = params[0:12], params[12]
    n_obs = len(y_true)
    positions = np.arange(n_obs)

    def squared_loo_error(i):
        # Every observation except i forms the training set for predicting i.
        keep = positions != i
        pred = KR_estimator(h, design_tensor[:, :, i], design_tensor[:, :, keep], y_true[keep], omega)
        return (pred - y_true[i]) ** 2

    return sum(squared_loo_error(i) for i in range(n_obs))

In [29]:
# Starting point for the optimiser: twelve unit component weights (omega)
# followed by the bandwidth h, matching how `objective` unpacks its argument.
# NOTE(review): with h = 1e115 every kernel argument d/h is presumably ~0, so
# all observations would start with near-identical weights and the objective
# would be almost flat around x0 -- confirm this starting bandwidth is intended.
minimize(objective, x0=np.append(np.ones(12), 1e115))