In [1]:
import numpy as np
import pandas as pd

import keras
from keras.models import load_model

import matplotlib.pyplot as plt
from glob import glob

import librosa
import librosa.display
import IPython.display as ipd

from scipy.signal import resample
from scipy.spatial.distance import cosine
from scipy.fft import fft,fftfreq

from tqdm.notebook import tqdm

## Loading Model
model = keras.models.load_model('D:/Projects/Orpheus_ai/DataSet/model_save_logs/Saved Models/VQVAE(7K)_normalized80lowrez_5e-03recloss')

  "class": algorithms.Blowfish,


In [2]:
min_level_db = -80
sr = 22050
hop_length = 1024
n_fft = hop_length*4
SAVE_PATH = 'D:/Projects/Orpheus_ai/DataSet/audio_recreation[VQVAE(7K)]/'

In [3]:
def normalize(S):
    return np.clip((((S - min_level_db) / -min_level_db)*2.)-1., -1, 1)

def denormalize(S):
    return (((np.clip(S, -1, 1)+1.)/2.) * -min_level_db) + min_level_db

def get_similarity_score(y, yhat, sr=sr):
    
    '''
    Input: Original and Recreated Signal
    Output: Cosine Similarity of the FFT of the signals
    '''
    def get_fft(signal,sr,l=205000):
        '''
        Wrapper for FFT creation
        '''
    
        ft = np.abs(fft(signal))
        freq = fftfreq(l, 1.0 / sr)
        return ft[:l//2],freq[:l//2]

    def mmxs(d):
        '''
        Min-Max Normalization
        '''
        return (d-d.min())/(d.max()-d.min())
    
    f,freqs = get_fft(mmxs(y),sr)
    f_hat,_ = get_fft(mmxs(yhat),sr)   
    
    audible_indexes = [np.logical_and(freqs>12,freqs<28000)]
    
    f, f_hat = f[audible_indexes], f_hat[audible_indexes]
    
    absolute = (np.mean((f-f_hat)**2))**0.5 ## Maybe--Bit Ambigious
    spatial =  1 - cosine(f,f_hat)
    
    return absolute,spatial

In [4]:
data_paths = glob('D:/Projects/Orpheus_ai/DataSet/Spectrograms/HalfMEL_dbscale/*')
song_location = pd.read_csv('D:/Projects/Orpheus_ai/DataSet/main_dataframe.csv',index_col=['track_id'])

In [None]:
try:
    full_dataframe = pd.read_csv('D:/Projects/Orpheus_ai/DataSet/recreation_quality.csv',index_col=['track_id'])
    stin = len(full_dataframe)
except:
    full_dataframe = None
    stin = 0
    
dataframe = []
for i,path in tqdm(list(enumerate(data_paths[stin:]))):
    try:
        ## Loading Original Data
        name = path.split("\\")[-1].split('.')[0]
        instrumental_path = song_location.loc[int(name)]['instrumental_path']
        ori_signal, _ = librosa.load(instrumental_path)
        ori_signal = ori_signal[:sr*10]
        S = np.load(path)
        assert S.shape == (128,216)
        
        ## Getting Model Prediction
        model_S = normalize(np.expand_dims(np.array([S]),axis=-1))
        S_hat = denormalize(model.predict(model_S,verbose=0)[0,:,:,0])

        ## Inverting with Griffin Lim
        spec_signal = librosa.feature.inverse.mel_to_audio(librosa.db_to_power(S),sr=sr,n_fft=n_fft,hop_length=hop_length)
        spec_hat_signal = librosa.feature.inverse.mel_to_audio(librosa.db_to_power(S_hat),sr=sr,n_fft=n_fft,hop_length=hop_length)

        abs_mel, spa_mel = get_similarity_score(ori_signal,spec_signal)
        abs_mel_hat, spa_mel_hat = get_similarity_score(ori_signal,spec_hat_signal)
        
        ## Saving Song
        np.save(f'{SAVE_PATH}{name}.npy',spec_hat_signal)
        
        ## Adding Data
        dataframe.append([int(name),f'{SAVE_PATH}{name}.npy',abs_mel,spa_mel,abs_mel_hat,spa_mel_hat])
        
        if i%5 == 0:
            temp_df = pd.DataFrame(dataframe,columns=['track_id','path','abs_mel','spa_mel','abs_mel_hat','spa_mel_hat'])
            temp_df.set_index('track_id',inplace=True)
            
            if full_dataframe is None:
                full_dataframe = temp_df
            else:
                full_dataframe = pd.concat([full_dataframe,temp_df])
                
            full_dataframe.to_csv('D:/Projects/Orpheus_ai/DataSet/recreation_quality.csv')
            
            dataframe = []
        
    except AssertionError:
        print(f"{path} Size exception")

  0%|          | 0/14306 [00:00<?, ?it/s]

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_i

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_i

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_i

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_i

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_i

  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
  f, f_hat = f[audible_indexes], f_hat[audible_indexes]
