In [1]:
import sqlite3
import pandas as pd
import numpy as np
import matchering as mg
import json
from glob import glob
from tqdm import tqdm
from io import StringIO
import sys
import pathlib

import taglib
from datetime import datetime
import shutil
import os
from essentia.standard import MusicExtractor, YamlOutput,MetadataReader, PCA, YamlInput
import warnings
from zipfile import ZipFile
# Silence every Python warning for the rest of the session. Note that this also hides
# deprecation notices from pandas and other libraries that matter when upgrading.
warnings.filterwarnings('ignore')
# Allow up to 100 characters per cell when DataFrames are displayed, so long
# file names stay readable.
pd.set_option('max_colwidth', 100)

In [2]:
from project_tools.utils import effnet_config, json_opener, adapt_array, convert_array, tag_cleaner, digit2letters

In [3]:
from project_tools.models import Activator, Classifier

In [4]:
# Teach sqlite3 to round-trip NumPy arrays: `adapt_array` is called whenever an ndarray
# is bound as a query parameter, and `convert_array` is applied to values read from
# columns whose declared type is "array".
sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter("array", convert_array)

In [5]:
# PARSE_DECLTYPES makes sqlite3 inspect each column's declared type so that registered
# converters (e.g. the "array" converter) are applied when rows are read back.
conn = sqlite3.connect("jaage.db", detect_types= sqlite3.PARSE_DECLTYPES)
# Cursor used for the hand-written INSERT statements issued on this connection.
cur = conn.cursor()

In [6]:
# Inbox directory: downloaded archives and audio files are picked up from here.
# The trailing slash matters because paths are built by plain string concatenation.
load_path = "../../../../Volumes/LaCie/Loading Dock/"
# Directory that receives the processed archives and the original (un-mastered) audio.
dj_hub  = "../../../../Volumes/LaCie/DJ Hub/"

In [7]:
# Collect every .zip archive sitting directly in the loading dock (non-recursive).
zip_files = glob(load_path+"*.zip")
# Display the archives that were found.
zip_files

['../../../../Volumes/LaCie/Loading Dock/(W)DaveeS - House Anthology 1990 - 2000 (Disc 2).zip',
 '../../../../Volumes/LaCie/Loading Dock/Dharma Collective - Balearic Edits.zip',
 '../../../../Volumes/LaCie/Loading Dock/DiscoGram - DG030 (Also on vinyl).zip',
 '../../../../Volumes/LaCie/Loading Dock/Nico Raibak - Universal Stereo EP.zip',
 '../../../../Volumes/LaCie/Loading Dock/Saltywax - ZISSOU009 - Saltywax - Keep Dancing EP.zip',
 '../../../../Volumes/LaCie/Loading Dock/Tucan Discos - East Asia Pack.zip',
 '../../../../Volumes/LaCie/Loading Dock/Tuesday Brunch - Editz.zip']

In [8]:
# Unpack every downloaded archive into the loading dock, then move the .zip itself to
# the DJ Hub so it no longer sits in the directory that is scanned for archives.
# Iterating an empty list is a no-op, so no explicit length check is needed.
for z in zip_files:
    # The context manager closes the archive handle before the file is moved; moving a
    # file that is still open leaks the handle and can fail on some platforms.
    with ZipFile(z) as zf:
        zf.extractall(path=load_path)
    shutil.move(z, dj_hub)

In [18]:
# Audio files waiting in the loading dock.
# A glob such as "*[.wav, .mp3, .aiff]" is a *character class*: it matches any entry whose
# name merely ends in one of those characters (e.g. "notes.pdf"), not files with these
# extensions. Filter on the real suffix instead, and materialise the result as a sorted
# list so it can be counted and iterated more than once without being exhausted.
loading_files = sorted(
    p for p in pathlib.Path(load_path).iterdir()
    if p.is_file() and p.suffix.lower() in {".wav", ".mp3", ".aiff"}
)

In [19]:
# Count the queued files by walking the iterable once. If `loading_files` is a lazy
# generator this consumes it, so it has to be rebuilt before it is iterated again.
len_loading_files = sum(1 for _ in loading_files)
print(f"There are {len_loading_files} files for the ETL pipeline")

There are 79 files for the ETL pipeline


In [20]:
# Audio files waiting in the loading dock, rebuilt for the mastering loop.
# A glob such as "*[.wav, .mp3, .aiff]" is a *character class*: it matches any entry whose
# name merely ends in one of those characters (e.g. "notes.pdf"), not files with these
# extensions. Filter on the real suffix instead, and materialise the result as a sorted
# list so it is not silently exhausted after a single pass.
loading_files = sorted(
    p for p in pathlib.Path(load_path).iterdir()
    if p.is_file() and p.suffix.lower() in {".wav", ".mp3", ".aiff"}
)

## Process

1. Mastering
    - Master
    - Transfer tags
    - Moved to Mastered Dir
 

2. Music Extractor
    - ETL

3. Effnet Embeddings and Genre Activations
    - ETL



### Mastering

In [21]:
# Reference track handed to matchering; every new file is mastered against it.
ref_file = '../../../../Volumes/LaCie/DJ Hub/Rayko - Magnetized (Rayko rework).wav'
# Name of the directory (a sibling of the loading dock) that receives the mastered files.
collection = "Collection"


In [22]:
# Paths of the mastered .wav files; filled in by the mastering loop.
new_file_paths = []

In [23]:
# Master every queued track against the reference, copy its tags onto the mastered
# file, and archive the original in the DJ Hub.
for f in tqdm(loading_files):
    # Mastered output lives two levels up from the source file, inside `collection`,
    # under the original stem with a .wav extension.
    out_path = f.parent.parent/collection/f.stem
    out_path = out_path.as_posix() +".wav"

    mg.process(target= f.as_posix(),
              reference=ref_file,
              results = [mg.pcm24(out_path)])

    # Copy the tags across; both taglib handles are closed even if saving fails.
    load_tags = taglib.File(f.as_posix())
    try:
        mastered_tags = taglib.File(out_path)
        try:
            mastered_tags.tags = load_tags.tags
            mastered_tags.save()
        finally:
            mastered_tags.close()
    finally:
        load_tags.close()

    new_file_paths.append(out_path)

    try:
        shutil.move(f.as_posix(), dj_hub)
    except shutil.Error:
        # shutil.move raises shutil.Error when the destination directory already holds
        # a file of this name; only then is the source treated as a duplicate and
        # deleted. Any other failure (permissions, disk full, unmounted volume) now
        # propagates instead of silently deleting the only copy of the original.
        print(f, "already exists")
        os.remove(f.as_posix())
    

79it [44:39, 33.92s/it]


### Music Extraction

In [25]:
# Shallow copy of the mastered paths, kept so `new_file_paths` can be rebuilt from it.
copied_paths = new_file_paths[:]

In [26]:
# Keep only the mastered outputs that are actually present on disk.
new_file_paths = [path for path in copied_paths if os.path.exists(path)]

len(new_file_paths)

77

In [27]:
# Configure the essentia extractor: the summary statistics to keep for the low-level,
# rhythm, tonal, MFCC and GFCC descriptors.
music_ext = MusicExtractor(
    lowlevelStats=["mean", "stdev"],
    rhythmStats=["mean", "stdev", "max", "min", "median"],
    tonalStats=["mean", "stdev"],
    mfccStats=["mean", "cov"],
    gfccStats=["mean", "cov"],
)

In [28]:
out_dir = 'temp_features/'
# Make sure the scratch directory exists so writing the intermediate JSON cannot fail
# merely because this is the first run.
os.makedirs(out_dir, exist_ok=True)
extracted_files = []   # one parsed feature dict per successfully processed track
id_2_paths = {}        # audio md5 (used as the track id) -> mastered file path

for fil in tqdm(new_file_paths):
    try:
        features, _ = music_ext(fil)
        idd = features['metadata.audio_properties.md5_encoded']
        # Round-trip through a scratch JSON file (overwritten on every iteration) to
        # turn the essentia result into plain Python data.
        YamlOutput(filename= out_dir+"features.json", format="json")(features)
        json_data = json_opener(out_dir+"features.json")
        id_2_paths[idd] = fil
        extracted_files.append(json_data)
    except Exception as e:
        # Best effort: a single unreadable track must not abort the batch, but report
        # which file was skipped so it can be followed up.
        print(f"Skipping {fil}: {e}")

  0%|                                                                                                                                                                                                           | 0/77 [00:00<?, ?it/s][   INFO   ] MusicExtractor: Read metadata
  1%|██▌                                                                                                                                                                                                | 1/77 [00:12<15:21, 12.13s/it][   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] Musi

 21%|████████████████████████████████████████▎                                                                                                                                                         | 16/77 [02:53<11:47, 11.60s/it][   INFO   ] MusicExtractor: Read metadata
 22%|██████████████████████████████████████████▊                                                                                                                                                       | 17/77 [03:04<11:30, 11.50s/it][   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] Musi

 42%|████████████████████████████████████████████████████████████████████████████████▌                                                                                                                 | 32/77 [06:29<09:57, 13.27s/it][   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
 43%|███████████████████████████████████████████████████████████████████████████████████▏                                                                                                              | 33/77 [06:47<10:48, 14.74s/it][   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] Musi

 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                         | 48/77 [10:08<07:24, 15.33s/it][   INFO   ] MusicExtractor: Read metadata
 64%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                      | 49/77 [10:25<07:27, 15.98s/it][   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] Musi

 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 64/77 [13:51<03:20, 15.41s/it][   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 65/77 [14:07<03:05, 15.47s/it][   INFO   ] MusicExtractor: Read metadata
 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                           

In [29]:
# Flatten the nested feature dicts into one row per track, with dotted column names.
extracted = pd.json_normalize(extracted_files)
# Replace the dots literally. With regex semantics "." matches every character, and the
# default for `regex` differs between pandas versions, so state regex=False explicitly.
extracted.columns = extracted.columns.str.replace(".", "_", regex=False)

In [30]:
# The audio md5 hash serves as the track id (`sid`) throughout the database.
extracted = extracted.rename(columns={"metadata_audio_properties_md5_encoded":"sid"})

In [31]:
# Names of the columns to discard from the extracted features, loaded from a pickle file.
# NOTE(review): allow_pickle=True unpickles the file, which can execute arbitrary code —
# only load "drop_cols.pkl" from a trusted source (a plain JSON/text list would be safer).
drop_cols = np.load("drop_cols.pkl", allow_pickle=True).tolist()

In [32]:
# Discard the unwanted columns; names that are not present are skipped silently.
extracted = extracted.drop(columns=drop_cols, errors="ignore")

In [33]:
# Index the feature rows by track id.
extracted = extracted.set_index("sid")

In [34]:
# Sanity check: (number of tracks, number of remaining feature columns).
extracted.shape

(77, 155)

In [35]:
cols = extracted.columns

# Split the columns by name prefix: "meta…" columns versus everything else.
is_meta = cols.str.startswith("meta")
meta_cols = cols[is_meta]
non_meta_cols = cols[~is_meta]

# Keep the metadata in its own frame and leave only the non-metadata columns behind.
meta_df = extracted[meta_cols].copy()
extracted = extracted.drop(columns=meta_cols)

In [36]:
# Classify each column by the Python type of its value in the FIRST row only:
# list-valued columns go to `list_data`, all others to `no_list_data`.
first_row_types = extracted.iloc[0].apply(lambda x:type(x))
list_cols = extracted.columns[first_row_types == list]
no_list_cols = extracted.columns[first_row_types != list]
list_data = extracted[list_cols]
no_list_data = extracted[no_list_cols]

In [37]:
# Apply the project's `tag_cleaner` helper to every cell of the metadata frame.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; switch once the minimum supported pandas version allows it.
meta_df = meta_df.applymap(tag_cleaner)

In [38]:
# Shorten each column name to the part after its last underscore.
meta_df.columns = meta_df.columns.str.split("_").str[-1]

In [39]:
# "name" is what remains of the file-name column after shortening; restore a clear label.
meta_df = meta_df.rename(columns={"name":"file_name"})

In [40]:
# Display the cleaned metadata for a visual check before it is written to the database.
meta_df

Unnamed: 0_level_0,length,gain,codec,file_name,album,artist,bpm,initialkey,title,date
sid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
f39b01e73f2279bb5f23bdee3c62101f,325.667084,-11.903238,pcm_s24le,(W)DaveeS - House Anthology 1990 - 2000 (Disc 2) - 01 Separate Minds - Troubled World (U.S. Mix)...,House Anthology 1990 - 2000 (Disc 2),Separate Minds,136,F#m,Troubled World (U.S. Mix),
ec141be0fa374b857779c96ba843c3d5,275.200012,-13.940763,pcm_s24le,(W)DaveeS - House Anthology 1990 - 2000 (Disc 2) - 02 Colourblind - Nothing Better (TMVS Club).wav,House Anthology 1990 - 2000 (Disc 2),Colourblind,124,Ebm,Nothing Better (TMVS Club),
8ff6ccec00249e1f1648ac7c1906d5de,309.707764,-13.675098,pcm_s24le,(W)DaveeS - House Anthology 1990 - 2000 (Disc 2) - 03 Intermission Feat.Valerie Scott - Piece Of...,House Anthology 1990 - 2000 (Disc 2),Intermission Feat.Valerie Scott,131,Dm,Piece Of My Heart (Piece Mix),
112ff1ad5a7fe82fbcf3e827bc4c25c5,306.808167,-13.341776,pcm_s24le,(W)DaveeS - House Anthology 1990 - 2000 (Disc 2) - 04 T.F.O. - Give It To Me (Summer Mix).wav,House Anthology 1990 - 2000 (Disc 2),T.F.O.,130,Em,Give It To Me (Summer Mix),
4b988e0de7dbebd93b016e2067469f1b,252.160004,-13.921967,pcm_s24le,(W)DaveeS - House Anthology 1990 - 2000 (Disc 2) - 05 Joy Salinas - Bip Bip.wav,House Anthology 1990 - 2000 (Disc 2),Joy Salinas,127,Bbm,Bip Bip,
...,...,...,...,...,...,...,...,...,...,...
bc4784715bc7dd8d86764bc960a7d720,346.455475,-13.024578,pcm_s24le,Tuesday Brunch - Editz - 01 Get On Down.wav,,Tuesday Brunch,124,Ebm,Get On Down,
31318a6c22a8969fb7422e718140e5b6,339.062988,-13.313665,pcm_s24le,Tuesday Brunch - Editz - 02 Shame.wav,,Tuesday Brunch,126,Bm,Shame,
58321adc20b3cc71ee07b9e77809d3b6,311.666656,-13.159252,pcm_s24le,Various Artists - Tony Benn Feghaly - Free Blow (Dub Version).wav,,Tony Benn Feghaly,109,Fm,Free Blow (Dub Version),
1f7331351e453fe562710944a7b0274b,458.181305,-12.249758,pcm_s24le,Voilaaa - On te l'avait dit (Dimitri From Paris Super Disco Blend).wav,,Voilaaa,126,Am,On te l'avait dit (Dimitri From Paris Super Disco Blend),


In [41]:
# Read a single row purely to discover the column layout of the `tags` table
# (every column except the `sid` key).
tags_sample = pd.read_sql("SELECT * FROM tags LIMIT 1", con = conn)
tags_cols = tags_sample.set_index('sid').columns.tolist()
tags_cols

['length',
 'gain',
 'codec',
 'file_name',
 'bpm',
 'initialkey',
 'title',
 'album',
 'artist',
 'date',
 'genre',
 'label']

In [42]:
# Keep only the metadata columns that exist in the `tags` table, in frame order.
# Note: this rebinds `meta_cols` from a pandas Index to a plain list.
meta_cols = [name for name in meta_df.columns if name in tags_cols]
meta_cols

['length',
 'gain',
 'codec',
 'file_name',
 'album',
 'artist',
 'bpm',
 'initialkey',
 'title',
 'date']

In [43]:
# Append the new tracks' tags; the frame index (`sid`) is written as a column.
# NOTE(review): with if_exists="append" a re-run of this cell inserts the same rows again
# unless the table enforces uniqueness on `sid` — confirm before re-executing.
meta_df[meta_cols].to_sql("tags", con=conn, if_exists = "append")

In [44]:
# One row per extracted track: its id and the path of the mastered file.
files = pd.DataFrame(
    [(track_id, track_path) for track_id, track_path in id_2_paths.items()],
    columns=["sid", "file_path"],
)

In [45]:
# Append the id -> path mapping to the `files` table. The positional index is not
# written (index=False) because `sid` is already a regular column of this frame.
files.to_sql("files", con = conn, if_exists="append", index = False)

In [46]:
cols = no_list_data.columns

# Group the scalar feature columns by name prefix.
tonal_cols = cols[cols.str.startswith("tonal")]
lowlevel_cols = cols[cols.str.startswith("lowlevel")]
rhythm_cols = cols[cols.str.startswith("rhyt")]

# One frame per feature family, each still indexed like `no_list_data`.
tonal_df = no_list_data.loc[:, tonal_cols]
lowlevel_df = no_list_data.loc[:, lowlevel_cols]
rhythm_df = no_list_data.loc[:, rhythm_cols]

In [47]:
# Append each feature family to its own table; the frame index is written as a column.
# NOTE(review): if_exists="append" makes this cell non-idempotent — re-running it adds
# the same rows again unless the tables enforce uniqueness.
tonal_df.to_sql("tonal_features", con=conn, if_exists="append")
lowlevel_df.to_sql("lowlevel_features", con=conn, if_exists="append")
rhythm_df.to_sql("rhythm_features", con=conn, if_exists="append")

In [48]:
# Each list-valued feature gets its own table "<col>_tbl".
for col in tqdm(list_cols):
    # Expand every list into one column per element, named "<col>_<position>".
    ser = list_data[col].apply(pd.Series).add_prefix(col + "_")
    ser.to_sql(col+"_tbl", con = conn,if_exists="append")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 29.48it/s]


## Effnet Activations and Genre Classifications

In [49]:
# Invert the id -> path mapping so tracks can be looked up by file path.
path2id = dict(zip(id_2_paths.values(), id_2_paths.keys()))

In [50]:
# Build the project's inference helper around the EffNet ONNX model, handing it the
# file path -> sid mapping of the newly added tracks.
# NOTE(review): presumably `input_length` is the analysis window in seconds — confirm
# against project_tools.models.Activator.
act = Activator(input_length=2.05, 
                model_path="onnx_models/discogs-effnet-bsdynamic-1.onnx",
                   pathid_dict=path2id)

In [51]:
# Column names of the `effnet_genres` table, skipping its first column
# (presumably the `sid` key — TODO confirm against the table schema).
gcols = pd.read_sql_query("SELECT * FROM effnet_genres LIMIT 1 ", con = conn).columns[1:].tolist()
# gcols[:5]

In [52]:
# sid, sf, output = next(act.batch_inference())
# genre_acts = output["activations"]

In [53]:
# genre_acts = [np.expand_dims(genre_acts[:, i], 0) for i in range(400)]
# genre_acts = pd.DataFrame(index = [sid], data = [genre_acts], columns=gcols)

In [54]:
# Run the EffNet model over every new track and store its embedding and its
# activation columns.
for song in act.batch_inference():
    # `with conn` commits when the body succeeds and rolls back if it raises.
    with conn:
        sid, sf, output = song
        genre_acts = output["activations"]
        embeds = output["embeddings"]
        # One array per activation column. The column count comes from the model output
        # instead of a hard-coded 400; the DataFrame constructor below still rejects
        # any mismatch with `gcols`.
        genre_acts = [np.expand_dims(genre_acts[:, i], 0)
                      for i in range(genre_acts.shape[1])]
        genre_acts = pd.DataFrame(index = [sid], data = [genre_acts], columns=gcols)
        cur.execute("INSERT INTO effnet_embeddings (sid, effnet_embedding) values (?,?)", 
                    (sid, np.expand_dims(embeds,0)))
        # NOTE(review): `sid` lives in the frame index but index=False means it is not
        # written — confirm that `effnet_genres` rows receive their sid some other way.
        genre_acts.to_sql("effnet_genres", con=conn, if_exists="append", index=False)
    conn.commit()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [02:57<00:00,  2.33s/it]


### Classification Head Models

In [55]:
# Pair each EffNet-based ONNX model with the JSON description at the same sorted position.
# NOTE(review): the pairing is purely positional, so it assumes both directories hold
# matching files in matching sort order — verify when adding or removing models.
model_paths = sorted(glob("onnx_models/*.onnx"))
model_infos = sorted(glob("onnx_models/json_info/*.json"))
effnet_models = [
    {"model": onnx_path, "json": model_infos[pos]}
    for pos, onnx_path in enumerate(model_paths)
    if "effnet" in onnx_path
]

In [56]:
# Drop the entries at positions 2 and 3 of the sorted model list.
# NOTE(review): this is position-based and not idempotent — re-running the cell removes
# two further models; confirm which models are meant to be excluded.
effnet_models = [m for pos, m in enumerate(effnet_models) if pos not in (2, 3)]

In [57]:
# Track ids (sids) of the files processed in this run.
new_ids = list(path2id.values())

In [58]:
# Run every classification-head model over the new tracks and commit its results.
for em in effnet_models:
    cls = Classifier(em, new_ids=new_ids)
    cls.batch_inference()
    cls.conn.commit()
    print(f"Completed =>  {cls.table_name} \n\n")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2858.21it/s]


Completed =>  approachability_2c_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 3084.17it/s]


Completed =>  danceability_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2784.60it/s]


Completed =>  engagement_2c_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2033.94it/s]


Completed =>  genre_electronic_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2920.16it/s]


Completed =>  mood_acoustic_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2603.60it/s]


Completed =>  mood_aggressive_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2482.26it/s]


Completed =>  mood_happy_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2885.58it/s]


Completed =>  mood_party_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2785.50it/s]


Completed =>  mood_sad_effnet_discogs_1_activations 




100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 322.41it/s]


Completed =>  mtg_jamendo_genre_effnet_discogs_1_activations 




100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 458.82it/s]


Completed =>  mtg_jamendo_moodtheme_effnet_discogs_1_activations 




100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 464.91it/s]


Completed =>  mtg_jamendo_top50tags_effnet_discogs_1_activations 




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 2940.49it/s]

Completed =>  timbre_effnet_discogs_1_activations 





