In [1]:
%load_ext jupyter_black

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import os
import sys
import pandas as pd
import numpy as np
import requests
from spotify_dl import spotify_dl
from pathlib import Path
import time
import os
from dotenv import load_dotenv  # changed magic command to explicit load
import librosa
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import pairwise
from sklearn.model_selection import train_test_split
from typing import List
from flask import Flask, redirect, request
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D  # new
from tensorflow.keras.layers import MaxPooling2D  # new
from tensorflow.keras.layers import Flatten  # new
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.callbacks import EarlyStopping
from skimage.transform import resize


# pandas DataFrame display options: remove the row and column display limits
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Optional float formatting, currently disabled:
# pd.options.display.float_format = "{:,.2f}".format


# Relative path to the .env file that holds the environment variables.
# NOTE(review): nothing in the visible cells loads this file -- confirm that
# load_dotenv(custom_env_path) is called wherever the variables are needed.
custom_env_path = "../../brainstation_capstone_cfg.env"

## Feature Function Definitions

In [3]:
# Sampling-rate note: the sampling rate must be passed explicitly to every
# librosa feature function (this includes the display function when plotting).
# It is therefore taken from the value returned by the audio loader in
# get_feature and handed to each helper below.
def get_mfcc(y, sr):
    """Return the MFCC matrix of an audio signal after decibel conversion.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.

    Returns:
        The output of ``librosa.feature.mfcc`` (library defaults) passed
        through ``librosa.power_to_db`` with ``ref=np.max``.
    """
    # NOTE(review): MFCCs are not a power spectrum, so the power_to_db step may
    # be unintended -- confirm before changing it, because the features already
    # written to disk were produced with this conversion.
    coefficients = librosa.feature.mfcc(y=y, sr=sr)
    return librosa.power_to_db(coefficients, ref=np.max)


def get_melspectrogram(y, sr):
    """Return the mel spectrogram of an audio signal in decibels.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.

    Returns:
        The output of ``librosa.feature.melspectrogram`` (library defaults)
        converted with ``librosa.power_to_db`` using ``ref=np.max``.
    """
    # The mel spectrogram is a power spectrum (amplitude squared), which is
    # the input power_to_db expects.
    power_spectrum = librosa.feature.melspectrogram(y=y, sr=sr)
    return librosa.power_to_db(power_spectrum, ref=np.max)


def get_chroma_vector(y, sr):
    """Return the chromagram of an audio signal.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.

    Returns:
        The output of ``librosa.feature.chroma_stft`` with library defaults.
    """
    return librosa.feature.chroma_stft(y=y, sr=sr)


def get_tonnetz(y, sr):
    """Return the tonnetz (tonal centroid) features of an audio signal.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.

    Returns:
        The output of ``librosa.feature.tonnetz`` with library defaults.
    """
    return librosa.feature.tonnetz(y=y, sr=sr)


def _summarise_rows(matrix):
    """Concatenate the per-row mean, minimum and maximum of a 2-D feature matrix."""
    return np.concatenate(
        (matrix.mean(axis=1), matrix.min(axis=1), matrix.max(axis=1))
    )


def _scale_to_unit_range(layer):
    """Min-max scale ``layer`` into [0, 1] as float32; a constant layer maps to zeros."""
    min_val = layer.min()
    span = layer.max() - min_val
    if span == 0:
        # Guard only the division by zero. The previous `max(span, 1.0)` also
        # left every layer whose span was below 1 un-normalised.
        span = 1.0
    return ((layer - min_val) / span).astype(np.float32)


def get_feature(
    input_file_path,
    track_id,
    raw_output_dir="../data/vectorized_mp3s/raw",
    image_size=(224, 224),
):
    """Extract the pairwise feature vector and the CNN input image for one track.

    The audio is loaded with ``sr=None``, ``offset=10`` and ``duration=120``.
    MFCC, mel spectrogram, chroma and tonnetz matrices are computed from it,
    and each raw matrix is saved as ``<name>_<track_id>.npy`` inside
    ``raw_output_dir`` before any reduction takes place.

    Args:
        input_file_path: Path of the audio file to load.
        track_id: Identifier used in the names of the saved ``.npy`` files.
        raw_output_dir: Directory that receives the raw feature matrices; it
            is created if it does not exist.
        image_size: ``(height, width)`` each CNN channel is resized to.

    Returns:
        A ``(pairwise_feature, cnn_feature)`` tuple:

        * ``pairwise_feature`` -- 1-D array with the per-row mean/min/max of
          the chroma, mel spectrogram, MFCC and tonnetz matrices, in that order.
        * ``cnn_feature`` -- float32 array of shape ``(height, width, 3)``
          stacking the resized mel spectrogram, chroma and tonnetz matrices,
          each min-max scaled to ``[0, 1]``.

    Note:
        Channels whose value span is below 1 used to be left unscaled. CNN
        vectors cached from the earlier scaling should be regenerated if they
        are compared with newly produced ones.
    """
    y, sr = librosa.load(input_file_path, sr=None, offset=10, duration=120)

    mfcc = get_mfcc(y, sr)
    melspectrogram = get_melspectrogram(y, sr)
    chroma = get_chroma_vector(y, sr)
    tntz = get_tonnetz(y, sr)

    # Persist the full matrices first, before they are reduced to statistics.
    raw_dir = Path(raw_output_dir)
    raw_dir.mkdir(parents=True, exist_ok=True)
    raw_matrices = {
        "mfcc": mfcc,
        "melspectrogram": melspectrogram,
        "chroma": chroma,
        "tonnetz": tntz,
    }
    for feature_name, matrix in raw_matrices.items():
        np.save(raw_dir / f"{feature_name}_{track_id}.npy", matrix)

    # The concatenation order is part of the stored vector layout; keep it stable.
    pairwise_feature = np.concatenate(
        [_summarise_rows(matrix) for matrix in (chroma, melspectrogram, mfcc, tntz)]
    )

    # One image channel per feature type, resized to a common shape.
    cnn_feature = np.dstack(
        [
            _scale_to_unit_range(resize(matrix, image_size))
            for matrix in (melspectrogram, chroma, tntz)
        ]
    )
    return pairwise_feature, cnn_feature

## CNN Model Setup

In [4]:
def set_random_seed(seed):
    """Seed the Python, NumPy and TensorFlow random generators for reproducibility.

    Args:
        seed: Integer seed applied to all three generators.
    """
    random.seed(seed)
    # NumPy's global generator was previously left unseeded.
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [5]:
# Input geometry handed to the pretrained network.
height, width, channels = 224, 224, 3

# NOTE(review): include_top=True presumably keeps the network's final
# classification layers, which looks at odds with the later "flatten for
# embedding" step -- confirm this is the intended output.
res_model = ResNet50V2(
    include_top=True,
    weights="imagenet",
    input_shape=(height, width, channels),
)
res_model.summary()

Model: "resnet50v2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 114, 114, 64)         0         ['conv1_conv[0][0]']          
                                                                                         

In [6]:
# Freeze all the layers in the base model
# Mark every layer of the base model as non-trainable
for base_layer in res_model.layers:
    base_layer.trainable = False

In [7]:
# For image analysis we only need to flatten for embedding
set_random_seed(121)
# The base model's output is only flattened; no further layers are added.
flatten_layer = Flatten()
res_out = res_model.output
output = flatten_layer(res_out)

In [8]:
# Wrap the base model's input and the flattened output into the final model
model = Model(
    outputs=output,
    inputs=res_model.input,
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 pool1_pad (ZeroPadding2D)   (None, 114, 114, 64)         0         ['conv1_conv[0][0]']          
                                                                                              

## Vectorize Data

### Count MP3s

In [9]:
data_dir = Path("../data/mp3s/")
path_glob = data_dir.rglob("*.mp3")
# rglob yields a one-shot generator, so it is materialised into a list that
# can be iterated repeatedly without globbing again.
pairwise_file_paths = list(path_glob)
len(pairwise_file_paths)  # number of mp3s in directory

11578

### Generate Track Feature Files

#### Pairwise Parquet and CNN Matrices

In [16]:
# Generator objects need to be rerun each time to prevent exhaustion
# Vectorise every MP3 that still lacks a pairwise and/or CNN parquet file.
data_dir = Path("../data/mp3s/")
output_pairwise_dir = Path("../data/vectorized_mp3s/pairwise_parquets/")
output_cnn_dir = Path("../data/vectorized_mp3s/cnn_parquets/")
output_pairwise_dir.mkdir(parents=True, exist_ok=True)
output_cnn_dir.mkdir(parents=True, exist_ok=True)

# rglob returns one-shot generators, so materialise each listing once.
file_paths = list(data_dir.rglob("*.mp3"))
print("Number of MP3 Files: ", len(file_paths), "\n")
pairwise_track_ids = [path.stem for path in output_pairwise_dir.rglob("*.parquet")]
cnn_track_ids = [path.stem for path in output_cnn_dir.rglob("*.parquet")]
# Sets give constant-time membership tests inside the loop.
pairwise_done = set(pairwise_track_ids)
cnn_done = set(cnn_track_ids)


def _write_track_parquet(values, track_id, output_dir):
    """Write ``values`` as a single-row parquet file named ``<track_id>.parquet``.

    The row is indexed by ``track_id`` (index name ``"track_id"``) and the
    column labels are converted to strings before writing.
    """
    row = np.asarray(values).reshape(1, -1)
    frame = pd.DataFrame(row, index=pd.Index([track_id], name="track_id"))
    frame.columns = frame.columns.astype(str)
    frame.to_parquet(output_dir / f"{track_id}.parquet")


for count, file_path in enumerate(file_paths, start=1):
    print(f"{count}. MP3 FILE PATH: \n", f"{file_path}")
    # The first directory below data_dir is the track id.
    track_id = file_path.relative_to(data_dir).parts[0]
    needs_pairwise = track_id not in pairwise_done
    needs_cnn = track_id not in cnn_done

    # Decode the audio and extract features only when an output is missing;
    # previously this ran for every file, including fully processed ones.
    # Consequence: the raw .npy dumps written by get_feature are no longer
    # refreshed for tracks that already have both parquet files.
    if needs_pairwise or needs_cnn:
        pairwise_track, cnn_track = get_feature(file_path, track_id)

    if needs_pairwise:
        _write_track_parquet(pairwise_track, track_id, output_pairwise_dir)
        print(f"{track_id} has been pairwise vectorized!")
    else:
        print(f"{track_id} has already been vectorized for pairwise...skipping...")

    if needs_cnn:
        # The model expects a batch dimension, so add one for the single image.
        cnn_embedding = model.predict(np.expand_dims(cnn_track, axis=0))
        _write_track_parquet(cnn_embedding, track_id, output_cnn_dir)
        print(f"{track_id} has been CNN vectorized!\n")
    else:
        print(f"{track_id} has already been vectorized for CNN...skipping...\n")

Number of MP3 Files:  11578 

1. MP3 FILE PATH: 
 ../data/mp3s/1ZB2qWsheGabSEYvBYxjKn/Take on Me/Weezer - Take on Me.mp3
1ZB2qWsheGabSEYvBYxjKn has already been vectorized for pairwise...skipping...
1ZB2qWsheGabSEYvBYxjKn has already been vectorized for CNN...skipping...

2. MP3 FILE PATH: 
 ../data/mp3s/5V9H9J5GcUGY5ig029g5OU/Shkleepy/Manwolves - Shkleepy.mp3
5V9H9J5GcUGY5ig029g5OU has already been vectorized for pairwise...skipping...
5V9H9J5GcUGY5ig029g5OU has already been vectorized for CNN...skipping...

3. MP3 FILE PATH: 
 ../data/mp3s/34FsCOAQ0U99vAh3uoiLmm/Bandana (feat. Young Buck)/Dirty Audio, BL3R, Young Buck - Bandana (feat. Young Buck).mp3
34FsCOAQ0U99vAh3uoiLmm has already been vectorized for pairwise...skipping...
34FsCOAQ0U99vAh3uoiLmm has already been vectorized for CNN...skipping...

4. MP3 FILE PATH: 
 ../data/mp3s/25mldAmMHYzXhDXCxTpTHy/Chloroform/Phoenix - Chloroform.mp3
25mldAmMHYzXhDXCxTpTHy has already been vectorized for pairwise...skipping...
25mldAmMHYzXhDXCx



63ElaCh1e2AosE7EJAdxfq has already been vectorized for pairwise...skipping...
63ElaCh1e2AosE7EJAdxfq has been CNN vectorized!

858. MP3 FILE PATH: 
 ../data/mp3s/7BDGjDqOuUbZkwDPSXoMl0/Saturday Night/The Cat Empire - Saturday Night.mp3
7BDGjDqOuUbZkwDPSXoMl0 has already been vectorized for pairwise...skipping...
7BDGjDqOuUbZkwDPSXoMl0 has been CNN vectorized!

859. MP3 FILE PATH: 
 ../data/mp3s/6R40BJoXMj9R942vnhuFsS/Kenner Boogie/Jon Batiste - Kenner Boogie.mp3
6R40BJoXMj9R942vnhuFsS has already been vectorized for pairwise...skipping...
6R40BJoXMj9R942vnhuFsS has been CNN vectorized!

860. MP3 FILE PATH: 
 ../data/mp3s/0FeqPtdkwXs54spLxfWnqr/Smoke Ring Halo/The Wood Brothers - Smoke Ring Halo.mp3
0FeqPtdkwXs54spLxfWnqr has already been vectorized for pairwise...skipping...
0FeqPtdkwXs54spLxfWnqr has been CNN vectorized!

861. MP3 FILE PATH: 
 ../data/mp3s/6VMT3SzIMbNoR5lsUsniQY/Stars/Skillet - Stars.mp3
6VMT3SzIMbNoR5lsUsniQY has already been vectorized for pairwise...skipping...
6VM

  return pitch_tuning(


7wobmC9Xz4stYN1ZVLWgi5 has already been vectorized for pairwise...skipping...
7wobmC9Xz4stYN1ZVLWgi5 has been CNN vectorized!

1394. MP3 FILE PATH: 
 ../data/mp3s/3mskluxUoD4xICumdJCR7l/Dave McGillivray/Jeff Beal - Dave McGillivray.mp3
3mskluxUoD4xICumdJCR7l has already been vectorized for pairwise...skipping...
3mskluxUoD4xICumdJCR7l has been CNN vectorized!

1395. MP3 FILE PATH: 
 ../data/mp3s/0XVQTr58DZbLUcjacnTp8k/Madness/Ruelle - Madness.mp3
0XVQTr58DZbLUcjacnTp8k has already been vectorized for pairwise...skipping...
0XVQTr58DZbLUcjacnTp8k has been CNN vectorized!

1396. MP3 FILE PATH: 
 ../data/mp3s/18765csbulflFn1Ww4bcys/Change/Clay Walker - Change.mp3
18765csbulflFn1Ww4bcys has already been vectorized for pairwise...skipping...
18765csbulflFn1Ww4bcys has been CNN vectorized!

1397. MP3 FILE PATH: 
 ../data/mp3s/7Kth3JPrAA9t1gwDL61zSn/Lotta Love/Jack & Jack - Lotta Love.mp3
7Kth3JPrAA9t1gwDL61zSn has already been vectorized for pairwise...skipping...
7Kth3JPrAA9t1gwDL61zSn has 

  return pitch_tuning(


0vybyrCk6ANFFmDTBWq74f has already been vectorized for pairwise...skipping...
0vybyrCk6ANFFmDTBWq74f has been CNN vectorized!

2682. MP3 FILE PATH: 
 ../data/mp3s/2cgZfcnb639TaZhd1AU8iz/Bun Up the Dance/Dillon Francis, Skrillex - Bun Up the Dance.mp3
2cgZfcnb639TaZhd1AU8iz has already been vectorized for pairwise...skipping...
2cgZfcnb639TaZhd1AU8iz has been CNN vectorized!

2683. MP3 FILE PATH: 
 ../data/mp3s/6l4qV9VNeCOesvcLPJMO9y/ - .mp3
6l4qV9VNeCOesvcLPJMO9y has already been vectorized for pairwise...skipping...
6l4qV9VNeCOesvcLPJMO9y has been CNN vectorized!

2684. MP3 FILE PATH: 
 ../data/mp3s/3DusC19xQoOt98IVXnH9uS/Corporate Cafeteria (Acoustic)/The Expendables - Corporate Cafeteria (Acoustic).mp3
3DusC19xQoOt98IVXnH9uS has already been vectorized for pairwise...skipping...
3DusC19xQoOt98IVXnH9uS has been CNN vectorized!

2685. MP3 FILE PATH: 
 ../data/mp3s/0l0CvurVUrr2w3Jj1hOVFc/NO/Meghan Trainor - NO.mp3
0l0CvurVUrr2w3Jj1hOVFc has already been vectorized for pairwise...skippi

  return pitch_tuning(


2SNtwYtk9a4THKlERP0bMN has already been vectorized for pairwise...skipping...
2SNtwYtk9a4THKlERP0bMN has been CNN vectorized!

4932. MP3 FILE PATH: 
 ../data/mp3s/3RDcUlLGp3SLp2AmUbUbls/Cookie Jar (feat. The-Dream)/Gym Class Heroes, The-Dream - Cookie Jar (feat. The-Dream).mp3
3RDcUlLGp3SLp2AmUbUbls has already been vectorized for pairwise...skipping...
3RDcUlLGp3SLp2AmUbUbls has been CNN vectorized!

4933. MP3 FILE PATH: 
 ../data/mp3s/0lb0z6jpnOO5qYVayvqWoh/Past Life Melodies/The American Boychoir - Past Life Melodies.mp3
0lb0z6jpnOO5qYVayvqWoh has already been vectorized for pairwise...skipping...
0lb0z6jpnOO5qYVayvqWoh has been CNN vectorized!

4934. MP3 FILE PATH: 
 ../data/mp3s/0rU4dLUSToe7uQUfaFCoda/3 Romances sans paroles, Op. 17 No. 3, Andante moderato in A-Flat Major/Gabriel Fauré, Christine Croshaw - 3 Romances sans paroles, Op. 17# No. 3, Andante moderato in A-Flat Major.mp3
0rU4dLUSToe7uQUfaFCoda has already been vectorized for pairwise...skipping...
0rU4dLUSToe7uQUfaFCoda

  return pitch_tuning(


2C6FKqjI3CT4Q0YjOlAQAl has already been vectorized for pairwise...skipping...
2C6FKqjI3CT4Q0YjOlAQAl has been CNN vectorized!

6683. MP3 FILE PATH: 
 ../data/mp3s/0xMd5bcWTbyXS7wPrBtZA6/Burn/Ellie Goulding - Burn.mp3
0xMd5bcWTbyXS7wPrBtZA6 has already been vectorized for pairwise...skipping...
0xMd5bcWTbyXS7wPrBtZA6 has been CNN vectorized!

6684. MP3 FILE PATH: 
 ../data/mp3s/3blJWKRuT9BPPSi1xN8hHX/Mi Bloncito/Ñejo - Mi Bloncito.mp3
3blJWKRuT9BPPSi1xN8hHX has already been vectorized for pairwise...skipping...
3blJWKRuT9BPPSi1xN8hHX has been CNN vectorized!

6685. MP3 FILE PATH: 
 ../data/mp3s/1eGAZX0TKa5qCxWsvhf8Th/Brick Wall Views/The Lawrence Arms - Brick Wall Views.mp3
1eGAZX0TKa5qCxWsvhf8Th has already been vectorized for pairwise...skipping...
1eGAZX0TKa5qCxWsvhf8Th has been CNN vectorized!

6686. MP3 FILE PATH: 
 ../data/mp3s/7CTTTbnqMg0ASqjH8v2D9l/Things You Don't Wanna Hear/George Carlin - Things You Don't Wanna Hear.mp3
7CTTTbnqMg0ASqjH8v2D9l has already been vectorized for p

  return pitch_tuning(


4oarDDCWsSnX2WOIRKuQo1 has already been vectorized for pairwise...skipping...
4oarDDCWsSnX2WOIRKuQo1 has been CNN vectorized!

6975. MP3 FILE PATH: 
 ../data/mp3s/6uv0GnArZDs61ivyZQL7kS/Then (Piano Mix)/Brad Paisley - Then (Piano Mix).mp3
6uv0GnArZDs61ivyZQL7kS has already been vectorized for pairwise...skipping...
6uv0GnArZDs61ivyZQL7kS has been CNN vectorized!

6976. MP3 FILE PATH: 
 ../data/mp3s/3stWWPN41byqp8loPdy92u/Master of None/Beach House - Master of None.mp3
3stWWPN41byqp8loPdy92u has already been vectorized for pairwise...skipping...
3stWWPN41byqp8loPdy92u has been CNN vectorized!

6977. MP3 FILE PATH: 
 ../data/mp3s/0dd2y9eFu1qkAnATCRW0G3/I Gave It All/Aquilo - I Gave It All.mp3
0dd2y9eFu1qkAnATCRW0G3 has already been vectorized for pairwise...skipping...
0dd2y9eFu1qkAnATCRW0G3 has been CNN vectorized!

6978. MP3 FILE PATH: 
 ../data/mp3s/03KisebN9LuPAchPJm32M8/Feeling You/Harrison Storm - Feeling You.mp3
03KisebN9LuPAchPJm32M8 has already been vectorized for pairwise...ski

  return pitch_tuning(


11SY8jJkg1CtUgHx37qUlm has already been vectorized for pairwise...skipping...
11SY8jJkg1CtUgHx37qUlm has been CNN vectorized!

7816. MP3 FILE PATH: 
 ../data/mp3s/6QUngYwZ65et2ye7Bj85EK/Can You Get To That/Funkadelic - Can You Get To That.mp3
6QUngYwZ65et2ye7Bj85EK has already been vectorized for pairwise...skipping...
6QUngYwZ65et2ye7Bj85EK has been CNN vectorized!

7817. MP3 FILE PATH: 
 ../data/mp3s/6lKRMylSZMtA7EqPl0pcdI/Are You Satisfied/MARINA - Are You Satisfied#.mp3
6lKRMylSZMtA7EqPl0pcdI has already been vectorized for pairwise...skipping...
6lKRMylSZMtA7EqPl0pcdI has been CNN vectorized!

7818. MP3 FILE PATH: 
 ../data/mp3s/3xnbJVx4QqsOYip8Cn6OrN/Crunch Time/Chris Travis - Crunch Time.mp3
3xnbJVx4QqsOYip8Cn6OrN has already been vectorized for pairwise...skipping...
3xnbJVx4QqsOYip8Cn6OrN has been CNN vectorized!

7819. MP3 FILE PATH: 
 ../data/mp3s/62fX8EW16l8St2yL8rMer9/In My Room - Remastered/The Beach Boys - In My Room - Remastered.mp3
62fX8EW16l8St2yL8rMer9 has already be

  return pitch_tuning(


5oICBRkaGVtLTkkKeXuSKF has already been vectorized for pairwise...skipping...
5oICBRkaGVtLTkkKeXuSKF has been CNN vectorized!

7926. MP3 FILE PATH: 
 ../data/mp3s/4kpbrWUW7rznN3mzjCjQUz/Satellite/The Kills - Satellite.mp3
4kpbrWUW7rznN3mzjCjQUz has already been vectorized for pairwise...skipping...
4kpbrWUW7rznN3mzjCjQUz has been CNN vectorized!

7927. MP3 FILE PATH: 
 ../data/mp3s/04G8U1rkNbiiLNsWo2doW2/Monsoon/Amber Mark, Mia Mark - Monsoon.mp3
04G8U1rkNbiiLNsWo2doW2 has already been vectorized for pairwise...skipping...
04G8U1rkNbiiLNsWo2doW2 has been CNN vectorized!

7928. MP3 FILE PATH: 
 ../data/mp3s/3GPD6qjLLDrhb7mUYCV9wT/City Lights/Motionless In White - City Lights.mp3
3GPD6qjLLDrhb7mUYCV9wT has already been vectorized for pairwise...skipping...
3GPD6qjLLDrhb7mUYCV9wT has been CNN vectorized!

7929. MP3 FILE PATH: 
 ../data/mp3s/5n51PH1jc6MtLsv8oTorck/Ponmela Aplaudi/Mark B., Don Miguelo - Ponmela Aplaudi.mp3
5n51PH1jc6MtLsv8oTorck has already been vectorized for pairwise...sk



4qWzWD48EQ3jQlyVvk9pxa has already been vectorized for pairwise...skipping...
4qWzWD48EQ3jQlyVvk9pxa has been CNN vectorized!

8157. MP3 FILE PATH: 
 ../data/mp3s/1tGKjMflcFTEY2IOzKhwLe/Prove It/Crowder, KB - Prove It.mp3
1tGKjMflcFTEY2IOzKhwLe has already been vectorized for pairwise...skipping...
1tGKjMflcFTEY2IOzKhwLe has been CNN vectorized!

8158. MP3 FILE PATH: 
 ../data/mp3s/1uigwk5hNV84zRd5YQQRTk/Pocketful of Sunshine/Natasha Bedingfield - Pocketful of Sunshine.mp3
1uigwk5hNV84zRd5YQQRTk has already been vectorized for pairwise...skipping...
1uigwk5hNV84zRd5YQQRTk has been CNN vectorized!

8159. MP3 FILE PATH: 
 ../data/mp3s/5CKHhg31HcYYhwUeeGqvhq/I Wish I Knew How It Would Feel to Be Free/Nina Simone - I Wish I Knew How It Would Feel to Be Free.mp3
5CKHhg31HcYYhwUeeGqvhq has already been vectorized for pairwise...skipping...
5CKHhg31HcYYhwUeeGqvhq has been CNN vectorized!

8160. MP3 FILE PATH: 
 ../data/mp3s/1FAPtCHEPQ4PAx0idWEplO/El Tiempo/#Secreto ##El Famoso Biberon### - El



1FPdKaYo5DK8oxCwkUq0GS has already been vectorized for pairwise...skipping...
1FPdKaYo5DK8oxCwkUq0GS has been CNN vectorized!

9225. MP3 FILE PATH: 
 ../data/mp3s/1rOlTL4pKQ9Y1fURua4AJR/My Body Is a Cage/Arcade Fire - My Body Is a Cage.mp3
1rOlTL4pKQ9Y1fURua4AJR has already been vectorized for pairwise...skipping...
1rOlTL4pKQ9Y1fURua4AJR has been CNN vectorized!

9226. MP3 FILE PATH: 
 ../data/mp3s/29chnYiDB5P03F0d5MKKt0/Fight Music/SEKAI NO OWARI - Fight Music.mp3
29chnYiDB5P03F0d5MKKt0 has already been vectorized for pairwise...skipping...
29chnYiDB5P03F0d5MKKt0 has been CNN vectorized!

9227. MP3 FILE PATH: 
 ../data/mp3s/414J8tKHbtF16XOiHGBEso/Tezeta (Nostalgia)/Mulatu Astatke - Tezeta (Nostalgia).mp3
414J8tKHbtF16XOiHGBEso has already been vectorized for pairwise...skipping...
414J8tKHbtF16XOiHGBEso has been CNN vectorized!

9228. MP3 FILE PATH: 
 ../data/mp3s/6EeUPx4P1utUI51LTON841/Barry Moves/Allan Rayman - Barry Moves.mp3
6EeUPx4P1utUI51LTON841 has already been vectorized for 



1uD76u7VzeiplKKmvLEJ43 has already been vectorized for pairwise...skipping...
1uD76u7VzeiplKKmvLEJ43 has been CNN vectorized!

9317. MP3 FILE PATH: 
 ../data/mp3s/2UrILPsnAc5Jh4VLGyFoKx/Symphony No. 4 in E Minor, Op. 98 IV. Allegro energico e passionato - Più allegro/Johannes Brahms, Wiener Philharmoniker, Carlos Kleiber - Symphony No. 4 in E Minor, Op. 98# IV. Allegro energico e passionato - Più allegro.mp3
2UrILPsnAc5Jh4VLGyFoKx has already been vectorized for pairwise...skipping...
2UrILPsnAc5Jh4VLGyFoKx has been CNN vectorized!

9318. MP3 FILE PATH: 
 ../data/mp3s/1bXgMtdwYuFAPApWxGbS17/How Great Is Your Love/Phil Wickham - How Great Is Your Love.mp3
1bXgMtdwYuFAPApWxGbS17 has already been vectorized for pairwise...skipping...
1bXgMtdwYuFAPApWxGbS17 has been CNN vectorized!

9319. MP3 FILE PATH: 
 ../data/mp3s/2pJZ1v8HezrAoZ0Fhzby92/What Do I Know/Ed Sheeran - What Do I Know#.mp3
2pJZ1v8HezrAoZ0Fhzby92 has already been vectorized for pairwise...skipping...
2pJZ1v8HezrAoZ0Fhzby92 ha

  return pitch_tuning(


6cZ8T2c95NfRTrGEXqThIq has already been vectorized for pairwise...skipping...
6cZ8T2c95NfRTrGEXqThIq has been CNN vectorized!

10165. MP3 FILE PATH: 
 ../data/mp3s/2a7z0RvsjwSMWEPwIWO5z2/La Ultima Vez/Feid - La Ultima Vez.mp3
2a7z0RvsjwSMWEPwIWO5z2 has already been vectorized for pairwise...skipping...
2a7z0RvsjwSMWEPwIWO5z2 has been CNN vectorized!

10166. MP3 FILE PATH: 
 ../data/mp3s/7os1J5P6JqIxKSxq71QuQC/Hero (feat. Christina Perri) - Deep Mix/Cash Cash, Christina Perri - Hero (feat. Christina Perri) - Deep Mix.mp3
7os1J5P6JqIxKSxq71QuQC has already been vectorized for pairwise...skipping...
7os1J5P6JqIxKSxq71QuQC has been CNN vectorized!

10167. MP3 FILE PATH: 
 ../data/mp3s/4qZwVr553XcQNdeqjueeE3/Roses Are Red - Original Version/Aqua - Roses Are Red - Original Version.mp3
4qZwVr553XcQNdeqjueeE3 has already been vectorized for pairwise...skipping...
4qZwVr553XcQNdeqjueeE3 has been CNN vectorized!

10168. MP3 FILE PATH: 
 ../data/mp3s/48DPyFzSUNGAYzXM9Zj4Jd/La rondine Chi il bel s

  return pitch_tuning(


2s9M78CDrQDjB2D827XXr4 has already been vectorized for pairwise...skipping...
2s9M78CDrQDjB2D827XXr4 has been CNN vectorized!

10205. MP3 FILE PATH: 
 ../data/mp3s/3rMyMv8EjKXoPnaRo2hdJN/Stuntin' Like My Daddy - Street/Birdman, Lil Wayne - Stuntin' Like My Daddy - Street.mp3
3rMyMv8EjKXoPnaRo2hdJN has already been vectorized for pairwise...skipping...
3rMyMv8EjKXoPnaRo2hdJN has been CNN vectorized!

10206. MP3 FILE PATH: 
 ../data/mp3s/3UUx0JWEJCbKZvhzpruLfe/Letting You Go/Sara Evans - Letting You Go.mp3
3UUx0JWEJCbKZvhzpruLfe has already been vectorized for pairwise...skipping...
3UUx0JWEJCbKZvhzpruLfe has been CNN vectorized!

10207. MP3 FILE PATH: 
 ../data/mp3s/3As1HTUDBTiTDbwnuICy4C/Fisherman's Horizon Piano Collections Version (From Final Fantasy VIII) [For Piano Solo]/Nobuo Uematsu, daigoro789 - #Fisherman's Horizon# Piano Collections Version (From ##Final Fantasy VIII##) [For Piano Solo]#.mp3
3As1HTUDBTiTDbwnuICy4C has already been vectorized for pairwise...skipping...
3As1HTUD



4FwxXaVT6iGfhiInzQLJ2n has been CNN vectorized!

11450. MP3 FILE PATH: 
 ../data/mp3s/6QPCo7i6vBXzbdg4muyOdG/Bye Bye Blackbird/Riders In The Sky - Bye Bye Blackbird.mp3
6QPCo7i6vBXzbdg4muyOdG has already been vectorized for pairwise...skipping...
6QPCo7i6vBXzbdg4muyOdG has been CNN vectorized!

11451. MP3 FILE PATH: 
 ../data/mp3s/2v4kQsvlTPEYOIyJkytzGH/In A Sweater Poorly Knit/mewithoutYou - In A Sweater Poorly Knit.mp3
2v4kQsvlTPEYOIyJkytzGH has already been vectorized for pairwise...skipping...
2v4kQsvlTPEYOIyJkytzGH has been CNN vectorized!

11452. MP3 FILE PATH: 
 ../data/mp3s/1eupQ3yeBQPmAUcHyhrXkb/Mal ô mains/Sanseverino - Mal ô mains.mp3
1eupQ3yeBQPmAUcHyhrXkb has already been vectorized for pairwise...skipping...
1eupQ3yeBQPmAUcHyhrXkb has been CNN vectorized!

11453. MP3 FILE PATH: 
 ../data/mp3s/6FlgXdr8JJLfQvMoHQjE0r/Hey, Snow White/Destroyer - Hey, Snow White.mp3
6FlgXdr8JJLfQvMoHQjE0r has already been vectorized for pairwise...skipping...
6FlgXdr8JJLfQvMoHQjE0r has been C

#### Create Complete Pairwise Parquet

In [17]:
# Read every per-track pairwise parquet and stack them into one DataFrame
vectorized_data_path = Path("../data/vectorized_mp3s/pairwise_parquets/")
path_glob = vectorized_data_path.rglob("*.parquet")
dataframes = [pd.read_parquet(f"{parquet_path}") for parquet_path in path_glob]
vectorized_df = pd.concat(dataframes)

In [18]:
# Validate before persisting so an incomplete table is never written to disk.
expected_track_count = 11578  # MP3 count reported by the "Count MP3s" cell
assert vectorized_df.shape[0] == expected_track_count, (
    f"Expected {expected_track_count} pairwise rows, "
    f"found {vectorized_df.shape[0]}"
)

# Prefix the file name with the run date as YYYYMMDD.
output_file_date = datetime.now().strftime("%Y%m%d")
complete_pairwise_dir = Path("../data/vectorized_mp3s/pairwise_complete_parquets/")
complete_pairwise_dir.mkdir(parents=True, exist_ok=True)
vectorized_df.to_parquet(
    complete_pairwise_dir / f"{output_file_date}_complete_pairwise_data.parquet"
)

#### Create Complete CNN Parquet

In [19]:
# Read every per-track CNN parquet and stack them into one DataFrame
vectorized_data_path = Path("../data/vectorized_mp3s/cnn_parquets/")
path_glob = vectorized_data_path.rglob("*.parquet")
dataframes = [pd.read_parquet(f"{parquet_path}") for parquet_path in path_glob]
vectorized_df = pd.concat(dataframes)

In [20]:
# Validate before persisting so an incomplete table is never written to disk.
expected_track_count = 11578  # MP3 count reported by the "Count MP3s" cell
assert vectorized_df.shape[0] == expected_track_count, (
    f"Expected {expected_track_count} CNN rows, found {vectorized_df.shape[0]}"
)

# Prefix the file name with the run date as YYYYMMDD.
output_file_date = datetime.now().strftime("%Y%m%d")
complete_cnn_dir = Path("../data/vectorized_mp3s/cnn_complete_parquets/")
complete_cnn_dir.mkdir(parents=True, exist_ok=True)
vectorized_df.to_parquet(
    complete_cnn_dir / f"{output_file_date}_complete_cnn_data.parquet"
)