In [2]:
import pandas as pd
import numpy as np
import json
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

### Encode categorical variables and upscale to consistent size

In [64]:
# Load the raw track features and discard the "Unnamed: 0" column
# (presumably a row index written out when the CSV was saved — verify).
features = pd.read_csv("features.csv").drop(columns=["Unnamed: 0"])
features.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [4]:
# Data overview: column dtypes and non-null counts
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114000 entries, 0 to 113999
Data columns (total 20 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_id          114000 non-null  object 
 1   artists           113999 non-null  object 
 2   album_name        113999 non-null  object 
 3   track_name        113999 non-null  object 
 4   popularity        114000 non-null  int64  
 5   duration_ms       114000 non-null  int64  
 6   explicit          114000 non-null  bool   
 7   danceability      114000 non-null  float64
 8   energy            114000 non-null  float64
 9   key               114000 non-null  int64  
 10  loudness          114000 non-null  float64
 11  mode              114000 non-null  int64  
 12  speechiness       114000 non-null  float64
 13  acousticness      114000 non-null  float64
 14  instrumentalness  114000 non-null  float64
 15  liveness          114000 non-null  float64
 16  valence           11

In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
features.describe()

Unnamed: 0,popularity,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
count,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0,114000.0
mean,33.238535,228029.2,0.5668,0.641383,5.30914,-8.25896,0.637553,0.084652,0.31491,0.15605,0.213553,0.474068,122.147837,3.904035
std,22.305078,107297.7,0.173542,0.251529,3.559987,5.029337,0.480709,0.105732,0.332523,0.309555,0.190378,0.259261,29.978197,0.432621
min,0.0,0.0,0.0,0.0,0.0,-49.531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,17.0,174066.0,0.456,0.472,2.0,-10.013,0.0,0.0359,0.0169,0.0,0.098,0.26,99.21875,4.0
50%,35.0,212906.0,0.58,0.685,5.0,-7.004,1.0,0.0489,0.169,4.2e-05,0.132,0.464,122.017,4.0
75%,50.0,261506.0,0.695,0.854,8.0,-5.003,1.0,0.0845,0.598,0.049,0.273,0.683,140.071,4.0
max,100.0,5237295.0,0.985,1.0,11.0,4.532,1.0,0.965,0.996,1.0,1.0,0.995,243.372,5.0


In [65]:
# Clean the frame in a single pass: discard exact duplicate rows first, then
# every row that still has a missing value in any column.
features = features.drop_duplicates().dropna()

In [66]:
# Load the encoded genre descriptions (presumably a mapping of genre label ->
# encoded vector; it is used as the lookup for `track_genre` further down).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("encoded_genre_descriptions.json", encoding="utf-8") as f:
    content = json.load(f)

In [None]:
# Categorical data encoding

# Replace each genre label with its encoded description looked up in `content`
# (labels that are missing from `content` become NaN).
features["track_genre"] = features["track_genre"].map(content)

# Sentence transformer
st = SentenceTransformer("all-MiniLM-L6-v2")
# Encode the free-text columns. Each column is handed to the model as one list
# so the encoding is batched, rather than invoking the model once per row via
# `.apply(st.encode)`. `encode` returns a 2-D array with one row per input;
# `list(...)` splits it back into one vector per DataFrame row.
for text_col in ["album_name", "track_name"]:
    features[text_col] = list(
        st.encode(features[text_col].tolist(), show_progress_bar=True)
    )

# Ordinal encoder
oe = OrdinalEncoder()
# Ordinal-encode artists: the artist string is treated as an identifier rather
# than as text with semantic meaning
features["artists"] = oe.fit_transform(features[["artists"]])

features

In [86]:
# Collapse the frame to one row per track_id: the genre vectors of all rows
# sharing a track_id are averaged element-wise, while every other column is
# taken from that track's first occurrence.
features_embeddings = features.copy()
grouping = (
    features_embeddings
    .groupby("track_id", as_index=False)["track_genre"]
    .agg(lambda vectors: np.mean(np.stack(vectors), axis=0))
)
features_embeddings = (
    features_embeddings
    .drop(columns=["track_genre"])
    .drop_duplicates(subset=["track_id"])
    .merge(grouping, on="track_id", how="left")
)

In [None]:
# Checkpoint: the previous cell took 159 min to run, so persist its result to a pickle.
# NOTE(review): the last cell of this notebook writes to this same path, so after a
# full run this file holds the final (upscaled) frame rather than this checkpoint —
# consider a separate filename for the intermediate result.
features_embeddings.to_pickle("features_preprocessed.pkl")

In [22]:
# Reload the pickled frame so the cells that produced it need not be re-run
features_embeddings= pd.read_pickle("features_preprocessed.pkl")

In [87]:
# Scalar normalization: standardise every numeric column with StandardScaler

# `explicit` is boolean; multiplying by 1.0 turns it into a float so that the
# dtype filter below picks it up as a scalar column
features_embeddings["explicit"] = features_embeddings["explicit"] * 1.0

# Names of all columns stored as int64 / float64, in column order
numeric_dtypes = ["int64", "float64"]
scalars = [
    name
    for name, dtype in features_embeddings.dtypes.items()
    if dtype in numeric_dtypes
]

scaler = StandardScaler()
features_embeddings[scalars] = scaler.fit_transform(features_embeddings[scalars])

In [89]:
# Normalize embeddings

# Function to L2 normalize embeddings
def l2_normalize_1d(array):
    """Scale `array` to unit Euclidean (L2) length.

    Args:
        array: Array whose norm is computed with `np.linalg.norm`.

    Returns:
        `array / norm` when the norm is non-zero; otherwise the input object
        itself, unchanged (avoids dividing by zero).
    """
    magnitude = np.linalg.norm(array)
    if magnitude == 0:
        # Nothing to scale: hand the all-zero input back as-is
        return array
    return array / magnitude

# Columns whose vectors are rescaled to unit L2 norm
# NOTE(review): "track_genre" also holds vectors but is not listed here — confirm that is intended
embeddings = ["track_name", "album_name"]
for col in embeddings:
    # Element-wise: every cell's vector is replaced by its normalized version
    features_embeddings[col] = features_embeddings[col].map(l2_normalize_1d)

features_embeddings

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,-0.595234,"[-0.042611223, -0.046201088, -0.03457106, 0.01...","[-0.042611223, -0.046201088, -0.03457106, 0.01...",1.933925,0.013472,-0.306447,0.644253,-0.675975,-1.203275,0.335727,-1.324621,0.490458,-0.875166,-0.535482,0.723656,0.934047,-1.133599,0.226216,"[0.04281982313841581, -0.0413171318359673, 0.0..."
1,4qPNDBW1i3p13qLCt0Ki3A,-1.376509,"[-0.055657774, -0.019521242, 0.023629805, -0.0...","[-0.072181344, 0.011208587, 0.03520768, -0.047...",1.059312,-0.704186,-0.306447,-0.804604,-1.825602,-1.203275,-1.673087,0.754933,-0.098364,1.760810,-0.535468,-0.595078,-0.770269,-1.479843,0.226216,"[0.07256904989480972, -0.014707725495100021, 0..."
2,1iJBSr7s7jYXzM8EGcbK5b,-0.369802,"[-0.0340062, -0.054437377, 0.014670651, 0.0601...","[-0.0340062, -0.054437377, 0.014670651, 0.0601...",1.156491,-0.162188,-0.306447,-0.702731,-1.073473,-1.484183,-0.236524,0.754933,-0.280219,-0.349626,-0.535485,-0.512978,-1.329497,-1.518259,0.226216,"[0.09977447986602783, -0.0242838766425848, 0.0..."
3,6lfxq3CG4xtTiEg7opyCyx,-0.099947,"[-0.04945588, 0.02640154, -0.02629091, 0.04236...","[-0.051752593, -0.032192286, 0.088695966, 0.03...",1.836746,-0.240925,-0.306447,-1.676182,-2.240247,-1.484183,-1.918228,0.754933,-0.451480,1.704650,-0.535266,-0.436009,-1.241999,1.981635,-1.979174,"[0.09977447986602783, -0.0242838766425848, 0.0..."
4,5vjLSffimiIP26QG5WcN2K,-1.159034,"[-0.09101045, -0.057857774, -0.011362545, -0.0...","[-0.09101045, -0.057857774, -0.011362545, -0.0...",2.371232,-0.268195,-0.306447,0.315996,-0.746122,-0.922368,-0.226373,0.754933,-0.307585,0.415925,-0.535485,-0.687954,-1.150696,-0.070030,0.226216,"[0.09977447986602783, -0.0242838766425848, 0.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89735,2C3TZjDRiAzdyViavDJ217,0.745090,"[0.013029114, -0.0064732456, 0.11790641, 0.005...","[-0.0065607643, 0.09955642, -0.031772323, 0.03...",-0.592735,1.379914,-0.306447,-2.208184,-1.556706,-0.079646,-1.511831,0.754933,-0.399395,0.921365,2.330062,-0.670508,-1.657046,0.130717,2.431606,"[0.0747806504368782, 0.006442745216190815, -0...."
89736,1hIz5L4IB9hN3WRYPOCGPw,0.745090,"[0.013029114, -0.0064732456, 0.11790641, 0.005...","[-0.014128448, 0.021833342, 0.034011472, 0.091...",-0.544146,1.379923,-0.306447,-2.196865,-2.016557,-1.484183,-1.880499,-1.324621,-0.417934,1.967716,2.478280,-0.574553,-1.652861,-1.222517,0.226216,"[0.0747806504368782, 0.006442745216190815, -0...."
89737,6x8ZfSoqDjuNa5SVP5QjvX,-1.192517,"[-0.14179662, 0.04931275, 0.017334942, -0.0407...","[-0.0033045784, 0.08042111, -0.03275188, -0.00...",-0.544146,0.374710,-0.306447,0.378251,-1.190384,-1.484183,-0.458874,-1.324621,-0.401161,1.592330,-0.535485,-0.682823,1.040567,0.342654,0.226216,"[0.0747806504368782, 0.006442745216190815, -0...."
89738,2e6sXL2bYv4bSz6VTdnfLs,0.308371,"[0.032583725, 0.052094545, 0.05051703, -0.0473...","[-0.11017303, 0.068362534, 0.00659094, 0.03007...",0.379057,0.484736,-0.306447,0.140548,-0.500608,0.482169,-0.457725,0.754933,-0.509744,0.155815,-0.535485,0.272105,-0.214844,0.461588,0.226216,"[0.0747806504368782, 0.006442745216190815, -0...."


In [90]:
# Upscale scalars to match embeddings because RNN needs consistent shape

# Multi-layer perceptron 
class ScalarUpscaler(nn.Module):
    """Two-layer perceptron that lifts an `input_dim`-sized input to `output_dim`.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        output_dim: Size of the last dimension of the output tensor.
        hidden_dim: Width of the single hidden layer. Defaults to 128, the
            value that used to be hard-coded.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=128):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Apply the MLP to `x` (last dimension `input_dim`) and return the result."""
        return self.mlp(x)


# The upscaler is applied with its freshly initialised random weights (no
# training step is visible in this notebook), so the seed is fixed before it is
# built; otherwise every kernel restart would yield different upscaled features.
UPSCALER_SEED = 42
torch.manual_seed(UPSCALER_SEED)

# Initialize the ScalarUpscaler model
scalar_upscaler = ScalarUpscaler(input_dim=1, output_dim=384)


# Upscale each scalar column
# Inference only: autograd is disabled so no graph is recorded for the forward passes.
with torch.no_grad():
    for col in scalars:
        # unsqueeze(1) adds a trailing dimension: (n_rows,) -> (n_rows, 1),
        # i.e. one single-feature input per row
        column_tensor = torch.tensor(
            features_embeddings[col].values, dtype=torch.float32
        ).unsqueeze(1)
        upscaled_tensor = scalar_upscaler(column_tensor)
        # Store one output vector per row in a new "<col>_upscaled" column
        features_embeddings[f"{col}_upscaled"] = list(upscaled_tensor.numpy())

# Remove the original scalar columns in one call instead of copying the frame once per column
features_embeddings = features_embeddings.drop(columns=scalars)

In [91]:
# Inspect the frame after the scalar columns have been replaced by their upscaled versions
features_embeddings

Unnamed: 0,track_id,album_name,track_name,track_genre,artists_upscaled,popularity_upscaled,duration_ms_upscaled,explicit_upscaled,danceability_upscaled,energy_upscaled,key_upscaled,loudness_upscaled,mode_upscaled,speechiness_upscaled,acousticness_upscaled,instrumentalness_upscaled,liveness_upscaled,valence_upscaled,tempo_upscaled,time_signature_upscaled
0,5SuOikwiRyPMVoIQDJUgSV,"[-0.042611223, -0.046201088, -0.03457106, 0.01...","[-0.042611223, -0.046201088, -0.03457106, 0.01...","[0.04281982313841581, -0.0413171318359673, 0.0...","[0.22623691, -0.2630899, 0.28008905, 0.3336281...","[0.23569924, 0.07754627, 0.9953241, -0.1993762...","[0.1333766, -0.24471569, 0.37829068, 0.2256398...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.14795417, -0.1255033, 0.49180856, 0.0987763...","[0.2459076, -0.26909897, 0.25624132, 0.3416464...","[0.4700154, -0.3349966, 0.07784962, 0.37893984...","[0.13526565, -0.19553947, 0.42042187, 0.154080...","[0.54328257, -0.36000785, 0.04080657, 0.370527...","[0.14580175, -0.15707129, 0.4478899, 0.1240215...","[0.31681713, -0.30067033, 0.1810764, 0.3712474...","[0.21248901, -0.25748584, 0.29466042, 0.329254...","[0.14530328, -0.107552834, 0.51419556, 0.08597...","[0.13947034, -0.032412566, 0.5862957, 0.065255...","[0.4332261, -0.32097077, 0.10002486, 0.3832992...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
1,4qPNDBW1i3p13qLCt0Ki3A,"[-0.055657774, -0.019521242, 0.023629805, -0.0...","[-0.072181344, 0.011208587, 0.03520768, -0.047...","[0.07256904989480972, -0.014707725495100021, 0...","[0.5738303, -0.37058035, 0.028592411, 0.365585...","[0.14794555, 0.008012899, 0.63925403, 0.051020...","[0.25292706, -0.27180842, 0.24742353, 0.344402...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.28842872, -0.2869221, 0.20841533, 0.3579632...","[0.83992505, -0.4830391, -0.08890677, 0.312103...","[0.4700154, -0.3349966, 0.07784962, 0.37893984...","[0.7503216, -0.44409406, -0.04913701, 0.329303...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.14092025, -0.24440902, 0.35712966, 0.255290...","[0.21777466, 0.08127101, 0.9264592, -0.1505331...","[0.21248582, -0.2574841, 0.29466364, 0.3292523...","[0.22619945, -0.26308015, 0.28013343, 0.333612...","[0.27545002, -0.28091362, 0.22308734, 0.352594...","[0.6369213, -0.3945482, 0.00068719685, 0.35271...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
2,1iJBSr7s7jYXzM8EGcbK5b,"[-0.0340062, -0.054437377, 0.014670651, 0.0601...","[-0.0340062, -0.054437377, 0.014670651, 0.0601...","[0.09977447986602783, -0.0242838766425848, 0.0...","[0.1852988, -0.24752226, 0.31499597, 0.2979095...","[0.14957899, 0.031931207, 0.6845803, 0.0294982...","[0.14734286, -0.24458757, 0.34709787, 0.267165...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.2525652, -0.27166876, 0.24787804, 0.3442608...","[0.4064809, -0.31443882, 0.11991134, 0.3843602...","[0.639475, -0.39562756, -0.00045279413, 0.3521...","[0.15872085, -0.24368271, 0.33694616, 0.279785...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.16629803, -0.24370599, 0.3304773, 0.2864274...","[0.18160954, -0.24636707, 0.31825873, 0.294441...","[0.21248975, -0.2574861, 0.2946597, 0.3292546,...","[0.20849466, -0.25575122, 0.2978813, 0.3253279...","[0.546174, -0.36098635, 0.039636504, 0.3700925...","[0.6595259, -0.40410122, -0.009403378, 0.34777...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
3,6lfxq3CG4xtTiEg7opyCyx,"[-0.04945588, 0.02640154, -0.02629091, 0.04236...","[-0.051752593, -0.032192286, 0.088695966, 0.03...","[0.09977447986602783, -0.0242838766425848, 0.0...","[0.14099935, -0.24439871, 0.3568103, 0.2557095...","[0.22563717, 0.07963715, 0.95666605, -0.171957...","[0.15939462, -0.24362916, 0.33634493, 0.280532...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.7521348, -0.4449015, -0.049926274, 0.328941...","[1.0943725, -0.59608513, -0.20386684, 0.263698...","[0.639475, -0.39562756, -0.00045279413, 0.3521...","[0.8945853, -0.5058637, -0.11392044, 0.3022574...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.19894978, -0.25221226, 0.3043857, 0.3131569...","[0.21195975, 0.08247937, 0.90411866, -0.134687...","[0.21243995, -0.25745964, 0.2947097, 0.3292279...","[0.19654852, -0.25132206, 0.30602202, 0.310094...","[0.49266654, -0.34384936, 0.06506932, 0.376387...","[0.24156508, 0.07630977, 1.0130721, -0.2130315...","[0.93204206, -0.52267456, -0.13032505, 0.29655..."
4,5vjLSffimiIP26QG5WcN2K,"[-0.09101045, -0.057857774, -0.011362545, -0.0...","[-0.09101045, -0.057857774, -0.011362545, -0.0...","[0.09977447986602783, -0.0242838766425848, 0.0...","[0.44504857, -0.32536805, 0.09222077, 0.381935...","[0.29041368, 0.063367724, 1.149627, -0.321417,...","[0.16356862, -0.2432971, 0.33262086, 0.2851617...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.13341618, -0.20006022, 0.4177078, 0.1576988...","[0.2668824, -0.27740178, 0.23222205, 0.3494696...","[0.3361912, -0.30845678, 0.16561946, 0.3792405...","[0.15716723, -0.24380627, 0.3383323, 0.2780619...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.17253801, -0.24465367, 0.3255828, 0.2892846...","[0.14223987, -0.1769402, 0.4319136, 0.1392394,...","[0.21248975, -0.2574861, 0.2946597, 0.3292546,...","[0.24888828, -0.2702495, 0.25249696, 0.3428169...","[0.44117332, -0.32392666, 0.09477889, 0.382382...","[0.1391291, -0.2445127, 0.36257663, 0.2477839,...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89735,2C3TZjDRiAzdyViavDJ217,"[0.013029114, -0.0064732456, 0.11790641, 0.005...","[-0.0065607643, 0.09955642, -0.031772323, 0.03...","[0.0747806504368782, 0.006442745216190815, -0....","[0.14431867, -0.10190673, 0.52013546, 0.084200...","[0.22563553, -0.26293394, 0.28080302, 0.333378...","[0.17629722, 0.07149409, 0.77320695, -0.035022...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[1.0744164, -0.5867909, -0.19442293, 0.2681566...","[0.6821343, -0.4137324, -0.019458115, 0.342894...","[0.13979152, -0.24448943, 0.36076713, 0.250333...","[0.6557433, -0.40250292, -0.0077148303, 0.3486...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.19070998, -0.24921674, 0.31021008, 0.302995...","[0.13883719, -0.03640153, 0.58133644, 0.066298...","[0.28524202, 0.064890504, 1.1352518, -0.310688...","[0.24454731, -0.26857376, 0.25795004, 0.341112...","[0.7409227, -0.4399091, -0.045046188, 0.331176...","[0.12729776, -0.23512346, 0.39526016, 0.194921...","[0.29771596, 0.06123926, 1.1709367, -0.336952,..."
89736,1hIz5L4IB9hN3WRYPOCGPw,"[0.013029114, -0.0064732456, 0.11790641, 0.005...","[-0.014128448, 0.021833342, 0.034011472, 0.091...","[0.0747806504368782, 0.006442745216190815, -0....","[0.14431867, -0.10190673, 0.52013546, 0.084200...","[0.21448034, -0.25858068, 0.29267162, 0.330180...","[0.17629871, 0.07149494, 0.7732101, -0.0350262...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[1.067371, -0.5835095, -0.19108883, 0.26973087...","[0.95523614, -0.53302956, -0.14044747, 0.29256...","[0.639475, -0.39562756, -0.00045279413, 0.3521...","[0.872321, -0.49656695, -0.10373186, 0.3062679...","[0.54328257, -0.36000785, 0.04080657, 0.370527...","[0.19374311, -0.2502819, 0.30793375, 0.3065178...","[0.23981616, 0.076679066, 1.0079442, -0.209039...","[0.30270296, 0.060086023, 1.1880735, -0.348476...","[0.22125816, -0.26179865, 0.28599992, 0.331556...","[0.73847073, -0.43881732, -0.043979064, 0.3316...","[0.48127067, -0.33939555, 0.071499065, 0.37767...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
89737,6x8ZfSoqDjuNa5SVP5QjvX,"[-0.14179662, 0.04931275, 0.017334942, -0.0407...","[-0.0033045784, 0.08042111, -0.03275188, -0.00...","[0.0747806504368782, 0.006442745216190815, -0....","[0.46372247, -0.33253726, 0.08140024, 0.379649...","[0.21448034, -0.25858068, 0.29267162, 0.330180...","[0.13891977, -0.18660769, 0.4257838, 0.1469315...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.13925183, -0.18579608, 0.42627093, 0.146282...","[0.46247482, -0.33204952, 0.0821043, 0.3797897...","[0.639475, -0.39562756, -0.00045279413, 0.3521...","[0.20009735, -0.2526378, 0.30360368, 0.3146203...","[0.54328257, -0.36000785, 0.04080657, 0.370527...","[0.19103277, -0.24931785, 0.30992445, 0.303299...","[0.2010588, 0.083568335, 0.8605871, -0.1039486...","[0.21248975, -0.2574861, 0.2946597, 0.3292546,...","[0.24761158, -0.26975662, 0.25410083, 0.342315...","[0.14722991, 0.002275581, 0.63096726, 0.053053...","[0.13591498, -0.19395241, 0.42137444, 0.152810...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."
89738,2e6sXL2bYv4bSz6VTdnfLs,"[0.032583725, 0.052094545, 0.05051703, -0.0473...","[-0.11017303, 0.068362534, 0.00659094, 0.03007...","[0.0747806504368782, 0.006442745216190815, -0....","[0.13270143, -0.2018072, 0.41665906, 0.1590969...","[0.13932729, -0.18561158, 0.42638168, 0.146134...","[0.1452922, -0.15846336, 0.44652373, 0.1251781...","[0.1722785, -0.2446143, 0.32578623, 0.28916574...","[0.12643495, -0.23389685, 0.39649355, 0.192650...","[0.20657477, -0.25503945, 0.29918963, 0.322879...","[0.14518315, -0.1591641, 0.44597054, 0.1257094...","[0.19991899, -0.25257167, 0.30372515, 0.314392...","[0.14366117, -0.0991827, 0.5231176, 0.08367412...","[0.20799267, -0.25556523, 0.29822338, 0.324688...","[0.12509525, -0.23199219, 0.3984088, 0.1891255...","[0.21248975, -0.2574861, 0.2946597, 0.3292546,...","[0.1293019, -0.21011654, 0.41167083, 0.1657476...","[0.15540254, -0.24394667, 0.33990672, 0.276104...","[0.1443091, -0.16478127, 0.4415368, 0.12996843...","[0.12553126, -0.22034308, 0.40583375, 0.174306..."


In [None]:
# Sanity check: every column except the id must hold vectors of one common length.
EMBEDDING_DIM = 384  # same value as the upscaler's output_dim
length_mismatches = {}
for col in features_embeddings.columns:
    if col == "track_id":
        continue
    # Number of cells in this column whose vector length is wrong
    n_bad = int((features_embeddings[col].map(len) != EMBEDDING_DIM).sum())
    if n_bad:
        length_mismatches[col] = n_bad

# Fail once, naming the offending columns and how many rows are affected,
# instead of printing an anonymous line for every bad row and carrying on.
assert not length_mismatches, (
    f"length mismatch (expected {EMBEDDING_DIM}): {length_mismatches}"
)

In [117]:
# Save the fully preprocessed frame (embedding columns plus upscaled scalars) to a pickle
features_embeddings.to_pickle("features_preprocessed.pkl")