Spotify Recommendation Algorithm Pre-processing
Steps:
1. Install required packages and dependencies and read csv file containing song data
2. Clean CSV data and convert dataframe into item-feature matrix 

In [2]:
# Install packages and dependencies
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from spotipy.oauth2 import SpotifyOAuth
from sklearn.preprocessing import MinMaxScaler
from joblib import Parallel, delayed
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

In [3]:
# Read CSV file 
# @st.cache_data
df = pd.read_csv('data\spotify_data.csv')

# Create Feature Set, drop unnecessary columns 
feat_vec = df.drop(columns=['Unnamed: 0','artist_name', 'track_name', 'key', 'duration_ms', 'time_signature'])

pd.set_option('display.max_columns', None)

feat_vec

  df = pd.read_csv('data\spotify_data.csv')


Unnamed: 0,track_id,popularity,year,genre,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,-10.058,1,0.0429,0.6940,0.000000,0.1150,0.1390,133.406
1,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,-10.286,1,0.0258,0.4770,0.000014,0.0974,0.5150,140.182
2,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,-13.711,1,0.0323,0.3380,0.000050,0.0895,0.1450,139.832
3,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,-9.845,1,0.0363,0.8070,0.000000,0.0797,0.5080,204.961
4,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.430,0.791,-5.419,0,0.0302,0.0726,0.019300,0.1100,0.2170,171.864
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,0m27F0IGHLGAWhqd6ccYst,4,2011,trip-hop,0.373,0.742,-6.453,0,0.0736,0.3250,0.000141,0.1590,0.5220,107.951
1159760,6er9p611eHEcUCU50j7D57,3,2011,trip-hop,0.516,0.675,-7.588,0,0.0326,0.7880,0.000129,0.1300,0.2640,119.897
1159761,7jsMMqxy1tt0rH5FzYcZTQ,2,2011,trip-hop,0.491,0.440,-8.512,1,0.0274,0.4770,0.003130,0.0936,0.0351,100.076
1159762,77lA1InUaXztuRk2vOzD1S,0,2011,trip-hop,0.480,0.405,-13.343,1,0.0276,0.4310,0.000063,0.1250,0.2020,133.885


**Using Multi-Hot Encoding to Represent Genres** 

In order to create an item-feature matrix to use the cosine similarity algorithm, all column types must be of numerical value. So, I would need to convert genre string values into integer values. Multi-hot-encoding is used to represent categorical data as binary vectors (0 and 1). 

In [3]:
#Create genre columns, there's so much, so lets only keep the most popular ones
genre_list = feat_vec['genre'].unique().tolist()

# genres_to_remove = ['afrobeat','black-metal','breakbeat','cantopop','chicago-house','comedy','death-metal','deep-house','detroit-techno','drum-and-bass','dubstep','electronic','forro','french','garage','german','grindcore','hard-rock','hardcore','hardstyle','heavy-metal','indian','metalcore','industrial','minimal-techno','new-age','pop-film','power-pop','progressive-house','psych-rock','punk-rock','sertanejo','show-tunes','ska','swedish','trance','trip-hop']
# updated_genre_list = list(filter(lambda x: x not in genres_to_remove, genre_list))
 
# replace indie-pop to indie
index = genre_list.index('indie-pop')
genre_list[index] = 'indie'
#need to update df as well
feat_vec.loc[feat_vec['genre'] == 'indie-pop', 'genre'] = 'indie'

print(genre_list)

# use one-hot-encoding to convert genre categories into binary matrix format
# iterate over list of genres and then make value of 1 if genre matches 
for item in genre_list:
    feat_vec['genre_'+item] = feat_vec['genre'].apply(lambda genre: 1 if genre == item else 0)
    
# drop genre column in feat_vec df
feat_vec.drop('genre', axis=1, inplace=True)

feat_vec

['acoustic', 'afrobeat', 'alt-rock', 'ambient', 'black-metal', 'blues', 'breakbeat', 'cantopop', 'chicago-house', 'chill', 'classical', 'club', 'comedy', 'country', 'dance', 'dancehall', 'death-metal', 'deep-house', 'detroit-techno', 'disco', 'drum-and-bass', 'dub', 'dubstep', 'edm', 'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk', 'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove', 'guitar', 'hard-rock', 'hardcore', 'hardstyle', 'heavy-metal', 'hip-hop', 'house', 'indian', 'indie', 'industrial', 'jazz', 'k-pop', 'metal', 'metalcore', 'minimal-techno', 'new-age', 'opera', 'party', 'piano', 'pop', 'pop-film', 'power-pop', 'progressive-house', 'psych-rock', 'punk', 'punk-rock', 'rock', 'rock-n-roll', 'romance', 'sad', 'salsa', 'samba', 'sertanejo', 'show-tunes', 'singer-songwriter', 'ska', 'sleep', 'songwriter', 'soul', 'spanish', 'swedish', 'tango', 'techno', 'trance', 'trip-hop']


Unnamed: 0,track_id,popularity,year,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre_acoustic,genre_afrobeat,genre_alt-rock,genre_ambient,genre_black-metal,genre_blues,genre_breakbeat,genre_cantopop,genre_chicago-house,genre_chill,genre_classical,genre_club,genre_comedy,genre_country,genre_dance,genre_dancehall,genre_death-metal,genre_deep-house,genre_detroit-techno,genre_disco,genre_drum-and-bass,genre_dub,genre_dubstep,genre_edm,genre_electro,genre_electronic,genre_emo,genre_folk,genre_forro,genre_french,genre_funk,genre_garage,genre_german,genre_gospel,genre_goth,genre_grindcore,genre_groove,genre_guitar,genre_hard-rock,genre_hardcore,genre_hardstyle,genre_heavy-metal,genre_hip-hop,genre_house,genre_indian,genre_indie,genre_industrial,genre_jazz,genre_k-pop,genre_metal,genre_metalcore,genre_minimal-techno,genre_new-age,genre_opera,genre_party,genre_piano,genre_pop,genre_pop-film,genre_power-pop,genre_progressive-house,genre_psych-rock,genre_punk,genre_punk-rock,genre_rock,genre_rock-n-roll,genre_romance,genre_sad,genre_salsa,genre_samba,genre_sertanejo,genre_show-tunes,genre_singer-songwriter,genre_ska,genre_sleep,genre_songwriter,genre_soul,genre_spanish,genre_swedish,genre_tango,genre_techno,genre_trance,genre_trip-hop
0,53QF56cjZA9RTuuMZDrSA6,68,2012,0.483,0.303,-10.058,1,0.0429,0.6940,0.000000,0.1150,0.1390,133.406,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1s8tP3jP4GZcyHDsjvw218,50,2012,0.572,0.454,-10.286,1,0.0258,0.4770,0.000014,0.0974,0.5150,140.182,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,7BRCa8MPiyuvr2VU3O9W0F,57,2012,0.409,0.234,-13.711,1,0.0323,0.3380,0.000050,0.0895,0.1450,139.832,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,63wsZUhUZLlh1OsyrZq7sz,58,2012,0.392,0.251,-9.845,1,0.0363,0.8070,0.000000,0.0797,0.5080,204.961,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,6nXIYClvJAfi6ujLiKqEq8,54,2012,0.430,0.791,-5.419,0,0.0302,0.0726,0.019300,0.1100,0.2170,171.864,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,0m27F0IGHLGAWhqd6ccYst,4,2011,0.373,0.742,-6.453,0,0.0736,0.3250,0.000141,0.1590,0.5220,107.951,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1159760,6er9p611eHEcUCU50j7D57,3,2011,0.516,0.675,-7.588,0,0.0326,0.7880,0.000129,0.1300,0.2640,119.897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1159761,7jsMMqxy1tt0rH5FzYcZTQ,2,2011,0.491,0.440,-8.512,1,0.0274,0.4770,0.003130,0.0936,0.0351,100.076,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1159762,77lA1InUaXztuRk2vOzD1S,0,2011,0.480,0.405,-13.343,1,0.0276,0.4310,0.000063,0.1250,0.2020,133.885,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [4]:
# Have a column for each categorial time period (bucketing)

# Find min and max values of year 
print('Min Value', feat_vec['year'].min())
print('Max Value', feat_vec['year'].max())

# Make columns for each time period
feat_vec['year_2000-2004'] = feat_vec['year'].apply(lambda year: 1 if year>=2000 and year<2005 else 0)
feat_vec['year_2005-2009'] = feat_vec['year'].apply(lambda year: 1 if year>=2005 and year<2010 else 0)
feat_vec['year_2010-2014'] = feat_vec['year'].apply(lambda year: 1 if year>=2010 and year<2015 else 0)
feat_vec['year_2015-2019'] = feat_vec['year'].apply(lambda year: 1 if year>=2015 and year<2020 else 0)
feat_vec['year_2020-2024'] = feat_vec['year'].apply(lambda year: 1 if year>=2020 and year<2025 else 0)

# Drop year column, no longer needed
feat_vec = feat_vec.drop(columns=['year'])

feat_vec


Min Value 2000
Max Value 2023


Unnamed: 0,track_id,popularity,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre_acoustic,genre_afrobeat,genre_alt-rock,genre_ambient,genre_black-metal,genre_blues,genre_breakbeat,genre_cantopop,genre_chicago-house,genre_chill,genre_classical,genre_club,genre_comedy,genre_country,genre_dance,genre_dancehall,genre_death-metal,genre_deep-house,genre_detroit-techno,genre_disco,genre_drum-and-bass,genre_dub,genre_dubstep,genre_edm,genre_electro,genre_electronic,genre_emo,genre_folk,genre_forro,genre_french,genre_funk,genre_garage,genre_german,genre_gospel,genre_goth,genre_grindcore,genre_groove,genre_guitar,genre_hard-rock,genre_hardcore,genre_hardstyle,genre_heavy-metal,genre_hip-hop,genre_house,genre_indian,genre_indie,genre_industrial,genre_jazz,genre_k-pop,genre_metal,genre_metalcore,genre_minimal-techno,genre_new-age,genre_opera,genre_party,genre_piano,genre_pop,genre_pop-film,genre_power-pop,genre_progressive-house,genre_psych-rock,genre_punk,genre_punk-rock,genre_rock,genre_rock-n-roll,genre_romance,genre_sad,genre_salsa,genre_samba,genre_sertanejo,genre_show-tunes,genre_singer-songwriter,genre_ska,genre_sleep,genre_songwriter,genre_soul,genre_spanish,genre_swedish,genre_tango,genre_techno,genre_trance,genre_trip-hop,year_2000-2004,year_2005-2009,year_2010-2014,year_2015-2019,year_2020-2024
0,53QF56cjZA9RTuuMZDrSA6,68,0.483,0.303,-10.058,1,0.0429,0.6940,0.000000,0.1150,0.1390,133.406,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1s8tP3jP4GZcyHDsjvw218,50,0.572,0.454,-10.286,1,0.0258,0.4770,0.000014,0.0974,0.5150,140.182,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,7BRCa8MPiyuvr2VU3O9W0F,57,0.409,0.234,-13.711,1,0.0323,0.3380,0.000050,0.0895,0.1450,139.832,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,63wsZUhUZLlh1OsyrZq7sz,58,0.392,0.251,-9.845,1,0.0363,0.8070,0.000000,0.0797,0.5080,204.961,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,6nXIYClvJAfi6ujLiKqEq8,54,0.430,0.791,-5.419,0,0.0302,0.0726,0.019300,0.1100,0.2170,171.864,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,0m27F0IGHLGAWhqd6ccYst,4,0.373,0.742,-6.453,0,0.0736,0.3250,0.000141,0.1590,0.5220,107.951,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1159760,6er9p611eHEcUCU50j7D57,3,0.516,0.675,-7.588,0,0.0326,0.7880,0.000129,0.1300,0.2640,119.897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1159761,7jsMMqxy1tt0rH5FzYcZTQ,2,0.491,0.440,-8.512,1,0.0274,0.4770,0.003130,0.0936,0.0351,100.076,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1159762,77lA1InUaXztuRk2vOzD1S,0,0.480,0.405,-13.343,1,0.0276,0.4310,0.000063,0.1250,0.2020,133.885,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0


**Normalizing Feature Vectors**

All feature values should be on a scale from 0-1. This is to ensure that when running the cosine similarity algorithm, the similarity depends on the direction of the vector, not the dependent on the magnitude or scale of each vetor. Varying scales and magnitude will result in some features having more weighting than others. 

Popularity scale ranges from 1-100; Loudness scale ranges from -60-0, Tempo scale ranges from 0-250. These feature values must be scaled from 0-1 to get a better cosine simlarity score. 

In [5]:
# popularity scale: 1-100, loudness scale: -60-0, tempo scale: 0-250, scale features from 0-1 
# add min and max values for each row to establish min and max values, then once scaling is done, remove min and max columns
min_row = {'popularity': '0', 'loudness': '-60', 'tempo': '0'}
max_row = {'popularity': '100', 'loudness': '0', 'tempo': '250'}

min_row_df = pd.DataFrame([min_row])
max_row_df = pd.DataFrame([max_row])

feat_vec = pd.concat([feat_vec, min_row_df], ignore_index=True)
feat_vec = pd.concat([feat_vec, max_row_df], ignore_index=True)

# scale popularity, loudness, and tempo features to 0-1
scale = ['popularity', 'loudness', 'tempo']
scaler = MinMaxScaler()
feat_vec[scale] = scaler.fit_transform(feat_vec[scale])

# drop min and max values
feat_vec = feat_vec.iloc[:-2]

feat_vec

Unnamed: 0,track_id,popularity,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genre_acoustic,genre_afrobeat,genre_alt-rock,genre_ambient,genre_black-metal,genre_blues,genre_breakbeat,genre_cantopop,genre_chicago-house,genre_chill,genre_classical,genre_club,genre_comedy,genre_country,genre_dance,genre_dancehall,genre_death-metal,genre_deep-house,genre_detroit-techno,genre_disco,genre_drum-and-bass,genre_dub,genre_dubstep,genre_edm,genre_electro,genre_electronic,genre_emo,genre_folk,genre_forro,genre_french,genre_funk,genre_garage,genre_german,genre_gospel,genre_goth,genre_grindcore,genre_groove,genre_guitar,genre_hard-rock,genre_hardcore,genre_hardstyle,genre_heavy-metal,genre_hip-hop,genre_house,genre_indian,genre_indie,genre_industrial,genre_jazz,genre_k-pop,genre_metal,genre_metalcore,genre_minimal-techno,genre_new-age,genre_opera,genre_party,genre_piano,genre_pop,genre_pop-film,genre_power-pop,genre_progressive-house,genre_psych-rock,genre_punk,genre_punk-rock,genre_rock,genre_rock-n-roll,genre_romance,genre_sad,genre_salsa,genre_samba,genre_sertanejo,genre_show-tunes,genre_singer-songwriter,genre_ska,genre_sleep,genre_songwriter,genre_soul,genre_spanish,genre_swedish,genre_tango,genre_techno,genre_trance,genre_trip-hop,year_2000-2004,year_2005-2009,year_2010-2014,year_2015-2019,year_2020-2024
0,53QF56cjZA9RTuuMZDrSA6,0.68,0.483,0.303,0.754730,1.0,0.0429,0.6940,0.000000,0.1150,0.1390,0.533624,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1s8tP3jP4GZcyHDsjvw218,0.50,0.572,0.454,0.751285,1.0,0.0258,0.4770,0.000014,0.0974,0.5150,0.560728,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,7BRCa8MPiyuvr2VU3O9W0F,0.57,0.409,0.234,0.699525,1.0,0.0323,0.3380,0.000050,0.0895,0.1450,0.559328,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,63wsZUhUZLlh1OsyrZq7sz,0.58,0.392,0.251,0.757949,1.0,0.0363,0.8070,0.000000,0.0797,0.5080,0.819844,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,6nXIYClvJAfi6ujLiKqEq8,0.54,0.430,0.791,0.824835,0.0,0.0302,0.0726,0.019300,0.1100,0.2170,0.687456,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159759,0m27F0IGHLGAWhqd6ccYst,0.04,0.373,0.742,0.809209,0.0,0.0736,0.3250,0.000141,0.1590,0.5220,0.431804,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1159760,6er9p611eHEcUCU50j7D57,0.03,0.516,0.675,0.792057,0.0,0.0326,0.7880,0.000129,0.1300,0.2640,0.479588,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1159761,7jsMMqxy1tt0rH5FzYcZTQ,0.02,0.491,0.440,0.778093,1.0,0.0274,0.4770,0.003130,0.0936,0.0351,0.400304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1159762,77lA1InUaXztuRk2vOzD1S,0.00,0.480,0.405,0.705087,1.0,0.0276,0.4310,0.000063,0.1250,0.2020,0.535540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


**Export cleaned dataset**

In [6]:
# Export feat_vec to CSV
feat_vec.to_csv('cleaned_spotify_data.csv', index=False)