In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [3]:
tracks = pd.read_csv('600k/tracks.csv')

In [4]:
artists = pd.read_csv('600k/artists.csv')

# Remove duplicates and null values form both dataframes

In [5]:
tracks.dropna(inplace=True)

In [6]:
tracks.duplicated(subset=['id']).sum()

0

In [7]:
artists.duplicated(subset=['id']).sum()

0

In [8]:
artists.isnull().sum()

id             0
followers     11
genres         0
name           3
popularity     0
dtype: int64

In [9]:
artists.dropna(inplace=True)

Set ids as index so further operation which depend on both dataframe gets faster

In [10]:
artists.set_index('id', inplace=True)

#the values which looks like lists are not lists but strings and we want to convert them to list , ast.literal_eval does the same

In [11]:
import ast

In [12]:
artists.genres = artists.genres.apply(lambda x: ast.literal_eval(x))

In [13]:
tracks[['artists','id_artists']]= tracks[['artists','id_artists']].map(lambda x: ast.literal_eval(x))

# We removed some artist with null values above, we have to remove tracks which have those artists.

In [14]:
def check_artists_exist_in_artist_df(artist_ids):
    for artist_id in artist_ids:
        if artist_id not in artists.index:
            return True
    return False

In [15]:
tracks_with_null_artist = tracks.id_artists.apply(check_artists_exist_in_artist_df)

In [16]:
clean_tracks = tracks[~tracks_with_null_artist]

In [17]:
#we dont need track names and artist name as feature
clean_tracks = clean_tracks.loc[:, ~clean_tracks.columns.isin(['name', 'artists'])]

# We will categorize the songs in 80s, 90s etc

In [18]:
clean_tracks['release_date'] = clean_tracks['release_date'].apply(lambda x: int(x[:4]))

In [19]:
clean_tracks['release_date'].min(), clean_tracks['release_date'].max()

(1900, 2021)

In [20]:
clean_tracks.shape

(564153, 18)

In [21]:
clean_tracks.isnull().sum()

id                  0
popularity          0
duration_ms         0
explicit            0
id_artists          0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
dtype: int64

In [22]:
clean_tracks=clean_tracks[clean_tracks["release_date"]>=1945]

In [23]:
clean_tracks.shape

(535771, 18)

In [24]:
clean_tracks.isnull().sum()

id                  0
popularity          0
duration_ms         0
explicit            0
id_artists          0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
dtype: int64

In [25]:
clean_tracks['release_date'].value_counts()

release_date
1998    12132
1999    12044
1997    12016
1996    11644
2020    11547
        ...  
1948     2177
1949     2151
1946     2140
1947     1976
1945     1736
Name: count, Length: 77, dtype: int64

In [26]:
clean_tracks["era_FLAG"]=pd.cut(clean_tracks["release_date"],[1944,1949,1959,1969,1979,1989,1999,2009,2021],labels=["40s","50s","60s","70s","80s","90s","00s","10s"],ordered=False)
clean_tracks["era_FLAG"].value_counts()

era_FLAG
10s    113919
90s    105770
00s     83844
80s     80859
70s     60757
60s     46274
50s     34168
40s     10180
Name: count, dtype: int64

In [27]:
clean_tracks.isnull().sum()

id                  0
popularity          0
duration_ms         0
explicit            0
id_artists          0
release_date        0
danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
time_signature      0
era_FLAG            0
dtype: int64

In [28]:
from tqdm import tqdm

tqdm.pandas(desc="Processing", unit=" rows")

A track can have multiple artist, so i am considering maximum poplularity and followers among all artists in the track

In [29]:
def aggregate_numerical_max(row, column_name):
    max_ = 0
    for artist_id in row['id_artists']:
        artist_value = artists.loc[artist_id][column_name]
        max_ = max(max_,artist_value)
    return max_

In [30]:
clean_tracks['artists_popularity'] = clean_tracks.progress_apply(lambda row: aggregate_numerical_max(row, 'popularity'), axis=1)

Processing: 100%|█████████████████████████████████████████████████████████| 535771/535771 [01:09<00:00, 7739.78 rows/s]


In [31]:
clean_tracks['artists_followers'] = clean_tracks.progress_apply(lambda row: aggregate_numerical_max(row, 'followers'), axis=1)

Processing: 100%|█████████████████████████████████████████████████████████| 535771/535771 [01:07<00:00, 7945.16 rows/s]


In [32]:
#Earlier when i did not apply id indexing on artist dataframe i had to wait for 40 hours(Yes Not joking) for each operation above , but with indexing it was done in 1 min

In [33]:
type(artists.loc['3cOzi726Iav1toV2LRVEjp']['genres'])

list

In [34]:
artists

Unnamed: 0_level_0,followers,genres,name,popularity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0DheY5irMjBUeLybbCUEZ2,0.000,[],Armid & Amir Zare Pashai feat. Sara Rouzbehani,0
0DlhY15l3wsrnlfGio2bjU,5.000,[],ปูนา ภาวิณี,0
0DmRESX2JknGPQyO15yxg7,0.000,[],Sadaa,0
0DmhnbHjm1qw6NCYPeZNgJ,0.000,[],Tra'gruda,0
0Dn11fWM7vHQ3rinvWEl4E,2.000,[],Ioannis Panoutsopoulos,0
...,...,...,...,...
3cOzi726Iav1toV2LRVEjp,4831.000,[black comedy],Ali Siddiq,34
6LogY6VMM3jgAE6fPzXeMl,46.000,[],Rodney Laney,2
19boQkDEIay9GaVAWkUhTa,257.000,[],Blake Wexler,10
5nvjpU3Y7L6Hpe54QuvDjy,2357.000,[black comedy],Donnell Rawlings,15


### Song genre can help us with better recommendation, We don't have any column for genre in `tracks_df` But `artist_df` do have `genres`, so we give each track their artists' genre

In [35]:
def combine_genres(row):
    genres_list = []
    for artist_id in row['id_artists']:
        artist_genres = artists.loc[artist_id]['genres']
        genres_list.extend(artist_genres)
    return genres_list

In [36]:
clean_tracks['combined_genres'] = clean_tracks.progress_apply(combine_genres, axis=1)

Processing: 100%|█████████████████████████████████████████████████████████| 535771/535771 [01:03<00:00, 8391.50 rows/s]


In [37]:
clean_tracks = clean_tracks.loc[:, ~clean_tracks.columns.isin(['release_date'])]

In [38]:
clean_tracks.isnull().sum()

id                    0
popularity            0
duration_ms           0
explicit              0
id_artists            0
danceability          0
energy                0
key                   0
loudness              0
mode                  0
speechiness           0
acousticness          0
instrumentalness      0
liveness              0
valence               0
tempo                 0
time_signature        0
era_FLAG              0
artists_popularity    0
artists_followers     0
combined_genres       0
dtype: int64

In [39]:
genres = set()
def extract_genres(artist_genres):
    for genre in artist_genres:
        if genre not in genres:
            genres.add(genre)
artists.genres.apply(extract_genres)
len(genres),genres

(5366,
 {'bass music',
  'hard rock brasileiro',
  'heavy gothic rock',
  'swansea indie',
  'bouzouki',
  'omaha indie',
  'chill lounge',
  'neo mellow',
  'candy pop',
  'german underground rap',
  'indie r&b',
  'comptine',
  'armenian folk',
  'arab trap',
  'sevilla indie',
  'uzbek traditional',
  'korean electronic',
  'shehnai',
  'manso indie',
  'guidance',
  'swedish classical',
  'irish experimental electronic',
  'italian pop rock',
  'folk rock',
  'west yorkshire indie',
  'emo rap italiano',
  'hungarian classical piano',
  'torch song',
  'russian folk metal',
  'ambient folk',
  'faroese folk',
  'barockinterpreten',
  'choro',
  'drone folk',
  'hi-tech',
  'pop quebecois',
  'swing',
  'hokkien pop',
  'metal paraguayo',
  'new zealand classical',
  'classic belgian pop',
  'folclor afrocolombiano',
  'nz punk',
  'fremantle indie',
  'brill building pop',
  'irish trap',
  'traditional scottish folk',
  'tecnobrega',
  'dutch experimental electronic',
  'azonto',


### Considering the genres as corpus we create word embeddings, this will capture the similarity between genre such as `winnipeg hip hop` and `spiritual hip hop`

In [40]:
from gensim.models import Word2Vec
genres_data = clean_tracks['combined_genres'].tolist()
word2vec_model_genres = Word2Vec(sentences=genres_data, vector_size=100, window=5, min_count=1, workers=8)

In [41]:
vec_mexican_indie = word2vec_model_genres.wv['mexican indie']
print(vec_mexican_indie)
similar_genres = word2vec_model_genres.wv.most_similar('mexican indie', topn=5)
print(similar_genres)

[-0.01785828  0.3885317  -0.3244921  -0.11066163 -1.0611145  -0.48250964
  0.6592878   0.32915542  0.21471645 -0.2504609  -0.56465536  0.04868354
  0.15350316 -0.10722407 -0.02157557  0.85691285 -0.32620707 -0.9130028
 -0.1239654  -0.55839187 -0.4186271  -0.4273217   0.05243472 -0.5477865
  0.7164236  -0.53526    -0.09680419  0.08025948  0.06666172 -0.6437789
  1.2464948  -0.14485107  0.6388846   0.10720737  0.47584417  0.27866077
  0.6174904   0.9200781   0.2596477   0.39862537 -0.03085999 -0.3047492
 -1.1268891  -0.5048559   0.28936657 -0.43310252  0.35405636 -0.46013916
  0.52155507 -0.3358728   0.47969085  0.4101826   0.6479449  -0.06589448
  0.17337362  0.9116829   0.15845713 -0.5209289  -0.19318223  0.23935992
 -0.2609494   0.35923305  0.6363505   0.61679345 -0.3096462   0.34806243
 -0.7293864  -0.22926725 -0.10464305 -0.4425467  -0.9206955   0.2706718
 -0.29456434  0.5446496   0.36848494  1.1657827  -0.83198625 -0.02610719
  0.7347361  -0.30530882 -0.6045334   0.15241332 -0.2408

### But there might be some dominant genres in a song, so to capture those genres and nuance we use tf-idf. 
### The idea is that tf-idf will capture unique relations of a genre and track and give unique recommendation to user, 
### while word_embeddings will give similar results 

In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

#Create TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform([' '.join(genres) for genres in genres_data])

In [43]:
#Calculate weighted average genre vector for each track as a NumPy array
track_vectors = np.zeros((len(genres_data), word2vec_model_genres.vector_size))
for i, genres in enumerate(genres_data):
    genre_vectors = [word2vec_model_genres.wv[genre] for genre in genres]
    tfidf_weights = tfidf_matrix[i].toarray().flatten()
    weighted_genre_vectors = np.array([genre_vector * tfidf_weight for genre_vector, tfidf_weight in zip(genre_vectors, tfidf_weights)])
    track_vectors[i] = np.mean(weighted_genre_vectors, axis=0)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [44]:
# Convert track_vectors to DataFrame columns
track_vector_columns = [f'vector_{i+1}' for i in range(word2vec_model_genres.vector_size)]
track_vector_df = pd.DataFrame(track_vectors, columns=track_vector_columns)

In [45]:
track_vector_df.shape

(535771, 100)

In [50]:
track_vector_df.isnull().sum()

vector_1      33369
vector_2      33369
vector_3      33369
vector_4      33369
vector_5      33369
              ...  
vector_96     33369
vector_97     33369
vector_98     33369
vector_99     33369
vector_100    33369
Length: 100, dtype: int64

In [52]:
track_vector_df.fillna(0, inplace=True)

In [53]:
clean_tracks.shape

(535771, 21)

In [47]:
clean_tracks.reset_index(drop=True, inplace=True)

In [54]:
feature_set1 = pd.concat([clean_tracks, track_vector_df], axis=1)

In [55]:
feature_set1.shape

(535771, 121)

In [56]:
feature_set1.isnull().sum()

id             0
popularity     0
duration_ms    0
explicit       0
id_artists     0
              ..
vector_96      0
vector_97      0
vector_98      0
vector_99      0
vector_100     0
Length: 121, dtype: int64

In [57]:
feature_set1.describe()

Unnamed: 0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,...,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100
count,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,...,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0
mean,29.224,230961.178,0.045,0.563,0.554,5.224,-10.012,0.657,0.1,0.429,...,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
std,17.686,119499.568,0.207,0.165,0.248,3.52,4.954,0.475,0.172,0.339,...,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002
min,0.0,5108.0,0.0,0.0,0.0,0.0,-60.0,0.0,0.0,0.0,...,-0.03,-0.076,-0.061,-0.058,-0.084,-0.102,-0.187,-0.293,-0.111,-0.492
25%,16.0,176953.0,0.0,0.454,0.366,2.0,-12.63,0.0,0.034,0.089,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,29.0,217800.0,0.0,0.576,0.564,5.0,-9.083,1.0,0.043,0.392,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,41.0,265567.5,0.0,0.684,0.756,8.0,-6.401,1.0,0.073,0.748,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,100.0,5621218.0,1.0,0.991,1.0,11.0,5.376,1.0,0.971,0.996,...,0.304,0.617,0.327,0.492,0.407,0.129,0.147,0.04,0.41,0.04


In [58]:
feature_set1.select_dtypes(include=['object']).columns.tolist()

['id', 'id_artists', 'combined_genres']

In [59]:
artists.describe()

Unnamed: 0,followers,popularity
count,1162081.0,1162081.0
mean,10220.722,8.796
std,254399.821,13.558
min,0.0,0.0
25%,10.0,0.0
50%,57.0,2.0
75%,417.0,13.0
max,78900234.0,100.0


In [60]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [61]:
feature_set1.describe()

Unnamed: 0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_popularity,artists_followers,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100
count,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0
mean,29.224,230961.178,0.045,0.563,0.554,5.224,-10.012,0.657,0.1,0.429,0.099,0.214,0.553,118.837,3.878,52.766,1263636.359,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
std,17.686,119499.568,0.207,0.165,0.248,3.52,4.954,0.475,0.172,0.339,0.25,0.186,0.258,29.605,0.46,18.897,4320139.978,0.001,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002
min,0.0,5108.0,0.0,0.0,0.0,0.0,-60.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.12,-0.564,-0.133,-0.116,-0.162,-0.062,-0.081,-0.292,-0.078,-0.105,-0.216,-0.595,-0.402,-0.251,-0.085,-0.195,-0.377,-0.186,-0.065,-0.034,-0.069,-0.197,-0.262,-0.175,-0.193,-0.25,-0.048,-0.333,-0.49,-0.267,-0.048,-0.149,-0.091,-0.033,-0.649,-0.188,-0.239,-0.061,-0.113,-0.193,-0.147,-0.027,-0.373,-0.087,-0.129,-0.102,-0.444,-0.091,-0.302,-0.258,-0.094,-0.273,-0.062,-0.048,-0.046,-0.155,-0.577,-0.309,-0.086,-0.412,-0.054,-0.086,-0.1,-0.206,-0.09,-0.251,-0.274,-0.052,-0.188,-0.499,-0.271,-0.257,-0.043,-0.149,-0.14,-0.137,-0.505,-0.076,-0.022,-0.357,-0.181,-0.151,-0.165,-0.157,-0.099,-0.134,-0.116,-0.109,-0.195,-0.183,-0.03,-0.076,-0.061,-0.058,-0.084,-0.102,-0.187,-0.293,-0.111,-0.492
25%,16.0,176953.0,0.0,0.454,0.366,2.0,-12.63,0.0,0.034,0.089,0.0,0.098,0.346,95.989,4.0,40.0,17229.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,29.0,217800.0,0.0,0.576,0.564,5.0,-9.083,1.0,0.043,0.392,0.0,0.138,0.563,117.887,4.0,54.0,123047.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,41.0,265567.5,0.0,0.684,0.756,8.0,-6.401,1.0,0.073,0.748,0.005,0.278,0.77,136.894,4.0,67.0,697933.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,100.0,5621218.0,1.0,0.991,1.0,11.0,5.376,1.0,0.971,0.996,1.0,1.0,1.0,246.381,5.0,100.0,78900234.0,0.113,0.138,0.177,0.117,0.717,0.241,0.424,0.13,0.174,0.268,0.311,0.059,0.136,0.168,0.185,0.106,0.081,0.254,0.187,0.327,0.112,0.083,0.122,0.054,0.25,0.036,0.418,0.075,0.1,0.145,0.352,0.182,0.405,0.172,0.197,0.055,0.059,0.243,0.068,0.111,0.199,0.249,0.087,0.077,0.043,0.442,0.334,0.554,0.126,0.168,0.566,0.118,0.346,0.296,0.143,0.135,0.093,0.12,0.269,0.162,0.191,0.455,0.136,0.143,0.512,0.159,0.11,0.167,0.098,0.04,0.05,0.058,0.149,0.152,0.156,0.169,0.101,0.338,0.303,0.036,0.088,0.258,0.102,0.323,0.063,0.436,0.033,0.156,0.069,0.062,0.304,0.617,0.327,0.492,0.407,0.129,0.147,0.04,0.41,0.04


In [62]:
feature_set1.to_csv("feature_set1.csv")

In [63]:
feature_set1

Unnamed: 0,id,popularity,duration_ms,explicit,id_artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,era_FLAG,artists_popularity,artists_followers,combined_genres,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100
0,0qB213IfGN0JXXm9aRjldF,61,167907,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.495,0.021,3,-21.329,1,0.035,0.981,0.000,0.075,0.145,62.588,4,40s,61,295411.000,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,20G1XJaTwIm2IuwA3Pjg1d,50,169067,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.503,0.059,9,-16.131,1,0.050,0.973,0.000,0.103,0.331,76.642,4,40s,61,295411.000,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,0qPeyVfebszZcHTUc48Lzl,45,171667,0,"[3ESG6pj6a0LvUKklENalT6, 5MpELOfAiq7aIBTij30phD]",0.651,0.250,5,-12.352,1,0.058,0.810,0.000,0.437,0.644,82.854,4,40s,61,295411.000,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,3YU16osxnEi1e0SvXIt0QW,44,192893,0,"[3ESG6pj6a0LvUKklENalT6, 5jCS1U0QP0gulcCtMOsOoX]",0.485,0.038,5,-16.477,1,0.029,0.977,0.006,0.086,0.205,80.334,4,40s,61,295411.000,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,3x95FfMhVfB3ZNIufQ1IF3,43,180893,0,[64vAECmFoB6mi7n1zTRwR8],0.606,0.184,10,-15.260,1,0.033,0.966,0.000,0.096,0.793,104.491,3,40s,41,20586.000,"[appalachian folk, bluegrass, bluegrass gospel...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,5rgu12WBIHQtvej2MdHSH0,50,258267,0,[1QLBXKM5GCpyQQSVMNZqrZ],0.560,0.518,0,-7.471,0,0.029,0.785,0.000,0.065,0.211,131.896,4,10s,38,896.000,[chinese viral pop],0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535767,0NuWgxEp51CutD2pJoF4OM,72,153293,0,[1dy5WNgIKQU6ezkpZs4y8z],0.765,0.663,0,-5.223,1,0.065,0.141,0.000,0.092,0.686,150.091,4,10s,67,245944.000,"[alt z, alternative r&b, bedroom pop, indie ca...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535768,27Y1N4Q4U3EfDU5Ubw8ws2,70,187601,0,[37M5pPGs6V1fchFJSgCguX],0.535,0.314,7,-12.823,0,0.041,0.895,0.000,0.087,0.066,145.095,4,10s,77,1168213.000,"[alt z, electropop, indie pop, la indie, pop, ...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535769,45XJsGpFTyzbzeWK8VzR8S,58,142003,0,"[4jGPdu95icCKVF31CcFKbS, 5ebPSE9YI5aLeZ1Z2gkqjn]",0.696,0.615,10,-6.212,1,0.035,0.206,0.000,0.305,0.438,90.029,4,10s,58,93797.000,"[chill r&b, indie cafe pop, singaporean pop, o...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [64]:
tracks.describe()

Unnamed: 0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
count,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0,586601.0
mean,27.573,230054.853,0.044,0.564,0.542,5.222,-10.206,0.659,0.105,0.45,0.113,0.214,0.552,118.468,3.873
std,18.369,126532.825,0.205,0.166,0.252,3.519,5.089,0.474,0.18,0.349,0.267,0.184,0.258,29.763,0.473
min,0.0,3344.0,0.0,0.0,0.0,0.0,-60.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,13.0,175083.0,0.0,0.453,0.343,2.0,-12.891,0.0,0.034,0.097,0.0,0.098,0.346,95.606,4.0
50%,27.0,214907.0,0.0,0.577,0.549,5.0,-9.242,1.0,0.044,0.422,0.0,0.139,0.564,117.387,4.0
75%,41.0,263867.0,0.0,0.686,0.748,8.0,-6.481,1.0,0.076,0.784,0.01,0.278,0.769,136.324,4.0
max,100.0,5621218.0,1.0,0.991,1.0,11.0,5.376,1.0,0.971,0.996,1.0,1.0,1.0,246.381,5.0


In [65]:
from sklearn.preprocessing import RobustScaler, OneHotEncoder

In [66]:
scaler = RobustScaler()
feature_set2 = scaler.fit_transform(feature_set1[['duration_ms', 'loudness', 'tempo', 'artists_followers']])

In [67]:
feature_set2 = feature_set1.copy(deep=True)

In [68]:
feature_set2[['duration_ms', 'loudness', 'tempo', 'artists_followers']] = scaler.fit_transform(feature_set2[['duration_ms', 'loudness', 'tempo', 'artists_followers']])

In [69]:
feature_set2

Unnamed: 0,id,popularity,duration_ms,explicit,id_artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,era_FLAG,artists_popularity,artists_followers,combined_genres,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100
0,0qB213IfGN0JXXm9aRjldF,61,-0.563,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.495,0.021,3,-1.966,1,0.035,0.981,0.000,0.075,0.145,-1.352,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,20G1XJaTwIm2IuwA3Pjg1d,50,-0.550,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.503,0.059,9,-1.131,1,0.050,0.973,0.000,0.103,0.331,-1.008,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,0qPeyVfebszZcHTUc48Lzl,45,-0.521,0,"[3ESG6pj6a0LvUKklENalT6, 5MpELOfAiq7aIBTij30phD]",0.651,0.250,5,-0.525,1,0.058,0.810,0.000,0.437,0.644,-0.856,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,3YU16osxnEi1e0SvXIt0QW,44,-0.281,0,"[3ESG6pj6a0LvUKklENalT6, 5jCS1U0QP0gulcCtMOsOoX]",0.485,0.038,5,-1.187,1,0.029,0.977,0.006,0.086,0.205,-0.918,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,3x95FfMhVfB3ZNIufQ1IF3,43,-0.416,0,[64vAECmFoB6mi7n1zTRwR8],0.606,0.184,10,-0.992,1,0.033,0.966,0.000,0.096,0.793,-0.327,3,40s,41,-0.151,"[appalachian folk, bluegrass, bluegrass gospel...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,5rgu12WBIHQtvej2MdHSH0,50,0.457,0,[1QLBXKM5GCpyQQSVMNZqrZ],0.560,0.518,0,0.259,0,0.029,0.785,0.000,0.065,0.211,0.342,4,10s,38,-0.179,[chinese viral pop],0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535767,0NuWgxEp51CutD2pJoF4OM,72,-0.728,0,[1dy5WNgIKQU6ezkpZs4y8z],0.765,0.663,0,0.620,1,0.065,0.141,0.000,0.092,0.686,0.787,4,10s,67,0.181,"[alt z, alternative r&b, bedroom pop, indie ca...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535768,27Y1N4Q4U3EfDU5Ubw8ws2,70,-0.341,0,[37M5pPGs6V1fchFJSgCguX],0.535,0.314,7,-0.600,0,0.041,0.895,0.000,0.087,0.066,0.665,4,10s,77,1.535,"[alt z, electropop, indie pop, la indie, pop, ...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535769,45XJsGpFTyzbzeWK8VzR8S,58,-0.855,0,"[4jGPdu95icCKVF31CcFKbS, 5ebPSE9YI5aLeZ1Z2gkqjn]",0.696,0.615,10,0.461,1,0.035,0.206,0.000,0.305,0.438,-0.681,4,10s,58,-0.043,"[chill r&b, indie cafe pop, singaporean pop, o...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [70]:
feature_set2.describe()

Unnamed: 0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artists_popularity,artists_followers,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100
count,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0
mean,29.224,0.149,0.045,0.563,0.554,5.224,-0.149,0.657,0.1,0.429,0.099,0.214,0.553,0.023,3.878,52.766,1.676,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
std,17.686,1.349,0.207,0.165,0.248,3.52,0.795,0.475,0.172,0.339,0.25,0.186,0.258,0.724,0.46,18.897,6.347,0.001,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002
min,0.0,-2.4,0.0,0.0,0.0,0.0,-8.174,0.0,0.0,0.0,0.0,0.0,0.0,-2.882,0.0,0.0,-0.181,-0.12,-0.564,-0.133,-0.116,-0.162,-0.062,-0.081,-0.292,-0.078,-0.105,-0.216,-0.595,-0.402,-0.251,-0.085,-0.195,-0.377,-0.186,-0.065,-0.034,-0.069,-0.197,-0.262,-0.175,-0.193,-0.25,-0.048,-0.333,-0.49,-0.267,-0.048,-0.149,-0.091,-0.033,-0.649,-0.188,-0.239,-0.061,-0.113,-0.193,-0.147,-0.027,-0.373,-0.087,-0.129,-0.102,-0.444,-0.091,-0.302,-0.258,-0.094,-0.273,-0.062,-0.048,-0.046,-0.155,-0.577,-0.309,-0.086,-0.412,-0.054,-0.086,-0.1,-0.206,-0.09,-0.251,-0.274,-0.052,-0.188,-0.499,-0.271,-0.257,-0.043,-0.149,-0.14,-0.137,-0.505,-0.076,-0.022,-0.357,-0.181,-0.151,-0.165,-0.157,-0.099,-0.134,-0.116,-0.109,-0.195,-0.183,-0.03,-0.076,-0.061,-0.058,-0.084,-0.102,-0.187,-0.293,-0.111,-0.492
25%,16.0,-0.461,0.0,0.454,0.366,2.0,-0.569,0.0,0.034,0.089,0.0,0.098,0.346,-0.535,4.0,40.0,-0.155,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,29.0,0.0,0.0,0.576,0.564,5.0,0.0,1.0,0.043,0.392,0.0,0.138,0.563,0.0,4.0,54.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,41.0,0.539,0.0,0.684,0.756,8.0,0.431,1.0,0.073,0.748,0.005,0.278,0.77,0.465,4.0,67.0,0.845,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,100.0,60.977,1.0,0.991,1.0,11.0,2.321,1.0,0.971,0.996,1.0,1.0,1.0,3.141,5.0,100.0,115.729,0.113,0.138,0.177,0.117,0.717,0.241,0.424,0.13,0.174,0.268,0.311,0.059,0.136,0.168,0.185,0.106,0.081,0.254,0.187,0.327,0.112,0.083,0.122,0.054,0.25,0.036,0.418,0.075,0.1,0.145,0.352,0.182,0.405,0.172,0.197,0.055,0.059,0.243,0.068,0.111,0.199,0.249,0.087,0.077,0.043,0.442,0.334,0.554,0.126,0.168,0.566,0.118,0.346,0.296,0.143,0.135,0.093,0.12,0.269,0.162,0.191,0.455,0.136,0.143,0.512,0.159,0.11,0.167,0.098,0.04,0.05,0.058,0.149,0.152,0.156,0.169,0.101,0.338,0.303,0.036,0.088,0.258,0.102,0.323,0.063,0.436,0.033,0.156,0.069,0.062,0.304,0.617,0.327,0.492,0.407,0.129,0.147,0.04,0.41,0.04


In [71]:
artists = set()
def unique_artists(artist_ids):
    for id in artist_ids:
        if id not in artists:
            artists.add(id)
feature_set2.id_artists.apply(unique_artists)
len(artists), artists

(74906,
 {'4gzpq5DPGxSnKTe4SA8HAU',
  '3kVGGn3Ajp7nFZTefceJ8x',
  '43DvNhTkMxGgYFd7k6KbMe',
  '1AM52XHXnWAEqnQwpxTLWm',
  '0mNoR8lB5SJMqHooobJrNZ',
  '4Le6xcHzz9CpoxAmXgtZP5',
  '0MIuvybztu95QtdglGYTBD',
  '7n5xb6bByE74Aw4ppe858H',
  '7tKxPkgvkySUyUavtYGA2S',
  '7IAXZaLTb6nkJr8RmVPn5y',
  '6WI9EjCdJWcwOFFtubMrGM',
  '6Y1XXFYsCelh1i5PArcLeV',
  '2bvveEa6ur7at6sVbWGXPE',
  '5u1MOcNpPkXo0wDGKgXqbX',
  '46cEeRi8p3fkd1hN0N820b',
  '0dwFxqYkvZLSA6U6XfQcDV',
  '7xihR0iAXmuXt2XJobep51',
  '297v5fJss3Pj6tJafnw8yi',
  '5SpJ7wupgkKXKKpMATIhe9',
  '4QPuhZw8xXaXXvFMmpzV5S',
  '6gK1Uct5FEdaUWRWpU4Cl2',
  '3YqBNs0XBa0zIWviH0cl4W',
  '4FC6XIiULAVlbmfrrPXsZy',
  '01vKmLvDQE2YkAA5dDPQQf',
  '7fj0wyzVM81RMmk21jWmkO',
  '5rGnc6zmQSjWGJ8KEAvGFq',
  '2ZiHQ3wOlb6bH7bf1R8NTB',
  '7tuUo4Kby0sTXYcctxdlYa',
  '4uE9TgBW0AaPDHL1qYbtd0',
  '643YpbScVr0u07i276NRiP',
  '1ZKhPkCXXgtiGgALn4OYtT',
  '4n9aZleefLyCiDvlcmyOu9',
  '2NpPlwwDVYR5dIj0F31EcC',
  '4uwquYq1zZAdTEG22VGAso',
  '0kj2q6SLR1G9zFp509cJ8x',
  '73ctpNvw9

In [73]:
feature_set2['popularity_buckets'] = feature_set2['popularity'].apply(lambda x: int(x/5))
feature_set2['artist_popularity_buckets'] = feature_set2['artists_popularity'].apply(lambda x: int(x/5))

In [74]:
feature_set2.era_FLAG

0         40s
1         40s
2         40s
3         40s
4         40s
         ... 
535766    10s
535767    10s
535768    10s
535769    10s
535770    10s
Name: era_FLAG, Length: 535771, dtype: category
Categories (8, object): ['40s', '50s', '60s', '70s', '80s', '90s', '00s', '10s']

In [75]:
encoder = OneHotEncoder(sparse_output=False)
features_encoded = encoder.fit_transform(feature_set2[['key','popularity_buckets','time_signature' ,'era_FLAG' ,'artist_popularity_buckets']])

In [76]:
features_encoded.shape

(535771, 67)

In [77]:
type(features_encoded) 

numpy.ndarray

In [78]:
categories_key = encoder.categories_[0].tolist()
categories_popularity_buckets = encoder.categories_[1].tolist()
categories_time_signature = encoder.categories_[2].tolist()
categories_era_FLAG = encoder.categories_[3].tolist()
categories_artist_popularity_buckets = encoder.categories_[4].tolist()
feature_names = categories_key + categories_popularity_buckets + categories_time_signature + categories_era_FLAG + categories_artist_popularity_buckets

features_encoded_df = pd.DataFrame(features_encoded, columns=feature_names)

In [79]:
features_encoded_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12,13,14,15,16,17,18,19,20,0.2,1.2,3.2,4.2,5.2,00s,10s,40s,50s,60s,70s,80s,90s,0.3,1.3,2.2,3.3,4.3,5.3,6.2,7.2,8.2,9.2,10.2,11.2,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
count,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0,535771.0
mean,0.129,0.069,0.115,0.035,0.084,0.09,0.052,0.127,0.056,0.113,0.063,0.067,0.093,0.068,0.073,0.084,0.1,0.1,0.098,0.1,0.083,0.065,0.051,0.036,0.023,0.014,0.008,0.004,0.001,0.0,0.0,0.0,0.0,0.0,0.011,0.106,0.865,0.018,0.156,0.213,0.019,0.064,0.086,0.113,0.151,0.197,0.01,0.009,0.013,0.018,0.028,0.038,0.053,0.065,0.083,0.092,0.107,0.095,0.099,0.086,0.08,0.053,0.035,0.015,0.014,0.004,0.001
std,0.335,0.254,0.32,0.183,0.277,0.287,0.222,0.333,0.23,0.317,0.242,0.251,0.29,0.251,0.26,0.278,0.299,0.3,0.297,0.3,0.276,0.246,0.219,0.187,0.151,0.118,0.089,0.061,0.037,0.019,0.009,0.004,0.001,0.02,0.102,0.308,0.342,0.133,0.363,0.409,0.137,0.244,0.281,0.317,0.358,0.398,0.101,0.095,0.115,0.132,0.166,0.19,0.225,0.247,0.277,0.289,0.309,0.293,0.299,0.281,0.272,0.224,0.184,0.12,0.115,0.064,0.027
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [80]:
complete_feature_set = pd.concat([feature_set2, features_encoded_df], axis=1)

In [81]:
complete_feature_set

Unnamed: 0,id,popularity,duration_ms,explicit,id_artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,era_FLAG,artists_popularity,artists_followers,combined_genres,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100,popularity_buckets,artist_popularity_buckets,0,1,2,3,4,5,6,7,8,9,10,11,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12,13,14,15,16,17,18,19,20,0.2,1.2,3.2,4.2,5.2,00s,10s,40s,50s,60s,70s,80s,90s,0.3,1.3,2.2,3.3,4.3,5.3,6.2,7.2,8.2,9.2,10.2,11.2,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
0,0qB213IfGN0JXXm9aRjldF,61,-0.563,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.495,0.021,3,-1.966,1,0.035,0.981,0.000,0.075,0.145,-1.352,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,12,12,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,20G1XJaTwIm2IuwA3Pjg1d,50,-0.550,0,"[3ESG6pj6a0LvUKklENalT6, 548L4DXlt7N14Mhbfdmdqq]",0.503,0.059,9,-1.131,1,0.050,0.973,0.000,0.103,0.331,-1.008,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,12,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,0qPeyVfebszZcHTUc48Lzl,45,-0.521,0,"[3ESG6pj6a0LvUKklENalT6, 5MpELOfAiq7aIBTij30phD]",0.651,0.250,5,-0.525,1,0.058,0.810,0.000,0.437,0.644,-0.856,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,9,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,3YU16osxnEi1e0SvXIt0QW,44,-0.281,0,"[3ESG6pj6a0LvUKklENalT6, 5jCS1U0QP0gulcCtMOsOoX]",0.485,0.038,5,-1.187,1,0.029,0.977,0.006,0.086,0.205,-0.918,4,40s,61,0.253,"[adult standards, brill building pop, easy lis...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,3x95FfMhVfB3ZNIufQ1IF3,43,-0.416,0,[64vAECmFoB6mi7n1zTRwR8],0.606,0.184,10,-0.992,1,0.033,0.966,0.000,0.096,0.793,-0.327,3,40s,41,-0.151,"[appalachian folk, bluegrass, bluegrass gospel...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,5rgu12WBIHQtvej2MdHSH0,50,0.457,0,[1QLBXKM5GCpyQQSVMNZqrZ],0.560,0.518,0,0.259,0,0.029,0.785,0.000,0.065,0.211,0.342,4,10s,38,-0.179,[chinese viral pop],0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,7,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535767,0NuWgxEp51CutD2pJoF4OM,72,-0.728,0,[1dy5WNgIKQU6ezkpZs4y8z],0.765,0.663,0,0.620,1,0.065,0.141,0.000,0.092,0.686,0.787,4,10s,67,0.181,"[alt z, alternative r&b, bedroom pop, indie ca...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,13,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535768,27Y1N4Q4U3EfDU5Ubw8ws2,70,-0.341,0,[37M5pPGs6V1fchFJSgCguX],0.535,0.314,7,-0.600,0,0.041,0.895,0.000,0.087,0.066,0.665,4,10s,77,1.535,"[alt z, electropop, indie pop, la indie, pop, ...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,15,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000
535769,45XJsGpFTyzbzeWK8VzR8S,58,-0.855,0,"[4jGPdu95icCKVF31CcFKbS, 5ebPSE9YI5aLeZ1Z2gkqjn]",0.696,0.615,10,0.461,1,0.035,0.206,0.000,0.305,0.438,-0.681,4,10s,58,-0.043,"[chill r&b, indie cafe pop, singaporean pop, o...",0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,11,11,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [82]:
complete_feature_set1 = complete_feature_set.loc[:, ~complete_feature_set.columns.isin(['popularity','key','time_signature','id_artists','era_FLAG','artists_popularity', 'combined_genres'])]

In [83]:
complete_feature_set1

Unnamed: 0,id,duration_ms,explicit,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,artists_followers,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100,popularity_buckets,artist_popularity_buckets,0,1,2,3,4,5,6,7,8,9,10,11,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12,13,14,15,16,17,18,19,20,0.2,1.2,3.2,4.2,5.2,00s,10s,40s,50s,60s,70s,80s,90s,0.3,1.3,2.2,3.3,4.3,5.3,6.2,7.2,8.2,9.2,10.2,11.2,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
0,0qB213IfGN0JXXm9aRjldF,-0.563,0,0.495,0.021,-1.966,1,0.035,0.981,0.000,0.075,0.145,-1.352,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,12,12,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,20G1XJaTwIm2IuwA3Pjg1d,-0.550,0,0.503,0.059,-1.131,1,0.050,0.973,0.000,0.103,0.331,-1.008,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,12,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,0qPeyVfebszZcHTUc48Lzl,-0.521,0,0.651,0.250,-0.525,1,0.058,0.810,0.000,0.437,0.644,-0.856,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,9,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,3YU16osxnEi1e0SvXIt0QW,-0.281,0,0.485,0.038,-1.187,1,0.029,0.977,0.006,0.086,0.205,-0.918,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,3x95FfMhVfB3ZNIufQ1IF3,-0.416,0,0.606,0.184,-0.992,1,0.033,0.966,0.000,0.096,0.793,-0.327,-0.151,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,5rgu12WBIHQtvej2MdHSH0,0.457,0,0.560,0.518,0.259,0,0.029,0.785,0.000,0.065,0.211,0.342,-0.179,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,7,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535767,0NuWgxEp51CutD2pJoF4OM,-0.728,0,0.765,0.663,0.620,1,0.065,0.141,0.000,0.092,0.686,0.787,0.181,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,13,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535768,27Y1N4Q4U3EfDU5Ubw8ws2,-0.341,0,0.535,0.314,-0.600,0,0.041,0.895,0.000,0.087,0.066,0.665,1.535,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,15,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000
535769,45XJsGpFTyzbzeWK8VzR8S,-0.855,0,0.696,0.615,0.461,1,0.035,0.206,0.000,0.305,0.438,-0.681,-0.043,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,11,11,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [85]:
complete_feature_set1.to_csv('complete_feature_set1.csv')

In [86]:
complete_feature_set1.shape

(535771, 183)

In [87]:
complete_feature_set1.columns = complete_feature_set1.columns.astype(str)

In [88]:
df_for_pca = complete_feature_set1.loc[:, ~complete_feature_set1.columns.isin(['id'])]

In [89]:
df_for_pca.columns = df_for_pca.columns.astype(str)

In [90]:
df_for_pca

Unnamed: 0,duration_ms,explicit,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,artists_followers,vector_1,vector_2,vector_3,vector_4,vector_5,vector_6,vector_7,vector_8,vector_9,vector_10,vector_11,vector_12,vector_13,vector_14,vector_15,vector_16,vector_17,vector_18,vector_19,vector_20,vector_21,vector_22,vector_23,vector_24,vector_25,vector_26,vector_27,vector_28,vector_29,vector_30,vector_31,vector_32,vector_33,vector_34,vector_35,vector_36,vector_37,vector_38,vector_39,vector_40,vector_41,vector_42,vector_43,vector_44,vector_45,vector_46,vector_47,vector_48,vector_49,vector_50,vector_51,vector_52,vector_53,vector_54,vector_55,vector_56,vector_57,vector_58,vector_59,vector_60,vector_61,vector_62,vector_63,vector_64,vector_65,vector_66,vector_67,vector_68,vector_69,vector_70,vector_71,vector_72,vector_73,vector_74,vector_75,vector_76,vector_77,vector_78,vector_79,vector_80,vector_81,vector_82,vector_83,vector_84,vector_85,vector_86,vector_87,vector_88,vector_89,vector_90,vector_91,vector_92,vector_93,vector_94,vector_95,vector_96,vector_97,vector_98,vector_99,vector_100,popularity_buckets,artist_popularity_buckets,0,1,2,3,4,5,6,7,8,9,10,11,0.1,1.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12,13,14,15,16,17,18,19,20,0.2,1.2,3.2,4.2,5.2,00s,10s,40s,50s,60s,70s,80s,90s,0.3,1.3,2.2,3.3,4.3,5.3,6.2,7.2,8.2,9.2,10.2,11.2,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1
0,-0.563,0,0.495,0.021,-1.966,1,0.035,0.981,0.000,0.075,0.145,-1.352,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,12,12,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
1,-0.550,0,0.503,0.059,-1.131,1,0.050,0.973,0.000,0.103,0.331,-1.008,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,12,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,-0.521,0,0.651,0.250,-0.525,1,0.058,0.810,0.000,0.437,0.644,-0.856,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,9,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
3,-0.281,0,0.485,0.038,-1.187,1,0.029,0.977,0.006,0.086,0.205,-0.918,0.253,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,12,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
4,-0.416,0,0.606,0.184,-0.992,1,0.033,0.966,0.000,0.096,0.793,-0.327,-0.151,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,8,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,0.457,0,0.560,0.518,0.259,0,0.029,0.785,0.000,0.065,0.211,0.342,-0.179,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,10,7,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535767,-0.728,0,0.765,0.663,0.620,1,0.065,0.141,0.000,0.092,0.686,0.787,0.181,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,13,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
535768,-0.341,0,0.535,0.314,-0.600,0,0.041,0.895,0.000,0.087,0.066,0.665,1.535,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,14,15,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000
535769,-0.855,0,0.696,0.615,0.461,1,0.035,0.206,0.000,0.305,0.438,-0.681,-0.043,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,11,11,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [91]:
df_for_pca.isnull().sum()

duration_ms     0
explicit        0
danceability    0
energy          0
loudness        0
               ..
16              0
17              0
18              0
19              0
20              0
Length: 182, dtype: int64

In [92]:
from sklearn.decomposition import PCA
pca = PCA(n_components=50)
pca_result = pca.fit_transform(df_for_pca)

In [93]:
pca_columns = [f'PC{i+1}' for i in range(50)]  # Naming the columns PC1, PC2, ..., PC50
pca_df = pd.DataFrame(data=pca_result, columns=pca_columns)

In [94]:
pca_df

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28,PC29,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39,PC40,PC41,PC42,PC43,PC44,PC45,PC46,PC47,PC48,PC49,PC50
0,0.746,-6.131,2.625,-0.731,2.986,-0.114,-0.264,0.159,-0.387,-0.100,-0.269,-0.010,-0.037,-0.095,0.113,-0.218,0.452,0.312,0.089,-0.351,-0.392,-0.311,0.309,-0.042,0.021,-0.084,-0.203,-0.093,-0.176,-0.027,0.164,0.091,0.097,-0.121,0.052,-0.125,0.037,-0.047,-0.018,0.022,-0.031,0.153,-0.154,0.054,-0.037,0.028,-0.406,-0.009,0.572,0.761
1,0.326,-4.777,1.382,-0.706,1.897,-0.252,-0.236,0.125,-0.251,-0.112,-0.227,-0.015,0.096,-0.245,0.936,-0.203,0.410,0.239,0.144,-0.569,-0.417,-0.039,0.304,-0.054,-0.137,-0.157,-0.227,-0.153,-0.242,-0.155,0.054,0.158,0.082,-0.047,-0.015,-0.082,0.114,-0.054,-0.342,-0.204,-0.355,0.177,-0.443,-0.568,-0.254,-0.076,0.001,0.062,-0.023,-0.074
2,0.120,-4.108,0.779,-0.683,1.143,-0.469,-0.301,0.163,-0.164,-0.082,-0.235,-0.007,0.015,-0.094,0.102,-0.188,0.769,0.556,-0.078,0.163,-0.402,-0.508,0.282,-0.112,-0.327,-0.419,0.082,0.045,-0.256,-0.197,-0.139,0.376,-0.012,0.422,0.035,0.057,0.116,0.208,0.618,0.004,0.302,0.012,-0.142,-0.172,-0.211,0.147,0.048,-0.002,-0.113,0.048
3,-0.112,-3.382,0.041,-0.400,1.714,-0.220,-0.246,0.189,-0.259,-0.077,-0.362,-0.005,-0.059,-0.134,0.121,-0.186,0.760,0.552,-0.096,0.035,-0.317,-0.472,0.225,-0.147,0.016,-0.787,0.105,0.182,0.431,-0.461,-0.098,-0.313,0.033,-0.295,-0.035,-0.150,0.071,-0.033,0.228,-0.032,0.138,0.038,-0.243,-0.087,-0.146,0.035,-0.162,0.049,0.013,-0.099
4,-1.904,-1.166,2.908,-0.648,1.531,0.390,-0.217,0.497,0.989,0.059,0.066,0.000,0.009,-0.054,0.077,-0.125,0.168,0.122,-0.146,0.047,0.083,0.104,-0.097,-0.482,0.477,-0.121,0.302,-0.215,0.633,-0.339,0.850,-0.337,-0.081,-0.059,0.228,0.607,-0.428,-0.181,0.312,-0.311,-0.014,-0.065,0.006,-0.167,-0.243,0.021,-0.215,0.011,-0.002,-0.122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,-1.798,-2.068,5.206,0.203,0.148,0.202,0.673,0.570,-0.234,0.246,0.010,0.755,-0.026,0.491,-0.205,-0.046,0.059,0.064,-0.012,-0.235,-0.074,0.325,-0.008,-0.091,-0.270,0.049,-0.067,-0.143,-0.209,0.024,-0.380,0.057,0.231,-0.594,-0.111,0.074,-0.198,0.556,-0.258,-0.105,-0.489,0.055,-0.288,-0.638,-0.091,-0.186,0.151,-0.003,0.025,-0.069
535767,1.542,-8.292,3.608,-0.980,-0.381,0.506,-0.281,0.623,-0.360,0.271,0.103,0.729,0.006,0.422,-0.103,-0.077,-0.021,0.028,-0.052,-0.103,0.094,0.294,-0.360,-0.423,-0.071,0.108,-0.201,0.521,-0.249,-0.013,-0.047,-0.004,-0.073,0.026,-0.013,0.079,0.134,-0.109,-0.045,-0.062,-0.112,-0.031,0.117,0.104,0.001,-0.039,-0.088,-0.095,-0.012,-0.015
535768,3.461,-8.864,2.189,-0.530,0.851,0.977,0.750,0.470,-0.401,0.226,-0.130,-0.653,-0.152,0.552,-0.207,0.005,0.050,0.025,-0.013,-0.238,0.035,0.314,-0.083,-0.019,-0.237,0.048,0.002,-0.199,-0.157,-0.301,-0.033,0.120,0.173,-0.290,-0.138,-0.075,-0.314,0.383,-0.476,-0.105,0.561,-0.032,-0.222,0.275,-0.185,-0.247,-0.048,-0.089,-0.097,-0.017
535769,-0.032,-5.101,2.966,-1.081,0.258,-0.803,-0.140,0.655,-0.292,0.343,0.069,-0.014,0.047,-0.073,0.064,-0.195,-0.076,-0.128,-0.238,0.437,0.141,0.285,0.597,0.249,0.130,0.013,-0.129,-0.004,-0.077,0.063,0.209,0.107,0.177,-0.139,0.226,0.346,-0.576,-0.457,0.046,-0.318,-0.148,-0.081,0.027,0.337,0.199,0.216,-0.456,-0.510,-0.211,0.118


In [97]:
type(complete_feature_set1[['id']])

pandas.core.frame.DataFrame

In [98]:
final_features = pd.concat([complete_feature_set1[['id']], pca_df], axis=1)

In [99]:
final_features

Unnamed: 0,id,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,PC12,PC13,PC14,PC15,PC16,PC17,PC18,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28,PC29,PC30,PC31,PC32,PC33,PC34,PC35,PC36,PC37,PC38,PC39,PC40,PC41,PC42,PC43,PC44,PC45,PC46,PC47,PC48,PC49,PC50
0,0qB213IfGN0JXXm9aRjldF,0.746,-6.131,2.625,-0.731,2.986,-0.114,-0.264,0.159,-0.387,-0.100,-0.269,-0.010,-0.037,-0.095,0.113,-0.218,0.452,0.312,0.089,-0.351,-0.392,-0.311,0.309,-0.042,0.021,-0.084,-0.203,-0.093,-0.176,-0.027,0.164,0.091,0.097,-0.121,0.052,-0.125,0.037,-0.047,-0.018,0.022,-0.031,0.153,-0.154,0.054,-0.037,0.028,-0.406,-0.009,0.572,0.761
1,20G1XJaTwIm2IuwA3Pjg1d,0.326,-4.777,1.382,-0.706,1.897,-0.252,-0.236,0.125,-0.251,-0.112,-0.227,-0.015,0.096,-0.245,0.936,-0.203,0.410,0.239,0.144,-0.569,-0.417,-0.039,0.304,-0.054,-0.137,-0.157,-0.227,-0.153,-0.242,-0.155,0.054,0.158,0.082,-0.047,-0.015,-0.082,0.114,-0.054,-0.342,-0.204,-0.355,0.177,-0.443,-0.568,-0.254,-0.076,0.001,0.062,-0.023,-0.074
2,0qPeyVfebszZcHTUc48Lzl,0.120,-4.108,0.779,-0.683,1.143,-0.469,-0.301,0.163,-0.164,-0.082,-0.235,-0.007,0.015,-0.094,0.102,-0.188,0.769,0.556,-0.078,0.163,-0.402,-0.508,0.282,-0.112,-0.327,-0.419,0.082,0.045,-0.256,-0.197,-0.139,0.376,-0.012,0.422,0.035,0.057,0.116,0.208,0.618,0.004,0.302,0.012,-0.142,-0.172,-0.211,0.147,0.048,-0.002,-0.113,0.048
3,3YU16osxnEi1e0SvXIt0QW,-0.112,-3.382,0.041,-0.400,1.714,-0.220,-0.246,0.189,-0.259,-0.077,-0.362,-0.005,-0.059,-0.134,0.121,-0.186,0.760,0.552,-0.096,0.035,-0.317,-0.472,0.225,-0.147,0.016,-0.787,0.105,0.182,0.431,-0.461,-0.098,-0.313,0.033,-0.295,-0.035,-0.150,0.071,-0.033,0.228,-0.032,0.138,0.038,-0.243,-0.087,-0.146,0.035,-0.162,0.049,0.013,-0.099
4,3x95FfMhVfB3ZNIufQ1IF3,-1.904,-1.166,2.908,-0.648,1.531,0.390,-0.217,0.497,0.989,0.059,0.066,0.000,0.009,-0.054,0.077,-0.125,0.168,0.122,-0.146,0.047,0.083,0.104,-0.097,-0.482,0.477,-0.121,0.302,-0.215,0.633,-0.339,0.850,-0.337,-0.081,-0.059,0.228,0.607,-0.428,-0.181,0.312,-0.311,-0.014,-0.065,0.006,-0.167,-0.243,0.021,-0.215,0.011,-0.002,-0.122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535766,5rgu12WBIHQtvej2MdHSH0,-1.798,-2.068,5.206,0.203,0.148,0.202,0.673,0.570,-0.234,0.246,0.010,0.755,-0.026,0.491,-0.205,-0.046,0.059,0.064,-0.012,-0.235,-0.074,0.325,-0.008,-0.091,-0.270,0.049,-0.067,-0.143,-0.209,0.024,-0.380,0.057,0.231,-0.594,-0.111,0.074,-0.198,0.556,-0.258,-0.105,-0.489,0.055,-0.288,-0.638,-0.091,-0.186,0.151,-0.003,0.025,-0.069
535767,0NuWgxEp51CutD2pJoF4OM,1.542,-8.292,3.608,-0.980,-0.381,0.506,-0.281,0.623,-0.360,0.271,0.103,0.729,0.006,0.422,-0.103,-0.077,-0.021,0.028,-0.052,-0.103,0.094,0.294,-0.360,-0.423,-0.071,0.108,-0.201,0.521,-0.249,-0.013,-0.047,-0.004,-0.073,0.026,-0.013,0.079,0.134,-0.109,-0.045,-0.062,-0.112,-0.031,0.117,0.104,0.001,-0.039,-0.088,-0.095,-0.012,-0.015
535768,27Y1N4Q4U3EfDU5Ubw8ws2,3.461,-8.864,2.189,-0.530,0.851,0.977,0.750,0.470,-0.401,0.226,-0.130,-0.653,-0.152,0.552,-0.207,0.005,0.050,0.025,-0.013,-0.238,0.035,0.314,-0.083,-0.019,-0.237,0.048,0.002,-0.199,-0.157,-0.301,-0.033,0.120,0.173,-0.290,-0.138,-0.075,-0.314,0.383,-0.476,-0.105,0.561,-0.032,-0.222,0.275,-0.185,-0.247,-0.048,-0.089,-0.097,-0.017
535769,45XJsGpFTyzbzeWK8VzR8S,-0.032,-5.101,2.966,-1.081,0.258,-0.803,-0.140,0.655,-0.292,0.343,0.069,-0.014,0.047,-0.073,0.064,-0.195,-0.076,-0.128,-0.238,0.437,0.141,0.285,0.597,0.249,0.130,0.013,-0.129,-0.004,-0.077,0.063,0.209,0.107,0.177,-0.139,0.226,0.346,-0.576,-0.457,0.046,-0.318,-0.148,-0.081,0.027,0.337,0.199,0.216,-0.456,-0.510,-0.211,0.118


In [101]:
final_features.to_csv('final_features.csv')

In [107]:
sample_final_features = final_features.sample(20000)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

feature_columns = sample_final_features.columns[1:] 
feature_matrix = sample_final_features[feature_columns].values

cosine_sim_matrix = cosine_similarity(feature_matrix)