In [3]:
import pandas as pd
import json
import sys
sys.path.append('..')
from spotify_api import SpotifyAPI

In [18]:
# Load the per-track records and pull out the fields used by later cells.
track_data = pd.read_json('../datasets/rank_1/track_data.json')

# Read each field straight from its column instead of walking the frame with
# iterrows() three times (iterrows builds a full Series per row on every pass).
# Only the first listed artist of each track is kept.
artist_ids = [artists[0]['id'] for artists in track_data['artists']]
album_ids = [album['id'] for album in track_data['album']]
explicits = track_data['explicit'].tolist()

In [46]:
# Load the table of unique tracks (columns shown below: Song, Artist, Track ID).
# NOTE(review): this path is relative to the current working directory, while
# track_data.json is read from '../datasets/...' -- confirm which directory the
# notebook is meant to run from.
unique_csv_path = 'datasets/rank_1/unique.csv'
unique_df = pd.read_csv(unique_csv_path)
unique_df.head(n=5)

Unnamed: 0,Song,Artist,Track ID
0,Poor Little Fool,Ricky Nelson,5ayybTSXNwcarDtxQKqvWX
1,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,006Ndmw2hHxvnLbJsBFnPx
2,Little Star,The Elegants,6xupOaBWORbDmakCdQwMRG
3,It's All In The Game,Tommy Edwards,2tvt5K7y1gndmCgtIoLo1f
4,It's Only Make Believe,Conway Twitty,1xVOttVNT27FBTD8iHjOfU


In [47]:
# Attach the artist and album IDs extracted from track_data to the unique-track
# table, then persist the combined table as metadata.csv (without the index).
# The ID lists are assigned positionally, so they must have one entry per row
# of unique_df, in the same order -- TODO confirm track_data.json and
# unique.csv are row-aligned.
for id_column, id_values in (('Artist ID', artist_ids), ('Album ID', album_ids)):
    unique_df[id_column] = id_values

unique_df.to_csv('datasets/rank_1/metadata.csv', index=False)
unique_df.head(n=5)

Unnamed: 0,Song,Artist,Track ID,Artist ID,Album ID
0,Poor Little Fool,Ricky Nelson,5ayybTSXNwcarDtxQKqvWX,73sSFVlM6pkweLXE8qw1OS,08TlIPCh7esOAnqBvlmkEt
1,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,006Ndmw2hHxvnLbJsBFnPx,4llklDtTTyMYMY2LfFOkTI,3RWfLO5X2CSxBI5oH84mra
2,Little Star,The Elegants,6xupOaBWORbDmakCdQwMRG,7bNoMfBqbaLJrfH3Vw1q6L,7mFC0w7g7lopYnuQeZH7rd
3,It's All In The Game,Tommy Edwards,2tvt5K7y1gndmCgtIoLo1f,1ObAu0tz0NlOI6FNHTXchD,5Pm6JmFRagCaNbD5OLlG3d
4,It's Only Make Believe,Conway Twitty,1xVOttVNT27FBTD8iHjOfU,7gi3jmwpUpNWdswT8eEprF,3niZ3iipFSFzO6335VBZiP


In [4]:
# Create the client from the project-local spotify_api module.
# The secrets file is looked up relative to the working directory; presumably
# it holds the API credentials -- verify against SpotifyAPI.
secrets_file = 'secrets.json'
api = SpotifyAPI(secrets_path=secrets_file)

In [7]:
artists_data = await api.get_artists_data(artist_ids=unique_df['Artist ID'], retries=3, delay=3)
artist_genres = [item['genres'] for item in artists_data]

In [39]:
# Build the final per-track info table: add genres, explicit flag and a few
# audio-feature columns, normalise column names, and write track_info.csv.
unique_df['artist_genres'] = artist_genres
unique_df['explicit'] = explicits

# NOTE(review): features.csv is read from '../datasets' while the result is
# written under 'datasets/' (no '..') -- confirm the intended working directory.
# Columns taken from features_df are aligned to unique_df by index label, so
# this assumes both tables list the same tracks in the same row order -- TODO
# confirm.
features_df = pd.read_csv('../datasets/features.csv')

# Format the millisecond duration as "M:SS". The value is read directly from
# features_df: unique_df never has a 'duration_ms' column of its own, so
# looking it up (or dropping it) on unique_df raises KeyError on a fresh run.
unique_df['duration'] = features_df['duration_ms'].apply(
    lambda ms: f"{ms // 60000}:{(ms % 60000) // 1000:02d}"
)

# Translate the numeric pitch-class and mode codes into readable labels.
key_mapping = {0: "C", 1: "C♯/D♭", 2: "D", 3: "D♯/E♭", 4: "E", 5: "F", 6: "F♯/G♭", 7: "G", 8: "G♯/A♭", 9: "A", 10: "A♯/B♭", 11: "B"}
unique_df['key'] = features_df['key'].map(key_mapping).astype('category')
unique_df['mode'] = features_df['mode'].replace({0: 'Minor', 1: 'Major'}).astype('category')
unique_df['time_signature'] = features_df['time_signature'].astype('category')

# NOTE(review): 'Album ID' is not renamed and therefore keeps its original
# spelling in the written CSV -- confirm whether it should become 'album_id'.
unique_df = unique_df.rename(columns={
    'Song': 'track_name',
    'Artist': 'artist_name',
    'Track ID': 'track_id',
    'Artist ID': 'artist_id'
})

# Put the identifying columns first; every other column keeps its relative order.
col_order = ['track_id', 'track_name', 'artist_id', 'artist_name']
cols = col_order + [col for col in unique_df.columns if col not in col_order]
unique_df = unique_df[cols]

unique_df.to_csv('datasets/rank_1/track_info.csv', index=False)
unique_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,Album ID,artist_genres,explicit,duration,key,mode,time_signature
0,5ayybTSXNwcarDtxQKqvWX,Poor Little Fool,73sSFVlM6pkweLXE8qw1OS,Ricky Nelson,08TlIPCh7esOAnqBvlmkEt,"[adult standards, doo-wop, rock-and-roll, rock...",False,2:33,C,Major,4
1,006Ndmw2hHxvnLbJsBFnPx,Nel Blu Dipinto Di Blu (Volare),4llklDtTTyMYMY2LfFOkTI,Domenico Modugno,3RWfLO5X2CSxBI5oH84mra,"[classic italian pop, italian adult pop]",False,3:36,A♯/B♭,Major,4
2,6xupOaBWORbDmakCdQwMRG,Little Star,7bNoMfBqbaLJrfH3Vw1q6L,The Elegants,7mFC0w7g7lopYnuQeZH7rd,[doo-wop],False,2:40,A,Major,4
3,2tvt5K7y1gndmCgtIoLo1f,It's All In The Game,1ObAu0tz0NlOI6FNHTXchD,Tommy Edwards,5Pm6JmFRagCaNbD5OLlG3d,"[deep adult standards, doo-wop, rhythm and blues]",False,2:38,D♯/E♭,Major,4
4,1xVOttVNT27FBTD8iHjOfU,It's Only Make Believe,7gi3jmwpUpNWdswT8eEprF,Conway Twitty,3niZ3iipFSFzO6335VBZiP,"[arkansas country, classic country pop, countr...",False,2:12,B,Major,4
