In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

In [36]:
def analyze_playlist(creator, pl_id):
    audio_feature_cols = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
                          'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
    df = pd.DataFrame(columns=['name', 'album', 'artist', 'release_date', 'popularity', 'id']+audio_feature_cols)
    
    playlist = sp.user_playlist_tracks(creator, pl_id)['items']
    for track_info in playlist:
        track = track_info['track']
        
        name = track['name']
        artist = track['artists'][0]['name']
        album = track['album']['name']
        release_date = track['album']['release_date']
        popularity = track['popularity']
        t_id = track['id']
        
        track_extracted_data = [name, album, artist, release_date, popularity, t_id]
        
        audio_features = sp.audio_features(track['id'])
        for feature in audio_feature_cols:
            track_extracted_data.append(audio_features[0][feature])
        
        df.loc[len(df)] = track_extracted_data
    
    return df

In [5]:
rock_60s = analyze_playlist('Spotify', '37i9dQZF1DWWzBc3TOlaAV')
rock_70s = analyze_playlist('Spotify', '37i9dQZF1DWWwzidNQX6jx')
rock_80s = analyze_playlist('Spotify', '37i9dQZF1DX1spT6G94GFC')
rock_90s = analyze_playlist('Spotify', '37i9dQZF1DX1rVvRgjX59F')
rock_00s = analyze_playlist('Spotify', '37i9dQZF1DX3oM43CtKnRV')
rock_classics = analyze_playlist('Spotify', '37i9dQZF1DWXRqgorJj26U')
rock_df = pd.concat([rock_60s, rock_70s, rock_80s, rock_90s, rock_00s, rock_classics])
rock_df.drop_duplicates(inplace=True)
rock_df.to_csv('rock.csv', index=False)

In [10]:
rock_df.shape

(528, 19)

In [12]:
pop_70s = analyze_playlist('Spotify', '37i9dQZF1DWTJ7xPn4vNaz')
pop_80s = analyze_playlist('Spotify', '37i9dQZF1DX4UtSsGT1Sbe')
pop_90s = analyze_playlist('Spotify', '37i9dQZF1DXbTxeAdrVG2l')
pop_00s = analyze_playlist('Spotify', '37i9dQZF1DX4o1oenSJRJd')
pop_10s = analyze_playlist('Spotify', '37i9dQZF1DX5Ejj0EkURtP')
pop_classics = analyze_playlist('Spotify', '37i9dQZF1DX4v0Y84QklHD')
pop_df = pd.concat([pop_70s, pop_80s, pop_90s, pop_00s, pop_10s, pop_classics])
pop_df.drop_duplicates(inplace=True)
pop_df.to_csv('pop.csv', index=False)

In [13]:
pop_df.shape

(534, 19)

In [16]:
metal_1 = analyze_playlist('Spotify', '37i9dQZF1EQpgT26jgbgRI')
metal_2 = analyze_playlist('Spotify', '37i9dQZF1DX2LTcinqsO68')
metal_3 = analyze_playlist('Spotify', '37i9dQZF1DX08jcQJXDnEQ')
hip_hop = analyze_playlist('Spotify', '37i9dQZF1EQnqst5TRi17F')
pop_1 = analyze_playlist('Spotify', '37i9dQZF1EQncLwOalG3K7')
pop_2 = analyze_playlist('maka_97', '6mtYuOxzl58vSGnEDtZ9uB')
k_pop = analyze_playlist('Spotify', '37i9dQZF1EQpesGsmIyqcW')
jazz = analyze_playlist('Spotify', '37i9dQZF1EQqA6klNdJvwx')
rnb = analyze_playlist('Spotify', '37i9dQZF1EQoqCH7BwIYb7')
classical = analyze_playlist('Spotify', '37i9dQZF1DWWEJlAGA9gs0')
EDM = analyze_playlist('Yoshley', '3mb6U0hqR0N9A2uZXQH6se')

In [17]:
df_1 = pd.concat([metal_1, metal_2, hip_hop, pop_1, pop_2, k_pop, jazz, rnb, classical, EDM])
df_1.drop_duplicates(inplace=True)
df_1.to_csv('songs1.csv', index=False)

In [18]:
df_1.shape

(693, 19)

In [25]:
rock_df = pd.read_csv('rock.csv')
pop_df = pd.read_csv('pop.csv')
df_1 = pd.read_csv('songs1.csv')
df = pd.concat([rock_df, pop_df, df_1])
df.drop_duplicates(inplace=True)
df.drop_duplicates(subset=['id'], inplace=True)
df.to_csv('data.csv', index=False)

In [26]:
df.shape

(1684, 19)