In [1]:
import sys

import spotipy
from spotipy.oauth2 import SpotifyOAuth


class SpotifyApi:
    def __init__(self, client_id, client_secret, redirect_uri, scope):
        self.client_id = client_id
        self.client_secret = client_secret
        self.redirect_uri = redirect_uri
        self.scope = scope

    def connect_to_spotify(self):
        token = SpotifyOAuth(client_id=self.client_id,
                             client_secret=self.client_secret,
                             redirect_uri=self.redirect_uri,
                             scope=self.scope)
        # print(token)
        if not token:
            sys.exit('Authorization failed')
        sp = spotipy.Spotify(auth_manager=token)
        return sp

In [2]:
if __name__ == "__main__":

    # 设置spotipy应用接口参数   me
    CLIENT_ID = "85abc35b96044909a7f9271e944604c7"
    CLIENT_SECRET = "87da784091c94768997ba35ae54c995f"
    REDIRECT_URI = "https://cn.bing.com/"
    SCOPE = "user-library-read"

    # 获取token信息
    # token = spotipy.util.prompt_for_user_token(username, scope)
    spotipy_obj = SpotifyApi(CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, SCOPE).connect_to_spotify()
    # 根据操作对象获取当前用户所收藏的曲目列表【limit：最多查询50首】
    saved_tracks_resp = spotipy_obj.current_user_saved_tracks(limit=50)

    number_of_tracks = saved_tracks_resp["total"]
    print("共获取 %d 首音乐" % number_of_tracks)

共获取 3 首音乐


In [3]:
for idx, item in enumerate(saved_tracks_resp['items']):
        track = item['track']
        print(idx, track['artists'][0]['name'], " – ", track['name'])

0 Harry Styles  –  As It Was
1 Lizzo  –  About Damn Time
2 Jack Harlow  –  First Class


In [6]:
saved_tracks_resp['items'][0]

{'added_at': '2022-05-21T14:54:35Z',
 'track': {'album': {'album_type': 'single',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6KImCVD70vtIoJWnq6nGn3'},
     'href': 'https://api.spotify.com/v1/artists/6KImCVD70vtIoJWnq6nGn3',
     'id': '6KImCVD70vtIoJWnq6nGn3',
     'name': 'Harry Styles',
     'type': 'artist',
     'uri': 'spotify:artist:6KImCVD70vtIoJWnq6nGn3'}],
   'available_markets': ['AD',
    'AE',
    'AG',
    'AL',
    'AM',
    'AO',
    'AR',
    'AT',
    'AU',
    'AZ',
    'BA',
    'BB',
    'BD',
    'BE',
    'BF',
    'BG',
    'BH',
    'BI',
    'BJ',
    'BN',
    'BO',
    'BR',
    'BS',
    'BT',
    'BW',
    'BY',
    'BZ',
    'CA',
    'CD',
    'CG',
    'CH',
    'CI',
    'CL',
    'CM',
    'CO',
    'CR',
    'CV',
    'CW',
    'CY',
    'CZ',
    'DE',
    'DJ',
    'DK',
    'DM',
    'DO',
    'DZ',
    'EC',
    'EE',
    'EG',
    'ES',
    'FI',
    'FJ',
    'FM',
    'FR',
    'GA',
    'GB',
    'GD',
    '

In [7]:
# 只保存有用的字段数据
def save_only_some_fields(track_response):
    return {
        'id': str(track_response['track']['id']),
        'name': str(track_response['track']['name']),
        'artists': [artist['name'] for artist in track_response['track']['artists']],
        'duration_ms': track_response['track']['duration_ms'],
        'popularity': track_response['track']['popularity'],
        'added_at': track_response['added_at']
    }


tracks = [save_only_some_fields(track) for track in saved_tracks_resp['items']]

In [8]:
tracks

[{'id': '4LRPiXqCikLlN15c3yImP7',
  'name': 'As It Was',
  'artists': ['Harry Styles'],
  'duration_ms': 167303,
  'popularity': 100,
  'added_at': '2022-05-21T14:54:35Z'},
 {'id': '1PckUlxKqWQs3RlWXVBLw3',
  'name': 'About Damn Time',
  'artists': ['Lizzo'],
  'duration_ms': 191822,
  'popularity': 90,
  'added_at': '2022-05-21T14:54:34Z'},
 {'id': '1rDQ4oMwGJI7B4tovsBOxc',
  'name': 'First Class',
  'artists': ['Jack Harlow'],
  'duration_ms': 173947,
  'popularity': 94,
  'added_at': '2022-05-21T14:54:33Z'}]

In [10]:
saved_tracks_resp.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [11]:
saved_tracks_resp['next']

In [12]:
import pandas as pd

while saved_tracks_resp['next']:
    saved_tracks_resp = spotipy_obj.next(saved_tracks_resp)
    tracks.extend([save_only_some_fields(track) for track in saved_tracks_resp['items']])

In [13]:
tracks_df = pd.DataFrame(tracks)
pd.set_option('display.max_rows', len(tracks))

In [14]:
tracks_df

Unnamed: 0,id,name,artists,duration_ms,popularity,added_at
0,4LRPiXqCikLlN15c3yImP7,As It Was,[Harry Styles],167303,100,2022-05-21T14:54:35Z
1,1PckUlxKqWQs3RlWXVBLw3,About Damn Time,[Lizzo],191822,90,2022-05-21T14:54:34Z
2,1rDQ4oMwGJI7B4tovsBOxc,First Class,[Jack Harlow],173947,94,2022-05-21T14:54:33Z


In [15]:
tracks_df['artists'] = tracks_df['artists'].apply(lambda artists: artists[0])
tracks_df['duration_ms'] = tracks_df['duration_ms'].apply(lambda duration: duration / 1000)

tracks_df = tracks_df.rename(columns={'duration_ms': 'duration_s'})

In [16]:
tracks_df

Unnamed: 0,id,name,artists,duration_s,popularity,added_at
0,4LRPiXqCikLlN15c3yImP7,As It Was,Harry Styles,167.303,100,2022-05-21T14:54:35Z
1,1PckUlxKqWQs3RlWXVBLw3,About Damn Time,Lizzo,191.822,90,2022-05-21T14:54:34Z
2,1rDQ4oMwGJI7B4tovsBOxc,First Class,Jack Harlow,173.947,94,2022-05-21T14:54:33Z


In [20]:
audio_features = {}

for idd in tracks_df['id']:
    audio_features[idd] = spotipy_obj.audio_features(idd)[0]

4LRPiXqCikLlN15c3yImP7
1PckUlxKqWQs3RlWXVBLw3
1rDQ4oMwGJI7B4tovsBOxc


In [26]:
audio_features['4LRPiXqCikLlN15c3yImP7']

{'danceability': 0.52,
 'energy': 0.731,
 'key': 6,
 'loudness': -5.338,
 'mode': 0,
 'speechiness': 0.0557,
 'acousticness': 0.342,
 'instrumentalness': 0.00101,
 'liveness': 0.311,
 'valence': 0.662,
 'tempo': 173.93,
 'type': 'audio_features',
 'id': '4LRPiXqCikLlN15c3yImP7',
 'uri': 'spotify:track:4LRPiXqCikLlN15c3yImP7',
 'track_href': 'https://api.spotify.com/v1/tracks/4LRPiXqCikLlN15c3yImP7',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4LRPiXqCikLlN15c3yImP7',
 'duration_ms': 167303,
 'time_signature': 4}

In [23]:
tracks_df

Unnamed: 0,id,name,artists,duration_s,popularity,added_at
0,4LRPiXqCikLlN15c3yImP7,As It Was,Harry Styles,167.303,100,2022-05-21T14:54:35Z
1,1PckUlxKqWQs3RlWXVBLw3,About Damn Time,Lizzo,191.822,90,2022-05-21T14:54:34Z
2,1rDQ4oMwGJI7B4tovsBOxc,First Class,Jack Harlow,173.947,94,2022-05-21T14:54:33Z


In [43]:
# 根据需求更改 unneeded_features
unneeded_features = {"type", "id", "uri", "track_href", "analysis_url", "duration_ms"}
all_features = set(audio_features[set(audio_features.keys()).pop()].keys())
reserved_features = list(all_features.difference(unneeded_features))
reserved_features

['valence',
 'speechiness',
 'energy',
 'loudness',
 'acousticness',
 'tempo',
 'liveness',
 'danceability',
 'time_signature',
 'instrumentalness',
 'key',
 'mode']

In [44]:
for  feature in reserved_features:
    tracks_df[feature] = tracks_df['id'].apply(lambda idd: audio_features[idd][feature])

In [45]:
tracks_df

Unnamed: 0,id,name,artists,duration_s,popularity,added_at,valence,speechiness,energy,loudness,acousticness,tempo,liveness,danceability,time_signature,instrumentalness,key,mode
0,4LRPiXqCikLlN15c3yImP7,As It Was,Harry Styles,167.303,100,2022-05-21T14:54:35Z,0.662,0.0557,0.731,-5.338,0.342,173.93,0.311,0.52,4,0.00101,6,0
1,1PckUlxKqWQs3RlWXVBLw3,About Damn Time,Lizzo,191.822,90,2022-05-21T14:54:34Z,0.722,0.0656,0.743,-6.305,0.0995,108.966,0.335,0.836,4,0.0,10,0
2,1rDQ4oMwGJI7B4tovsBOxc,First Class,Jack Harlow,173.947,94,2022-05-21T14:54:33Z,0.324,0.102,0.563,-6.135,0.0254,106.998,0.113,0.905,4,1e-05,8,1


### 完整API Class

In [47]:
import sys

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth


class SpotifyApi:
    def __init__(self, client_id, client_secret, redirect_uri, scope):
        self.client_id = client_id
        self.client_secret = client_secret
        self.redirect_uri = redirect_uri
        self.scope = scope

    def connect_to_spotify(self):
        token = SpotifyOAuth(client_id=self.client_id,
                             client_secret=self.client_secret,
                             redirect_uri=self.redirect_uri,
                             scope=self.scope)
        # print(token)
        if not token:
            sys.exit('Authorization failed')
        sp = spotipy.Spotify(auth_manager=token)
        return sp

    # 只保存有用的字段数据
    def _save_only_some_fields(self, track_response):
        return {
            'id': str(track_response['track']['id']),
            'name': str(track_response['track']['name']),
            'artists': [artist['name'] for artist in track_response['track']['artists']],
            'duration_ms': track_response['track']['duration_ms'],
            'popularity': track_response['track']['popularity'],
            'added_at': track_response['added_at']
        }

    def _add_other_features(self, spotipy_obj, tracks_df):
        # generate features  dict
        audio_features = {}
        for idd in tracks_df['id']:
            audio_features[idd] = spotipy_obj.audio_features(idd)[0]

        # 根据需求更改 unneeded_features
        unneeded_features = {"type", "id", "uri", "track_href", "analysis_url", "duration_ms"}
        all_features = set(audio_features[set(audio_features.keys()).pop()].keys())
        reserved_features = list(all_features.difference(unneeded_features))
        # print(reserved_features)

        for feature in reserved_features:
            tracks_df[feature] = tracks_df['id'].apply(lambda idd: audio_features[idd][feature])

        return tracks_df

    def get_track_df(self, max_query_num):
        """
        :param max_query_num: Get the max number of the current user's favorite track list.
        :return: DataFrame : User's favorite songs and corresponding features
        """
        # 连接到spotify并获得操作对象
        spotipy_obj = self.connect_to_spotify()
        # 根据操作对象获取当前用户所收藏的曲目列表【limit：最多查询 max_query_num 首】
        saved_tracks_resp = spotipy_obj.current_user_saved_tracks(limit=max_query_num)

        # 输出用户当前收藏的音乐数（小于等于max_query_num）
        # number_of_tracks = saved_tracks_resp["total"]
        # print("共获取 %d 首音乐" % number_of_tracks)

        tracks = [self._save_only_some_fields(track) for track in saved_tracks_resp['items']]

        # 可选项
        while saved_tracks_resp['next']:
            saved_tracks_resp = spotipy_obj.next(saved_tracks_resp)
            tracks.extend([self._save_only_some_fields(track) for track in saved_tracks_resp['items']])

        # 转换为DataFrame
        tracks_df = pd.DataFrame(tracks)
        # 转换列的数据
        tracks_df['artists'] = tracks_df['artists'].apply(lambda artists: artists[0])
        tracks_df['duration_ms'] = tracks_df['duration_ms'].apply(lambda duration: duration / 1000)
        tracks_df = tracks_df.rename(columns={'duration_ms': 'duration_s'})

        return self._add_other_features(spotipy_obj, tracks_df)

In [48]:
# 设置spotipy应用接口参数   me
CLIENT_ID = "85abc35b96044909a7f9271e944604c7"
CLIENT_SECRET = "87da784091c94768997ba35ae54c995f"
REDIRECT_URI = "https://cn.bing.com/"
SCOPE = "user-library-read"

# 获取SpotifyApi对象
sp_api = SpotifyApi(CLIENT_ID, CLIENT_SECRET, REDIRECT_URI, SCOPE)
# 根据操作对象获取当前用户所收藏的曲目列表【最多查询50首】
song_data = sp_api.get_track_df(max_query_num=50)
song_data

Unnamed: 0,id,name,artists,duration_s,popularity,added_at,valence,speechiness,energy,loudness,acousticness,tempo,liveness,danceability,time_signature,instrumentalness,key,mode
0,4LRPiXqCikLlN15c3yImP7,As It Was,Harry Styles,167.303,100,2022-05-21T14:54:35Z,0.662,0.0557,0.731,-5.338,0.342,173.93,0.311,0.52,4,0.00101,6,0
1,1PckUlxKqWQs3RlWXVBLw3,About Damn Time,Lizzo,191.822,90,2022-05-21T14:54:34Z,0.722,0.0656,0.743,-6.305,0.0995,108.966,0.335,0.836,4,0.0,10,0
2,1rDQ4oMwGJI7B4tovsBOxc,First Class,Jack Harlow,173.947,94,2022-05-21T14:54:33Z,0.324,0.102,0.563,-6.135,0.0254,106.998,0.113,0.905,4,1e-05,8,1
