In [None]:
import base64
import json
import os
import sys
from datetime import datetime, timezone

import boto3
import pandas as pd
import requests

In [None]:
def get_headers(client_id, client_secret):
    """Obtain a Spotify client-credentials token and build API request headers.

    Posts ``grant_type=client_credentials`` to the Spotify accounts token
    endpoint, authenticating with an HTTP Basic header built from
    ``client_id:client_secret``.

    Args:
        client_id: Spotify application client id (str).
        client_secret: Spotify application client secret (str).

    Returns:
        dict: ``{'Authorization': 'Bearer <access_token>'}``.

    Raises:
        SystemExit: with code 1 (after printing a message) when the request
            itself fails, the response status is not 200, or the response
            body does not contain an access token.
    """
    endpoint = 'https://accounts.spotify.com/api/token'
    credentials = (client_id + ':' + client_secret).encode('utf-8')
    encoded = base64.b64encode(credentials).decode('ascii')
    basic_headers = {'Authorization': 'Basic {auth}'.format(auth=encoded)}
    payload = {'grant_type': 'client_credentials'}

    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        r = requests.post(endpoint, data=payload, headers=basic_headers, timeout=10)
    except requests.RequestException:
        # Only transport-level failures are handled here; KeyboardInterrupt and
        # programming errors are no longer swallowed by a bare except.
        print('인증오류가 발생되었습니다!')
        sys.exit(1)

    if r.status_code != 200:
        if r.status_code == 401:
            print('Client_id와 Secret이 잘못되었습니다!')
        else:
            print('인증오류가 발생되었습니다!:', r.status_code)
        sys.exit(1)

    try:
        access_token = json.loads(r.text).get('access_token')
    except (ValueError, AttributeError):
        # Body was not JSON, or not a JSON object.
        access_token = None

    if not access_token:
        # Without this check the caller would receive 'Bearer None' and only
        # fail later on the first API call.
        print('인증오류가 발생되었습니다!')
        sys.exit(1)

    return {'Authorization': 'Bearer {auth}'.format(auth=access_token)}

In [None]:
def _scan_all_items(table):
    """Return every item of a DynamoDB table, following scan pagination."""
    items = []
    scan_kwargs = {}
    while True:
        page = table.scan(**scan_kwargs)
        items.extend(page.get('Items', []))
        last_key = page.get('LastEvaluatedKey')
        if not last_key:
            return items
        # A single scan call returns only one page; continue from where it stopped.
        scan_kwargs['ExclusiveStartKey'] = last_key


def _fetch_audio_features(track_ids, headers, batch_size=100):
    """Fetch Spotify audio features for track_ids in batches, dropping null entries."""
    features = []
    for start in range(0, len(track_ids), batch_size):
        ids = ','.join(track_ids[start:start + batch_size])
        url = 'https://api.spotify.com/v1/audio-features/?ids={}'.format(ids)
        try:
            r = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException:
            print('API호출오류')
            sys.exit(1)
        if r.status_code != 200:
            # Error responses (expired token, rate limit, ...) carry no
            # 'audio_features' key; report the status instead of a KeyError.
            print('API호출오류:', r.status_code)
            sys.exit(1)
        raw = json.loads(r.text)
        batch = raw.get('audio_features') or []
        features.extend(entry for entry in batch if entry is not None)
    return features


def main():
    """Export Spotify audio features for all tracks in DynamoDB to S3 as parquet.

    Steps:
        1. Scan the ``top_tracks`` DynamoDB table (all pages) and collect
           each item's ``track_id``.
        2. Request audio features from the Spotify API in batches of 100 ids,
           using headers from ``get_headers``.
        3. Write the result to ``audio_features.parquet`` (pyarrow, snappy),
           with the ``id`` column renamed to ``track_id``.
        4. Upload the file to the ``hyun-artist`` bucket under
           ``audio_features/dt=<UTC date>/audio_features.parquet``.

    Reads the module-level names ``client_id`` and ``client_secret``; they
    must be defined before this function is called.

    Returns:
        None. Returns early, without writing or uploading anything, when the
        table yields no track ids.

    Raises:
        SystemExit: with code 1 when the DynamoDB resource cannot be created,
            authentication fails, or a Spotify API call fails.
    """
    try:
        dynamodb = boto3.resource('dynamodb',
                                  region_name='ap-northeast-2',
                                  # HTTPS instead of the original plain-HTTP endpoint.
                                  endpoint_url='https://dynamodb.ap-northeast-2.amazonaws.com')
    except Exception:
        print('DynamoDB접속오류!')
        sys.exit(1)

    items = _scan_all_items(dynamodb.Table('top_tracks'))
    # Only track_id is needed downstream; skip items that lack one.
    track_ids = [item['track_id'] for item in items if item.get('track_id')]
    if not track_ids:
        print('조회된 트랙이 없습니다!')
        return

    headers = get_headers(client_id, client_secret)
    features_df = pd.DataFrame(_fetch_audio_features(track_ids, headers))
    features_df = features_df.rename(columns={'id': 'track_id'})

    local_path = 'audio_features.parquet'
    features_df.to_parquet(local_path, engine='pyarrow', compression='snappy')

    dt = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    s3 = boto3.resource('s3', region_name='ap-northeast-2')
    ob = s3.Object('hyun-artist', 'audio_features/dt={}/audio_features.parquet'.format(dt))
    # Context manager guarantees the file handle is closed after the upload.
    with open(local_path, 'rb') as parquet_file:
        ob.put(Body=parquet_file)

In [None]:
# NOTE(review): main() reads the module-level names client_id and client_secret,
# which this notebook only assigns in the following cell. On a fresh kernel
# (Restart & Run All) this call is therefore expected to fail with NameError
# once main() reaches get_headers(client_id, client_secret). The following cell
# already invokes main() after setting the credentials — consider removing
# this cell.
main()

In [None]:
if __name__ == '__main__':
    # Credentials are taken from the environment when available so the secret
    # never has to be written into the notebook. The literals are the original
    # notebook values, kept only as fallbacks for backward compatibility; with
    # the empty fallback secret, authentication cannot succeed, so
    # SPOTIFY_CLIENT_SECRET must be set for a real run.
    client_id = os.environ.get('SPOTIFY_CLIENT_ID', '9e22029795364c83be0d5c1c8ea99657')
    client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET', '')
    main()