In [1]:
import base64
import json
import os
import sys
from datetime import datetime

import boto3
import jsonpath
import pandas as pd
import requests

In [2]:
def get_headers(client_id, client_secret, timeout=10):
    """Request a Spotify client-credentials token and build API request headers.

    Posts ``grant_type=client_credentials`` to the Spotify accounts endpoint,
    authenticating with HTTP Basic auth built from ``client_id:client_secret``.

    Args:
        client_id: Spotify application client id (str).
        client_secret: Spotify application client secret (str).
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        dict: ``{'Authorization': 'Bearer <access_token>'}``.

    Raises:
        SystemExit: With code 1, after printing a message, when the request
            fails, the endpoint answers with a non-200 status, or the
            response body carries no usable access token.
    """
    endpoint = 'https://accounts.spotify.com/api/token'
    encoded = base64.b64encode((client_id + ':' + client_secret).encode('utf-8')).decode('ascii')
    headers = {'Authorization': 'Basic {auth}'.format(auth=encoded)}
    payload = {'grant_type': 'client_credentials'}
    try:
        r = requests.post(endpoint, data=payload, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Only request-layer failures are handled here; KeyboardInterrupt and
        # programming errors are left to propagate instead of being reported
        # as an authentication problem.
        print('인증오류가 발생되었습니다!', exc)
        sys.exit(1)

    if r.status_code == 401:
        print('Client_id와 Secret이 잘못되었습니다!')
        sys.exit(1)
    if r.status_code != 200:
        print('인증오류가 발생되었습니다!:', r.status_code)
        sys.exit(1)

    try:
        access_token = json.loads(r.text).get('access_token')
    except (ValueError, AttributeError):
        # Body was not JSON, or was JSON but not an object.
        access_token = None
    if not access_token:
        # Without this check the caller would get the header 'Bearer None'.
        print('인증오류가 발생되었습니다!')
        sys.exit(1)

    return {'Authorization': 'Bearer {auth}'.format(auth=access_token)}

In [3]:
# Spotify API credentials are read from the environment so that secrets never
# live in the notebook. A missing variable raises KeyError here, before any
# request is attempted.
# NOTE(review): the credential values previously committed in this cell should
# be treated as exposed and rotated.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

In [3]:
# Connect to DynamoDB and read every item of the `top_tracks` table into
# `response` (a list of item dicts), which the following cell loads into a
# DataFrame.
try:
    dynamodb = boto3.resource('dynamodb',
                              region_name='ap-northeast-2',
                              # HTTPS rather than plain HTTP for the regional endpoint.
                              endpoint_url='https://dynamodb.ap-northeast-2.amazonaws.com')
    table = dynamodb.Table('top_tracks')

    # A single Scan call returns at most one page of results; keep following
    # LastEvaluatedKey until the whole table has been read.
    response = []
    scan_kwargs = {}
    while True:
        page = table.scan(**scan_kwargs)
        response.extend(page['Items'])
        last_key = page.get('LastEvaluatedKey')
        if last_key is None:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key
except Exception as exc:
    # Report the underlying cause; unlike a bare `except:`, this does not
    # intercept KeyboardInterrupt or SystemExit.
    print('DynamoDB접속오류!', exc)
    sys.exit(1)

table

dynamodb.Table(name='top_tracks')

In [5]:
# Load the scanned items into a DataFrame, keep the five track columns in a
# fixed order, and print a structural summary.
data = pd.DataFrame(response).loc[
    :,
    ['artist_id', 'track_id', 'track_name', 'popularity', 'external_url'],
]
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4963 entries, 0 to 4962
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   artist_id     4963 non-null   object
 1   track_id      4963 non-null   object
 2   track_name    4963 non-null   object
 3   popularity    4963 non-null   object
 4   external_url  4963 non-null   object
dtypes: object(5)
memory usage: 194.0+ KB


In [6]:
# Write the track table to a local Parquet file, then upload that file to S3
# under a date-partitioned key (dt=<UTC date>).
data.to_parquet('top_tracks.parquet', engine='pyarrow',
                compression='snappy')

# `dt` is also used by the audio-features upload cell further down.
dt = datetime.utcnow().strftime('%Y-%m-%d')
s3 = boto3.resource('s3', region_name='ap-northeast-2')
ob = s3.Object('moon-artist', 'top-tracks/dt={}/top_tracks.parquet'.format(dt))

# The context manager closes the file handle once the upload finishes, and the
# handle gets its own name so the `data` DataFrame is not overwritten.
with open('top_tracks.parquet', 'rb') as parquet_file:
    put_response = ob.put(Body=parquet_file)

# Last expression: display the S3 response as the cell output.
put_response

{'ResponseMetadata': {'RequestId': '168C6T6HHA9B1QFF',
  'HostId': 'hZp7zWDzMteq6NK/x188VK2IIMQ8lCpqC9KQKYIEyuyd269ytx8e6OEtLcSKs8QyOI6N0q4ImcI=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'hZp7zWDzMteq6NK/x188VK2IIMQ8lCpqC9KQKYIEyuyd269ytx8e6OEtLcSKs8QyOI6N0q4ImcI=',
   'x-amz-request-id': '168C6T6HHA9B1QFF',
   'date': 'Mon, 01 Nov 2021 05:28:16 GMT',
   'etag': '"206527adc60c4285c538f246ccf1f93c"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"206527adc60c4285c538f246ccf1f93c"'}

In [7]:
# Fetch Spotify audio features for every track in the exported Parquet file,
# sending the track ids in batches of 100 per request.
data = pd.read_parquet('top_tracks.parquet')
batch_size = 100
track_ids = data.track_id.values
tracks_batch = [track_ids[start:start + batch_size]
                for start in range(0, len(track_ids), batch_size)]
audio_features = []
headers = get_headers(client_id, client_secret)
for batch in tracks_batch:
    ids = ','.join(batch)
    url = 'https://api.spotify.com/v1/audio-features/?ids={}'.format(ids)
    try:
        r = requests.get(url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as exc:
        # Only request-layer failures are handled; KeyboardInterrupt still
        # interrupts the loop.
        print('API호출오류', exc)
        sys.exit(1)
    if r.status_code != 200:
        # An error response has no 'audio_features' key, so stop here with the
        # status code instead of failing later with a KeyError.
        print('API호출오류', r.status_code)
        sys.exit(1)
    raw = json.loads(r.text)
    # Entries can be null for ids the API has no features for; skip them so
    # the list holds only feature dicts.
    audio_features.extend(item for item in raw['audio_features'] if item is not None)

In [8]:
# Turn the collected feature dicts into a DataFrame keyed by `track_id`, write
# it to a local Parquet file, and upload that file to S3.
audio_features = pd.DataFrame(audio_features).rename(columns={'id': 'track_id'})
audio_features.to_parquet('audio_features.parquet', engine='pyarrow',
                          compression='snappy')

# `dt` is the date string computed in the top-tracks upload cell, so both
# uploads of one run share the same dt= partition.
s3 = boto3.resource('s3', region_name='ap-northeast-2')
ob = s3.Object('moon-artist', 'audio_features/dt={}/audio_features.parquet'.format(dt))

# The context manager closes the file handle once the upload finishes, and the
# handle gets its own name instead of reusing `data`.
with open('audio_features.parquet', 'rb') as parquet_file:
    put_response = ob.put(Body=parquet_file)

# Last expression: display the S3 response as the cell output.
put_response

{'ResponseMetadata': {'RequestId': 'T169YS97TGXGPN2V',
  'HostId': 'l4Pe2HivuA2QZFtLIjIA6ER3wcpF1hLeoI3EGz/0i9OT9qcBq/A4jcXfAYf+WwMPx0vQp+4PS+0=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'l4Pe2HivuA2QZFtLIjIA6ER3wcpF1hLeoI3EGz/0i9OT9qcBq/A4jcXfAYf+WwMPx0vQp+4PS+0=',
   'x-amz-request-id': 'T169YS97TGXGPN2V',
   'date': 'Mon, 01 Nov 2021 05:28:26 GMT',
   'etag': '"53832049aae793eaef823aff9ee56197"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"53832049aae793eaef823aff9ee56197"'}