# In [None]:
import json
import os
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
import boto3
import datetime

###

# You need to sign up for a Spotify for Developers account to obtain a SPOTIPY_CLIENT_ID and a SPOTIPY_CLIENT_SECRET.

###

def _iter_playlist_tracks(sp, playlist_id):
    """Yield every usable track object of the playlist, following pagination."""
    page = sp.playlist_tracks(playlist_id)
    while page:
        for item in page['items']:
            track = item.get('track')
            # Removed/unavailable entries carry no track object, and local
            # files have no Spotify id or URL; neither can be described.
            if (track and track.get('id')
                    and (track.get('external_urls') or {}).get('spotify')):
                yield track
        # The API returns one page at a time; `next` is empty on the last one.
        page = sp.next(page) if page.get('next') else None


def _fetch_audio_features(sp, track_ids, batch_size=100):
    """Return a {track_id: audio features} mapping, fetched in batches.

    Bookkeeping fields (id, URIs, links) are stripped from each feature set.
    Tracks for which Spotify has no features map to an empty dict.
    """
    dropped_keys = ('id', 'track_href', 'uri', 'type', 'analysis_url')
    features_by_id = {}
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        # Results come back in request order, with None for unknown tracks.
        for track_id, features in zip(batch, sp.audio_features(batch) or []):
            features_by_id[track_id] = {
                key: value
                for key, value in (features or {}).items()
                if key not in dropped_keys
            }
    return features_by_id


def _build_track_record(track, artist_details, record_date):
    """Flatten one track object plus its artist details into a single dict."""
    album = track['album']
    artist = album['artists'][0]
    return {
        'track_id': track['id'],
        'track_name': track['name'],
        'track_duration': track['duration_ms'],
        'track_url': track['external_urls']['spotify'],
        'track_popularity': track['popularity'],
        'album_id': album['id'],
        'album_name': album['name'],
        'album_type': album['album_type'],
        'album_tracks': album['total_tracks'],
        'album_url': album['external_urls']['spotify'],
        'album_release_date': album['release_date'],
        'artist_id': artist['id'],
        'artist_name': artist['name'],
        'artist_url': artist['external_urls']['spotify'],
        'artist_genre': artist_details['genres'],
        'artist_followers': artist_details['followers']['total'],
        'artist_popularity': artist_details['popularity'],
        'record_date': record_date,
    }


def lambda_handler(event, context):
    """Extract a Spotify playlist snapshot and store it in S3 as raw JSON.

    Reads SPOTIPY_CLIENT_ID, SPOTIPY_CLIENT_SECRET and playlist_id from the
    environment, collects track, album, artist and audio-feature data for
    every track of the playlist, and uploads the result to
    s3://spotify-project-jr/raw_data/raw/spotify_raw_<YYYY-MM-DD>.json.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        None.

    Raises:
        ValueError: If the playlist_id environment variable is not set.
    """
    # connect to spotipy
    client_id = os.environ.get('SPOTIPY_CLIENT_ID')
    client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
    playlist_id = os.environ.get('playlist_id')
    if not playlist_id:
        raise ValueError("Environment variable 'playlist_id' is not set")

    auth_manager = SpotifyClientCredentials(client_id=client_id,
                                            client_secret=client_secret)
    sp = spotipy.Spotify(auth_manager=auth_manager)

    # The date stamps every record and also names the output object.
    record_date = datetime.datetime.now().strftime('%Y-%m-%d')

    tracks = list(_iter_playlist_tracks(sp, playlist_id))

    # Look each artist up once, by ID: a name search can return a different
    # artist with a similar name, or no result at all.
    artist_cache = {}
    records = []
    for track in tracks:
        artist_id = track['album']['artists'][0]['id']
        if artist_id not in artist_cache:
            artist_cache[artist_id] = sp.artist(artist_id)
        records.append(
            _build_track_record(track, artist_cache[artist_id], record_date))

    # De-duplicate while keeping order so repeated tracks are fetched once.
    unique_track_ids = list(dict.fromkeys(r['track_id'] for r in records))
    features_by_id = _fetch_audio_features(sp, unique_track_ids)

    final_records = [
        {**record, **features_by_id.get(record['track_id'], {})}
        for record in records
    ]

    filename = 'spotify_raw_' + record_date + '.json'
    # drop extracted data into the raw zone of the S3 bucket
    client = boto3.client('s3')
    client.put_object(
        Bucket='spotify-project-jr',
        Key='raw_data/raw/' + filename,
        Body=json.dumps(final_records)
    )
