# In [None]:
import json
import boto3
import pandas as pd
import datetime
from io import StringIO


def _find_raw_key(s3, bucket, prefix, filename):
    """Return the key under *prefix* whose last path segment is *filename*.

    Every page of the listing is walked, so the lookup is not limited to the
    first 1000 keys of the prefix. If several keys match, the last one listed
    is returned. Returns ``None`` when nothing matches.
    """
    matched_key = None
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # A page with no objects carries no 'Contents' entry at all.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.split('/')[-1] == filename:
                matched_key = key
    return matched_key


def _put_csv(s3, bucket, key, frame):
    """Upload *frame* to ``s3://bucket/key`` as CSV text without the index."""
    s3.put_object(Bucket=bucket, Key=key, Body=frame.to_csv(index=False))


def lambda_handler(event, context):
    """Split today's raw Spotify extract into four CSV tables on S3.

    Reads ``raw_data/raw/spotify_raw_<YYYY-MM-DD>.json`` from the
    ``spotify-project-jr`` bucket, loads it into a DataFrame and writes four
    dated CSV files under ``transformed_data/``:

    * track            - columns 0-4, a ``record_date`` column, then columns 19+
    * album            - columns 5-10 plus ``record_date``
    * artist           - columns 11-17 plus ``record_date``
    * top_songs_global - ``track_id``, ``artist_id``, ``album_id``,
      ``record_date``

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).

    Returns:
        None.

    Raises:
        FileNotFoundError: if no object for today's date exists under the raw
            prefix.
    """
    s3 = boto3.client('s3')
    bucket = 'spotify-project-jr'
    raw_prefix = 'raw_data/raw/'

    # Naive local clock, same as before.
    # NOTE(review): presumably the extractor names its file with the same
    # clock/timezone - confirm.
    day_month_year = datetime.datetime.now().date().isoformat()
    filename = 'spotify_raw_' + day_month_year + '.json'

    # Locate and load today's raw extract, failing loudly if it is missing.
    raw_key = _find_raw_key(s3, bucket, raw_prefix, filename)
    if raw_key is None:
        raise FileNotFoundError(
            "No object named {!r} found under s3://{}/{}".format(
                filename, bucket, raw_prefix
            )
        )
    response = s3.get_object(Bucket=bucket, Key=raw_key)
    df = pd.DataFrame.from_dict(json.loads(response['Body'].read()))

    # Track table: first five columns, the record date, then the trailing
    # columns from position 19 onward. assign() returns a new frame, so the
    # shared source frame is never written through a slice.
    track_df = pd.concat(
        [df.iloc[:, 0:5].assign(record_date=day_month_year), df.iloc[:, 19:]],
        axis=1,
    )
    _put_csv(
        s3,
        bucket,
        "transformed_data/track_data/track_transformed_" + day_month_year + ".csv",
        track_df,
    )

    # Album table: columns 5-10 plus the record date.
    album_df = df.iloc[:, 5:11].assign(record_date=day_month_year)
    _put_csv(
        s3,
        bucket,
        "transformed_data/album_data/album_transformed_" + day_month_year + ".csv",
        album_df,
    )

    # Artist table: columns 11-17 plus the record date.
    artist_df = df.iloc[:, 11:18].assign(record_date=day_month_year)
    _put_csv(
        s3,
        bucket,
        "transformed_data/artist_data/artist_transformed_" + day_month_year + ".csv",
        artist_df,
    )

    # Top-songs table: the three ids plus record_date. The per-table frames
    # above are independent copies, so their record_date never reaches df.
    # Use the raw column when the payload has one, otherwise stamp today's
    # date like the other tables instead of failing with a KeyError.
    # NOTE(review): column 18 is skipped by every slice above - presumably it
    # is the raw record_date; confirm against the extractor.
    top_songs_global_df = df[['track_id', 'artist_id', 'album_id']].copy()
    if 'record_date' in df.columns:
        top_songs_global_df['record_date'] = df['record_date']
    else:
        top_songs_global_df['record_date'] = day_month_year
    _put_csv(
        s3,
        bucket,
        "transformed_data/top_songs_global_data/top_songs_global_transformed_"
        + day_month_year + ".csv",
        top_songs_global_df,
    )