In [1]:
#importing necessary libraries
import requests
import json
import pandas as pd
from matplotlib import pyplot as plt
import datetime
from decouple import config
from collections import Counter 

In [2]:
def data_validation(df):
    """Run basic sanity checks on the downloaded songs DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to check. A non-empty frame must contain a 'played_at' column,
        which is treated as the primary key.

    Returns
    -------
    bool
        False when the frame is empty (nothing to validate), True when every
        check passes.

    Raises
    ------
    Exception
        If 'played_at' contains duplicate values, or if any cell is null.
    """
    # An empty frame is not "valid": report it and stop here. Falling through
    # would either report success for zero rows or fail with a KeyError when
    # the frame has no 'played_at' column at all.
    if df.empty:
        print('Yikes, No songs were downloaded')
        return False

    # 'played_at' acts as the primary key, so every value must be distinct
    if not pd.Series(df['played_at']).is_unique:
        raise Exception("Yikes Primary Key check violated")

    # No column may contain missing values
    if df.isnull().values.any():
        raise Exception("Yike!!! Found Null values")

    return True


def frequencies(column_name, df=None, top_n=5):
    """Print value counts for a column and plot its most common values.

    Parameters
    ----------
    column_name : str
        Column whose values are counted.
    df : pandas.DataFrame, optional
        Frame to read from. When omitted, the notebook-level `songs_df` is
        used, so existing `frequencies('col')` calls behave as before.
    top_n : int, optional
        How many of the most common values to print and plot (default 5).

    Returns
    -------
    None
        Output is printed and a bar chart is shown.
    """
    # Fall back to the notebook-level frame so one-argument calls keep working
    frame = songs_df if df is None else df

    # Count once and reuse the result for both printouts and the chart
    counts = Counter(frame[column_name].values)
    top_counts = dict(counts.most_common(top_n))

    print(counts)
    print(top_counts)

    # With nothing to plot, zip(*...) yields no arguments and plt.bar() would
    # fail for lack of its required x/height arguments.
    if not top_counts:
        return

    # Unzip {value: count} into (values, counts) for the x positions and heights
    plt.bar(*zip(*top_counts.items()))
    plt.title(f"Top {len(top_counts)} values in '{column_name}'")
    plt.show()

In [3]:
# HTTP headers sent with the API request below: JSON in both directions plus a
# bearer token. The token is looked up through decouple's config('TOKEN')
# rather than being written into the notebook.
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {config('TOKEN')}"
}

In [4]:
# Current date and time as a naive datetime (no tz argument is passed to now());
# the look-back cutoff below is computed from this value.
today = datetime.datetime.now()
print("today:",today)

today: 2021-07-06 04:05:54.141984


In [5]:
# Cutoff for the listening-history request: 300 days before `today`.
# NOTE(review): the variable is named last_month but the window is 300 days —
# confirm which of the two is intended.
last_month = today - datetime.timedelta(days=300)
print("last_month",last_month)

last_month 2020-09-09 04:05:54.141984


In [6]:
# Convert the cutoff to a Unix timestamp in milliseconds; int() drops the
# fractional seconds before the multiplication by 1000.
last_month_unix_timestamp = int(last_month.timestamp()) * 1000
print("last_month_Unix",last_month_unix_timestamp)

# Same cutoff with the time-of-day fields zeroed (midnight of that day).
# NOTE(review): no cell visible here reads last_month_replace — confirm it is still needed.
last_month_replace = last_month.replace(hour = 0, minute = 0, second = 0, microsecond = 0)
print("last_month_Replace:",last_month_replace)

last_month_Unix 1599624354000
last_month_Replace: 2020-09-09 00:00:00


In [None]:
# Request the plays recorded after the cutoff (Unix milliseconds) from the
# recently-played endpoint, authenticating with the headers built earlier.
url = f"https://api.spotify.com/v1/me/player/recently-played?after={last_month_unix_timestamp}"
# Without a timeout, requests may wait indefinitely on an unresponsive server
r = requests.get(url, headers = headers, timeout = 30)
# Stop here with the HTTP status (for example a rejected token) instead of
# failing later with a KeyError when the error payload has no "items" key.
r.raise_for_status()

In [None]:
# Decode the JSON body of the response into Python objects
data = r.json()
# Bare last expression: the notebook displays the whole decoded payload
data

In [None]:
# Flatten the response into four parallel lists, one entry per played item
items = data["items"]

# Title of each played track
song_names = [entry['track']['name'] for entry in items]
# First artist listed on the track's album
artist_name = [entry['track']['album']['artists'][0]['name'] for entry in items]
# Full played_at value, used later as the primary key
played_at_list = [entry['played_at'] for entry in items]
# First ten characters of played_at
# (presumably the YYYY-MM-DD date part — TODO confirm the timestamp format)
timestamps = [played_at[0:10] for played_at in played_at_list]

In [None]:
# Gather the per-play lists column-wise, then build the DataFrame from them
songs_dict = dict(
    song_names=song_names,
    artist_name=artist_name,
    played_at=played_at_list,
    timestamp=timestamps,
)
songs_df = pd.DataFrame(data=songs_dict)
print(songs_df)

In [None]:
# data_validation raises on duplicate played_at values or on nulls, so the
# message below is printed only when it returns a truthy result.
if data_validation(songs_df):
    print("Data Validation Passed!")

In [None]:
# (rows, columns) of the songs frame
songs_df.shape

In [None]:
# Summary of the songs frame as printed by DataFrame.info()
songs_df.info()

In [None]:
# Basic descriptive statistics for the songs frame
songs_df.describe()

In [None]:
# Showing the first rows of the dataframe (head() with no argument returns 5 rows)
songs_df.head()

## Which 5 songs did I listen to most frequently?

In [None]:
# Print the play counts per song title and plot the most common ones
frequencies('song_names')

## Who are the top 5 artists I listened to?

In [None]:
# Print the play counts per artist and plot the most common ones
frequencies('artist_name')

## How many songs did I listen to during each period of the day?

In [None]:
#converting played_at column to a datetime object
songs_df['played_at'] = pd.to_datetime(songs_df['played_at'])

# Labels for the six 4-hour buckets of the day:
# hours 0-3 -> 1, 4-7 -> 2, 8-11 -> 3, 12-15 -> 4, 16-19 -> 5, 20-23 -> 6
period_labels = {1: 'Late Night',
                 2: 'Early Morning',
                 3: 'Morning',
                 4: 'Noon',
                 5: 'Evening',
                 6: 'Night'}

#creating a new column, period
# NOTE(review): the hour is read from played_at exactly as parsed; if those
# timestamps are in UTC the periods are UTC as well — confirm.
period_codes = (songs_df['played_at'].dt.hour % 24 + 4) // 4

# Assign the relabelled Series back to the frame. Calling
# replace(..., inplace=True) on songs_df['period'] acts on a selected column
# object, which under pandas Copy-on-Write does not update songs_df.
songs_df['period'] = period_codes.replace(period_labels)

In [None]:
# Display the frame, now including the period column
songs_df

In [None]:
# Print the play counts per period of the day and plot the most common ones
frequencies('period')