In [2]:
import ast
import json
import os
from os import listdir
from typing import List, Optional
def get_streamings(path: str = 'MyData') -> List[dict]:
    """Load every streaming record found in the export folder ``path``.

    A file is picked up when the part of its name before the first ``.`` is
    ``StreamingHistory`` followed by one or more digits (for example
    ``StreamingHistory0.json``). Files are read in ascending numeric order
    so the result is deterministic.

    Args:
        path: Directory that contains the ``StreamingHistory<N>`` files.

    Returns:
        A single flat list with the records of all matching files; empty
        when no file matches.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError / SyntaxError: If a file holds neither valid JSON nor a
            valid Python literal.
    """
    prefix = 'StreamingHistory'

    def file_number(name: str) -> str:
        # Text between the prefix and the first '.', e.g. '0' or '12'.
        return name.split('.')[0][len(prefix):]

    names = [name for name in listdir(path)
             if name.startswith(prefix) and file_number(name).isdigit()]
    # Numeric sort so that StreamingHistory10 comes after StreamingHistory2.
    names.sort(key=lambda name: int(file_number(name)))

    all_streamings = []
    for name in names:
        # Build the location from `path` instead of a hard-coded 'MyData/'.
        with open(os.path.join(path, name), 'r', encoding='UTF-8') as f:
            raw = f.read()
        try:
            new_streaming = json.loads(raw)
        except ValueError:
            # Not strict JSON: fall back to the original Python-literal parser.
            new_streaming = ast.literal_eval(raw)
        all_streamings.extend(new_streaming)
    return all_streamings


In [3]:
import spotipy.util as util

# Spotify app credentials must never be committed with the notebook: they are
# read from the environment instead. Export SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel (a missing variable raises
# KeyError here rather than failing later during authentication).
# NOTE(review): the id/secret pair that used to be hard-coded in this cell has
# been exposed and should be rotated in the Spotify developer dashboard.
username = os.environ.get('SPOTIFY_USERNAME', 'lukeyburge')
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI',
                              'http://localhost:7777/callback')
scope = 'user-read-recently-played'

# Runs the user authorization flow and yields the access token that the
# lookup helpers below send to the Web API.
token = util.prompt_for_user_token(username=username,
                                   scope=scope,
                                   client_id=client_id,
                                   client_secret=client_secret,
                                   redirect_uri=redirect_uri)


In [15]:
import requests


def get_id(track_name: str, token: str, verbose: bool = True) -> Optional[str]:
    """Return the Spotify id of the first search hit for ``track_name``.

    Args:
        track_name: Free-text query sent as the ``q`` search parameter.
        token: OAuth access token placed in the ``Authorization`` header.
        verbose: When true (the default), print the name of the matched
            track so the match can be eyeballed.

    Returns:
        The id of the first item under ``tracks.items`` in the response, or
        ``None`` when the request fails, returns an error status, or the
        response does not contain a usable first result.
    """
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + token,
    }
    params = [
        ('q', track_name),
        ('type', 'track'),
    ]
    try:
        response = requests.get('https://api.spotify.com/v1/search',
                                headers=headers, params=params, timeout=5)
        response.raise_for_status()
        first_result = response.json()['tracks']['items'][0]
        track_id = first_result['id']
        matched_name = first_result['name']
    except (requests.RequestException, ValueError,
            KeyError, IndexError, TypeError):
        # Best-effort lookup: network errors, error statuses, malformed
        # payloads and empty result lists all mean "no id". Unlike a bare
        # `except`, this still lets KeyboardInterrupt stop a long loop.
        return None
    if verbose:
        print(matched_name)
    return track_id

# Try the lookup with a sample query; get_id returns the id of the first
# search hit, whose name need not equal the query text.
lucy_id = get_id('lucy', token)
print(lucy_id)


Sex on Fire
5A1FmxbYVRZKy4nc16MAue


In [16]:
import spotipy as spotipy

def get_features(track_id: str, token: str) -> Optional[dict]:
    """Fetch the audio-features record of one track.

    Args:
        track_id: Spotify track id; a falsy value (``None`` or ``''``)
            short-circuits to ``None`` without calling the API.
        token: OAuth access token used to build the spotipy client.

    Returns:
        The first element of the list returned by ``audio_features``, or
        ``None`` when the id is missing or the lookup fails.
    """
    if not track_id:
        # get_id yields None when the search fails; skip the API call.
        return None
    sp = spotipy.Spotify(auth=token)
    try:
        features = sp.audio_features([track_id])
        return features[0]
    except Exception:
        # Best-effort lookup. Catching Exception instead of using a bare
        # `except` keeps KeyboardInterrupt/SystemExit working.
        return None


# Try the feature lookup with the id obtained from the sample query above.
lucy_features = get_features(lucy_id, token)
print(lucy_features)


{'danceability': 0.542, 'energy': 0.905, 'key': 9, 'loudness': -5.653, 'mode': 1, 'speechiness': 0.054, 'acousticness': 0.00172, 'instrumentalness': 0.0104, 'liveness': 0.136, 'valence': 0.374, 'tempo': 153.398, 'type': 'audio_features', 'id': '5A1FmxbYVRZKy4nc16MAue', 'uri': 'spotify:track:5A1FmxbYVRZKy4nc16MAue', 'track_href': 'https://api.spotify.com/v1/tracks/5A1FmxbYVRZKy4nc16MAue', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/5A1FmxbYVRZKy4nc16MAue', 'duration_ms': 203347, 'time_signature': 4}


In [None]:
import pandas as pd

streamings = get_streamings()
# Sorted so the (slow) lookups run in a reproducible order.
unique_tracks = sorted({streaming['trackName'] for streaming in streamings})

# One search + one audio-features request per distinct track name; tracks
# without a usable result are left out.
all_features = {}
for track in unique_tracks:
    track_id = get_id(track, token)
    features = get_features(track_id, token)
    if features:
        all_features[track] = features

# Attach the features to every individual play. The analysis cells that read
# this CSV need the per-play fields (endTime, msPlayed), which a
# one-row-per-track table would drop.
with_features = [
    {**streaming, **all_features[streaming['trackName']]}
    for streaming in streamings
    if streaming['trackName'] in all_features
]

df = pd.DataFrame(with_features)
df.to_csv('streaming_history.csv')

In [None]:
from collections import namedtuple
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style and font in a single call: a separate sns.set_style("ticks")
# afterwards re-applies the style's own font.family and would discard the
# font chosen here.
sns.set(style="ticks", font='georgia')


In [None]:

# Load the table saved as 'streaming_history.csv'; the first CSV column is
# used as the row index.
df = pd.read_csv('streaming_history.csv', index_col=0)
# Display (rows, columns) of the loaded frame.
df.shape


In [None]:
# Playing time of each stream in seconds.
df['secPlayed'] = df['msPlayed'] / 1000
# Place secPlayed right after msPlayed. Selecting the column by name (rather
# than "whatever column is last") keeps this cell safe to re-run.
ordered_columns = [column for column in df.columns if column != 'secPlayed']
ordered_columns.insert(ordered_columns.index('msPlayed') + 1, 'secPlayed')
df = df[ordered_columns]
# Keep only streams played for more than 60 seconds; .copy() makes the result
# an independent frame so later cells can add columns without triggering
# SettingWithCopyWarning.
df = df[df.secPlayed > 60].copy()
df.shape

In [None]:
# Preview the first rows of df.
df.head()

In [None]:
# Tag every row with the first two '-'-separated pieces of endTime
# as a tuple.
end_time_parts = df.endTime.str.split('-')
df['month'] = end_time_parts.apply(lambda parts: (parts[0], parts[1]))


In [None]:

# Distinct month keys in ascending order, plus the audio features to follow.
months = sorted(set(df.month.values))
features = [
    'danceability',
    'energy',
    'speechiness',
    'instrumentalness',
    'valence',
]


In [None]:
# Standardize each feature: what matters is how it moves over time relative
# to its own average, not its absolute level.
for feature in features:
    column = df[feature]
    centered = column - column.mean()
    df[f'{feature}_zscore'] = centered / column.std()


In [None]:

# Sanity check of the standardization: every z-score column should show
# mean 0 and std 1.
zscore_summary = df[[f'{name}_zscore' for name in features]].describe()
zscore_summary.loc['mean':'std'].T


In [None]:
# Average standardized value of each feature, month by month.
# Month has one field per entry of `features`; avg_features_months keeps the
# same order as `months`.
Month = namedtuple('Month', features)
zscore_columns = [feature + '_zscore' for feature in features]
avg_features_months = []
for month_key in months:
    # Take the mean of just the z-score columns instead of running a full
    # describe() on every column, and keep the loop variable distinct from
    # the namedtuple built from it.
    month_means = df.loc[df['month'] == month_key, zscore_columns].mean()
    avg_features_months.append(Month(*month_means))


In [None]:
# Axis labels: second element of each month key, '/', first element; the
# short form keeps the first three and the last two characters of the label.
month_labels = ['{}/{}'.format(key[1], key[0]) for key in months]
month_labels_short = [f'{label[:3]}{label[-2:]}' for label in month_labels]


In [None]:
def features_sns(features):
    """Draw one line per feature showing its monthly average z-score.

    Reads the notebook-level ``avg_features_months`` (one record per month,
    with an attribute per feature) and ``month_labels_short`` (x tick
    labels).

    Args:
        features: Names of the features to plot; each must be an attribute
            of the records in ``avg_features_months``.

    Returns:
        The matplotlib Axes holding the plot.
    """
    fig, ax = plt.subplots(figsize=(25, 10))
    # One x position per month actually present, not a fixed count of 13.
    positions = list(range(len(avg_features_months)))

    for feature in features:
        y = [getattr(month, feature) for month in avg_features_months]
        # x/y are passed by keyword and the target Axes explicitly: current
        # seaborn releases reject positional data arguments.
        sns.lineplot(x=positions, y=y, ax=ax, label=feature, linewidth=6,
                     alpha=.7, marker='o', markersize=15)

    ax.set_xticks(positions)
    ax.set_xticklabels(labels=month_labels_short,
                       rotation=45, ha='right', size=30)

    # Replaces the per-tick `tick.label` access, which matplotlib removed.
    ax.tick_params(axis='y', labelsize=30)

    leg = ax.legend(loc='upper left', bbox_to_anchor=(
        1, 1.03), prop={'size': 30})

    # Thicken the sample lines shown inside the legend.
    for line in leg.get_lines():
        line.set_linewidth(10)

    ax.set_title('My mood in 2019 (According to Spotify)',
                 size=50, pad=30, fontname='sans-serif')

    return ax


In [None]:
# Valence and energy over time, with shaded bands and captions laid on top.
features_sns(['valence', 'energy'])
x = [-1] + list(range(13))
alpha = .25
plt.ylim([-.5, .5])
plt.xlim([-.5, 12])

# (x values of the band, extra fill options), drawn in this order so the
# bands without an explicit color take the same default colors as before.
shaded_bands = [
    (x[:6], {}),
    (x[5:8], {}),
    (x[7:11], {'color': "red"}),
    (x[10:], {}),
]
for band_x, fill_options in shaded_bands:
    plt.fill_between(band_x, -.5, 0.5, alpha=alpha, **fill_options)

# Caption text and the point it is anchored at.
captions = [
    ('Studying', (1, 0.4)),
    ('Graduating', (4.15, 0.4)),
    ('Unemployed', (6.5, 0.4)),
    ('First Job', (10.35, 0.4)),
]
for caption, anchor in captions:
    plt.annotate(caption, anchor, size=35)

# Dashed reference line at the overall average (z-score 0).
plt.axhline(y=0, color='b', linestyle='--')


plt.show()
