<h1> Musikjahr </h1>

This Data Science project aims to analyze a Spotify user's music trends in the previous year and recommends new artists based on those music trends.

Install libraries related to API

In [15]:
%pip install python-dotenv
%pip install requests
%pip install pycountry

Install libraries related to Data Science tasks

In [16]:
%pip install seaborn

Install library related to Spotify OAuth2

In [17]:
from dotenv import load_dotenv
import os
import base64
import requests
import urllib.parse
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry

In [18]:
load_dotenv()

True

## Spotify API Implementation

In [19]:
spotify_client_id = os.getenv("SPOTIFY_CLIENT_ID")
spotify_client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
spotify_redirect_uri = os.getenv("SPOTIFY_REDIRECT_URI")
scope = "user-top-read user-read-private"

In [20]:
# Returns the authorization header required to query the API
def get_auth_header(token):
    return {"Authorization": "Bearer " + token}

In [21]:
def get_auth_code():
    auth_url = "https://accounts.spotify.com/authorize"
    
    params = {
        "client_id": spotify_client_id,
        "response_type": "code",
        "redirect_uri": spotify_redirect_uri,
        "scope": "user-top-read user-read-private",
    }

    auth_url = f"{auth_url}?{urllib.parse.urlencode(params)}"
    print(f"Please Authorize access by clicking on: {auth_url}")

    auth_code = input("Enter the code in the generated URL: ")
    
    return auth_code


In [22]:
# Returns new refresh token
def exchange_auth_code_for_refresh_token(auth_code):
    token_url = "https://accounts.spotify.com/api/token"
    
    data = {
        "grant_type": "authorization_code",
        "code": auth_code,
        "redirect_uri": spotify_redirect_uri,
        "client_id": spotify_client_id,
        "client_secret": spotify_client_secret
        }

    result = requests.post(token_url, data=data)
    json_result = json.loads(result.content)
    refresh_token = json_result['refresh_token']
    
    return refresh_token

In [23]:
# Returns a new access token upon every execution
def refresh_access_token(refresh_token):
    token_url = "https://accounts.spotify.com/api/token"
    
    data = {
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
        "client_id": spotify_client_id,
        "client_secret": spotify_client_secret
        }

    result = requests.post(token_url, data=data)
    json_result = json.loads(result.content)
    new_access_token = json_result['access_token']

    return new_access_token

In [24]:
auth_code = get_auth_code()
print(auth_code)

Please Authorize access by clicking on: https://accounts.spotify.com/authorize?client_id=9e3ab84ec3634133b2710bc672715e09&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A3001&scope=user-top-read+user-read-private
<PyodideFuture pending cb=[WebLoop._decrement_in_progress()]>


Enter the code in the generated URL:  AQDj3sYyGYsl1YvC6NShqhh7gvO9TtuKWKnFv6ctBxT6_s5IM7wq89qOCe360oQMQOZbDFAuxYlKTTDhflIcYYtBYrOX7WArlIvzhJ3Ui66F0bAz4cmtCDYNV1O-9bjgapQI1-O2O4J-9MdVWChpz-rGmUcXyyFwLx5ZVUBcSwc40WEHS0N8k3pGI1xOC3mm1ygwZc5jfJPY87A


In [25]:
refresh_token = exchange_auth_code_for_refresh_token(auth_code.result())

In [26]:
# Returns the user's top tracks based on given time period
def get_top_tracks(refresh_token, time_period, amount):
    api_url = "https://api.spotify.com/v1/me/top/tracks"
    
    params = {
        "limit": amount,
        "time_range": str(time_period),
        "offset": 0
    }
    
    result = requests.get(api_url, headers=get_auth_header(refresh_access_token(refresh_token)), params=params)
    data = json.loads(result.content)

    songs_dict={"Song_Title": [], "Album": [], "Artists": [], "Song_Popularity" : []}
    
    song_data = []
    album_data = []
    artists_data = []
    popularity_data = []

    # Retrieve song name and popularity and store in respective lists
    for song in data['items']:
        song_data.append(song['name'])
        popularity_data.append(song['popularity'])

    # Retrieve album name and artist name and store in respective lists
    for song in data['items']:
        artists_data_list = song['album']['artists']
        album_data.append(song['album']['name'])
        if(len(artists_data_list) > 1):
            artists_for_song = []
            for artists in artists_data_list:
                artists_for_song.append(artists['name'])
            artists_data.append(artists_for_song)
        else:
            for artists in artists_data_list:
                artists_data.append(artists['name'])

    # Copy data from lists to dict
    for song in song_data:
        songs_dict['Song_Title'].append(song)
    for album in album_data:
        songs_dict['Album'].append(album)
    for artist in artists_data:
        songs_dict['Artists'].append(artist)
    for pop in popularity_data:
        songs_dict['Song_Popularity'].append(pop)
            
    return songs_dict


In [27]:
# Returns user's top artists
def get_top_artists(refresh_token, time_period, amount):
    api_url = "https://api.spotify.com/v1/me/top/artists"
    
    params = {
        "limit": amount,
        "time_range": str(time_period),
        "offset": 0
    }
    
    result = requests.get(api_url, headers=get_auth_header(refresh_access_token(refresh_token)), params=params)
    data = json.loads(result.content)

    artists_dict={"Artist_Name": [], "Artist_Popularity" : []} # Add followers
    
    artist_data = []
    popularity_data = []

    # Retrieve song name and popularity and store in respective lists
    for artist in data['items']:
        artist_data.append(artist['name'])
        popularity_data.append(artist['popularity'])

   # Copy data from lists to dict
    for artist in artist_data:
        artists_dict['Artist_Name'].append(artist)
    for pop in popularity_data:
        artists_dict['Artist_Popularity'].append(pop)
            
    return artists_dict


In [28]:
# Returns a score of how popular the user's recent songs are
def get_listening_poularity_score(df):
    return float(df["Song_Popularity"].mean())

In [29]:
top_short_term_tracks = get_top_tracks(refresh_token, "short_term", 10) # Last 4 weeks
top_short_term_df = pd.DataFrame(top_short_term_tracks)
top_short_term_df

Unnamed: 0,Song_Title,Album,Artists,Song_Popularity
0,Snow On Tha Bluff,Snow On Tha Bluff,J. Cole,62
1,Soft Gentle Brilliant III,"From Silence, to Silence, and with Great Joy",Corey Kilgannon,16
2,Sk8er Boi,Let Go,Avril Lavigne,77
3,Rain in Cuba,Current Blues,Joe Hertz,42
4,Same Old Song,Same Old Song,The Lumineers,71
5,Stick Season,Stick Season,Noah Kahan,86
6,Heavy Is The Crown (Original Score) (from the ...,Arcane League of Legends: Season 2 (Soundtrack...,"[Arcane, League of Legends]",72
7,4 Your Eyez Only,4 Your Eyez Only,J. Cole,64
8,Remember Me (from the series Arcane League of ...,Arcane League of Legends: Season 2 (Soundtrack...,"[Arcane, League of Legends]",76
9,You’re Gonna Go Far,Stick Season (We'll All Be Here Forever),Noah Kahan,80


In [30]:
top_medium_term_tracks = get_top_tracks(refresh_token, "medium_term", 10) # Last 6 months
top_medium_term_df = pd.DataFrame(top_medium_term_tracks)
top_medium_term_df

Unnamed: 0,Song_Title,Album,Artists,Song_Popularity
0,baby pink (feat. Eem Triplin),"C,XOXO (Magic City Edition)",Camila Cabello,53
1,3 Nights,"Don't Forget About Me, Demos",Dominic Fike,69
2,Don't Go Yet,Familia,Camila Cabello,45
3,Jailhouse Rock,Platinum - A Life In Music,Elvis Presley,72
4,Paint The Town Blue (from the series Arcane Le...,Paint The Town Blue (from the series Arcane Le...,"[Ashnikko, Arcane, League of Legends]",72
5,Celia,Familia,Camila Cabello,41
6,Indecisive - Is It A Crime,Indecisive (Is It A Crime),Kidwild,66
7,Pope Is a Rockstar,SALES LP,SALES,66
8,Growing Up (feat. Ed Sheeran),This Unruly Mess I've Made,"[Macklemore & Ryan Lewis, Macklemore, Ryan Lewis]",37
9,Arabella,AM,Arctic Monkeys,74


In [31]:
top_long_term_tracks = get_top_tracks(refresh_token, "long_term", 10) # Last 12 months
top_long_term_df = pd.DataFrame(top_long_term_tracks)
top_long_term_df

Unnamed: 0,Song_Title,Album,Artists,Song_Popularity
0,Bothmer Schloss,Bothmer Schloss,Marie Bothmer,30
1,baby pink (feat. Eem Triplin),"C,XOXO (Magic City Edition)",Camila Cabello,53
2,Feel It - From The Original Series “Invincible”,Feel It (From The Original Series “Invincible”),d4vd,77
3,3 Nights,"Don't Forget About Me, Demos",Dominic Fike,69
4,Young And Beautiful,Music From Baz Luhrmann's Film The Great Gatsb...,Various Artists,54
5,Don't Go Yet,Familia,Camila Cabello,45
6,Jailhouse Rock,Platinum - A Life In Music,Elvis Presley,72
7,Say Yes To Heaven,Say Yes To Heaven,Lana Del Rey,83
8,Paint The Town Blue (from the series Arcane Le...,Paint The Town Blue (from the series Arcane Le...,"[Ashnikko, Arcane, League of Legends]",72
9,On An Evening In Roma,The Capitol Years,Dean Martin,41


In [32]:
get_listening_poularity_score(top_long_term_df)

59.6

In [33]:
top_short_term_artists = get_top_artists(refresh_token, "short_term", 10)
top_short_term_artists_df = pd.DataFrame(top_short_term_artists)
top_short_term_artists_df

Unnamed: 0,Artist_Name,Artist_Popularity
0,J. Cole,84
1,Lana Del Rey,89
2,Arcane,85
3,Avril Lavigne,75
4,Oh Wonder,65
5,NF,77
6,Corey Kilgannon,33
7,Gracie Abrams,87
8,Grace VanderWaal,53
9,Suki Waterhouse,69


In [34]:
top_medium_term_artists = get_top_artists(refresh_token, "medium_term", 10)
top_medium_term_artists_df = pd.DataFrame(top_medium_term_artists)
top_medium_term_artists_df

Unnamed: 0,Artist_Name,Artist_Popularity
0,Camila Cabello,79
1,Elvis Presley,79
2,Kendrick Lamar,93
3,Arctic Monkeys,86
4,J. Cole,84
5,Bruno Mars,94
6,Gracie Abrams,87
7,Lana Del Rey,89
8,Oh Wonder,65
9,The Neighbourhood,83


In [35]:
top_long_term_artists = get_top_artists(refresh_token, "long_term", 10)
top_long_term_artists = pd.DataFrame(top_long_term_artists)
top_long_term_artists

Unnamed: 0,Artist_Name,Artist_Popularity
0,Lana Del Rey,89
1,Ed Sheeran,86
2,Oh Wonder,65
3,Camila Cabello,79
4,Stormzy,70
5,Aitch,66
6,Gracie Abrams,87
7,Post Malone,87
8,Elvis Presley,79
9,NF,77


In [36]:
lastfm_api_key = os.getenv("LASTFM_API_KEY")
lastfm_shared_secret = os.getenv("LASTFM_SHARED_SECRET")
lastfm_redirect_uri = os.getenv("LASTFM_REDIRECT_URI")

CORS Proxy must be enabled. To enable it, click on the button in this link: https://cors-anywhere.herokuapp.com/corsdemo


In [109]:
# Returns song tags (genres or moods)
def get_song_tags(api_key, tag_type, song_name, artist_name):

    api_url = "http://ws.audioscrobbler.com/2.0/"
    proxy_url = "https://cors-anywhere.herokuapp.com/"
    
    tag_list = []

    if tag_type == "genres":   
        tag_df = pd.read_csv("genres.csv")
    elif tag_type == "moods":   
        tag_df = pd.read_csv("moods.csv")
        
    tag_df = tag_df.map(lambda x: x.lower())
    
    params = {
        "method": "track.getTopTags",
        "artist": artist_name,
        "track": song_name,
        "api_key": str(api_key),
        "format": "json"
    }

    response = requests.get(proxy_url + api_url, params=params)
    data = response.json()
    for tag in data['toptags']['tag']:
        if tag['name'].lower() in tag_df.values:
            tag_list.append(tag['name'])

    return tag_list

In [39]:
# Returns chart data
def get_chart_data(api_key, method):
    api_url = "http://ws.audioscrobbler.com/2.0/"
    proxy_url = "https://cors-anywhere.herokuapp.com/"

    params = {
             "method": "chart." + method,
            "api_key": str(api_key),
            "format": "json"
        }
    
    response = requests.get(proxy_url + api_url, params=params)
    data = response.json()

    if method == "getTopTracks":
        topTracks = {"Song_Title": [], "Artist_Name": []}
        for x in data['tracks']['track']:
            topTracks['Song_Title'].append(x['name'])
            topTracks['Artist_Name'].append(x['artist']['name'])
        return topTracks
    
    elif method == "getTopArtists":
        topArtists = []
        for x in data['artists']['artist']:
            topArtists.append(x['name'])
        return topArtists

    elif method == "getTopTags":
        topTags = []
        for x in data['tags']['tag']:
           topTags.append(x['name'])
        return topTags


In [41]:
# Converts country alpha 2 code (E.g. "DE") to ISO 3166-1 country names standard ("Germany")
def convert_country(alpha_2_code):
    country = pycountry.countries.get(alpha_2=alpha_2_code.upper())
    return country.name

In [40]:
# Returns chart data by country
def get_chart_data_by_country(api_key, method, country):
    
    api_url = "http://ws.audioscrobbler.com/2.0/"
    proxy_url = "https://cors-anywhere.herokuapp.com/"

    params = {
            "country": country,
            "method": "geo." + method,
            "api_key": str(api_key),
            "format": "json"
        }
    
    response = requests.get(proxy_url + api_url, params=params)
    data = response.json()
    
    if method == "getTopTracks":
        topTracks = {"Song_Title": [], "Artist_Name": []}
        for x in data['tracks']['track']:
            topTracks['Song_Title'].append(x['name'])
            topTracks['Artist_Name'].append(x['artist']['name'])
        return topTracks
    
    elif method == "getTopArtists":
        topArtists = []
        for x in data['topartists']['artist']:
            topArtists.append(x['name'])
        return topArtists

In [111]:
get_song_tags(lastfm_api_key, "genres", top_long_term_df['Song_Title'][7], top_long_term_df['Artists'][7])

['alternative',
 'dream pop',
 'indie',
 'pop',
 'indie pop',
 'slowcore',
 'rock',
 'alternative rock',
 'ambient',
 'acoustic',
 'psychedelic pop',
 'Alt-country',
 '2010s',
 'art pop']

In [110]:
get_song_tags(lastfm_api_key, "moods", top_long_term_df['Song_Title'][7], top_long_term_df['Artists'][7])

['sad', 'melancholic']