In [90]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import requests
import json
import musicbrainzngs
import sqlalchemy

In [57]:
musicbrainzngs.set_useragent('application-project', '0.0.1')

In [92]:
from dotenv import load_dotenv
load_dotenv()
import os
# token = os.environ.get("ENV_VARIABLE")

SPOTIFY_CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID")
SPOTIFY_CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET")
LAST_FM_API_KEY = os.environ.get("LAST_FM_API")
username = os.environ.get('USERNAME')
password = os.environ.get('PASSWORD')

In [117]:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData
from sqlalchemy.orm import sessionmaker

In [110]:
dialect = 'mysql'
driver = 'mysqldb'                # pip install mysqlclient
host = 'localhost'
port = '3306'
engine = sqlalchemy.create_engine(
    f'{dialect}+{driver}://{username}:{password}@{host}:{port}/eighties')

In [111]:
from sqlalchemy import text
with engine.connect() as connection:
    connection.execute(text('Create database if not exists eighties'))


In [126]:
client_credentials_manager = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT_ID, client_secret=SPOTIFY_CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [127]:
def get_genre(isrc, api_key=LAST_FM_API_KEY):
    # Use musicbrainz to get the musicbraniz id (https://musicbrainz.org/doc/MusicBrainz_API)
    recording_list = musicbrainzngs.get_recordings_by_isrc(isrc)['isrc']['recording-list']
    if len(recording_list) > 1:
        print(recording_list)
    mbid=recording_list[0]['id']

    # Use Last FM to get the genres (https://www.last.fm/api/show/track.getInfo)
    url = f"http://ws.audioscrobbler.com/2.0/?method=track.getInfo&api_key={api_key}&mbid={mbid}&format=json"

    response = requests.get(url)
    data = json.loads(response.text)
    genres = data["track"]["toptags"]["tag"]
    genre_list = []
    for genre in genres:
        genre_list.append(genre['name'])
    return str(genre_list)

In [121]:
def get_track_features(track_id):
    features = sp.audio_features(track_id)[0]
    track = sp.track(track_id)
    popularity = track['popularity']
    genres = get_genre(track['external_ids']['isrc'])
    
    return {
        'popularity': popularity,
        'genres': genres,
        'danceability': features['danceability'],
        'energy': features['energy'],
        'key': features['key'],
        'loudness': features['loudness'],
        'mode': features['mode'],
        'speechiness': features['speechiness'],
        'acousticness': features['acousticness'],
        'instrumentalness': features['instrumentalness'],
        'liveness': features['liveness'],
        'valence': features['valence'],
        'tempo': features['tempo'],
        'duration_ms': features['duration_ms'],
        'time_signature': features['time_signature'],
        'isrc': track['external_ids']['isrc'],
    }

In [122]:
columns = ['popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature', 'isrc', 'genres']

In [129]:
df = pd.DataFrame(columns=columns)

In [130]:
# Set up query parameters
query = 'year:1980'
limit = 1
offset = 0

# Make initial request to get total number of results
result = sp.search(q=query, type='track', limit=1, offset=0)
total_results = result['tracks']['total']

# Loop through results and retrieve tracks
while offset < 10:
    result = sp.search(q=query, type='track', limit=limit, offset=offset)
    track_features = []
    for track in result['tracks']['items']:
        features = get_track_features(track['id'])
        track_features.append(features)
    offset += limit
    df = pd.concat([df, pd.DataFrame(track_features)])

[{'id': '21c90a1b-2b83-4ba7-a356-90d5bba8c236', 'title': 'Any Way You Want It', 'length': '220093', 'disambiguation': 'live, 1980-08-04~5: Cobo Arena, Detroit, MI, USA'}, {'id': 'e529e98e-9d25-49c7-8c17-e0eb876dc83f', 'title': 'Any Way You Want It', 'length': '203000'}]


In [None]:
df.to_sql('songs', engine, if_exists='replace')

10

In [None]:
with engine.connect() as connection:
    df2 = pd.read_sql_table('songs', con=connection, schema='eighties')

In [None]:
df2

Unnamed: 0,index,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,isrc,genres
0,0,81,0.31,0.7,9,-5.678,1,0.047,0.011,0.00965,0.0828,0.763,188.386,255493,4,AUAP08000046,"['hard rock', 'rock', 'classic rock', 'ACDC', ..."
1,0,79,0.532,0.767,7,-5.509,1,0.0574,0.00287,0.000513,0.39,0.755,127.361,210173,4,AUAP08000047,"['hard rock', 'rock', 'classic rock', '80s', '..."
2,0,76,0.751,0.501,5,-12.151,1,0.0551,0.234,0.112,0.0467,0.902,167.057,190627,4,USRC10301828,"['80s', 'pop', '500 Days of Summer', 'Soundtra..."
3,0,71,0.933,0.528,5,-6.472,0,0.162,0.112,0.329,0.163,0.756,109.975,214653,4,GBUM71029605,"['classic rock', 'rock', 'Queen', '80s', 'glam..."
4,0,78,0.916,0.72,0,-8.548,1,0.1,0.36,0.00016,0.0958,0.76,103.312,237000,4,USIR28000016,"['reggae', 'Bob Marley', 'roots reggae', '80s'..."
5,0,75,0.554,0.783,6,-5.852,1,0.0457,0.416,2e-06,0.631,0.813,105.39,162467,4,USRN19400384,"['country', '80s', 'female vocalists', 'pop', ..."
6,0,71,0.529,0.932,7,-7.501,1,0.0488,0.00251,0.00109,0.136,0.571,138.158,201693,4,USSM18100114,"['classic rock', '80s', 'rock', 'journey', 'ha..."
7,0,67,0.599,0.762,0,-6.887,1,0.0423,0.714,4e-06,0.35,0.715,76.961,163373,4,GBUM71029612,"['classic rock', 'rock', 'Queen', '80s', '70s']"
8,0,71,0.741,0.58,4,-9.05,1,0.0304,0.117,3.3e-05,0.212,0.944,127.402,171267,4,USCH38400009,"['80s', 'rock', 'classic rock', 'female vocali..."
9,0,71,0.306,0.722,0,-5.963,1,0.034,0.0519,0.0,0.117,0.41,174.732,200293,4,USSM11102617,"['80s', 'classic rock', 'rock', 'soft rock', '..."


In [9]:
# Make initial request to get total number of results
def get_number_of_tracks(year, letter):
    result = sp.search(q=f'year:{year} track:{letter}*', type='track', limit=1, offset=0)
    return result['tracks']['total']

In [10]:
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

In [11]:
def rec_func(year, letter_0 = ''):
    limit = 50
    for letter_1 in alphabet:
        letter = letter_0 + letter_1
        if get_number_of_tracks(year, letter) < 1000:
            print(year, letter)
            # Loop through results and retrieve tracks
            offset = 0

            while offset < total_results:
                result = sp.search(q=f'year:{year} track:{letter}*', type='track', limit=limit, offset=offset)
                track_features = []
                for track in result['tracks']['items']:
                    features = get_track_features(track['id'])
                    track_features.append(features)
                offset += limit
                df = pd.concat([df, pd.DataFrame(track_features)])
        else:
            rec_func(year, letter)

In [12]:
for year in range(1980, 1990):
    rec_func(year,'')

1980 aa


UnboundLocalError: local variable 'df' referenced before assignment

In [None]:
df.isna().any()