In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  
import urllib.request
import urllib.parse
import urllib.error
import json
import base64
import configparser
from bs4 import BeautifulSoup
import re
import pymongo
from datetime import datetime
import time
import collections
import editdistance

In [3]:
# Connect to the MongoDB server running on this machine (default port).
client = pymongo.MongoClient(host='mongodb://localhost:27017/')

In [4]:
# Select the `songs` database and the collections used within it.
songs_db = client['songs']
albums = songs_db['albums']
tracks = songs_db['tracks']
genius_tracks = songs_db['gtracks']

In [5]:
# Read settings from secrets.ini and show the option names in its
# [discogs] section (names only, not the values).
config = configparser.ConfigParser()
config.read('secrets.ini')
list(config['discogs'])

['app_name', 'consumer_key', 'consumer_secret', 'token']

In [None]:
def get_audio_features(track_ids, auth_type, auth_token):
    """Fetch Spotify audio features for `track_ids` and store them in `tracks`.

    All ids are sent in a single request. Each feature object in the response
    is merged (`$set`) into the document of the module-level `tracks`
    collection whose `_id` equals the feature object's `id`.

    Args:
        track_ids: iterable of track id strings; joined with commas.
        auth_type: scheme placed before the token in the `Authorization` header.
        auth_token: token placed after the scheme in the `Authorization` header.

    Returns:
        None. If `track_ids` yields no ids, no request is made.

    Raises:
        urllib.error.HTTPError: for any HTTP error other than 429. Only 429
            (rate limited) responses are retried, up to 10 attempts in total.
    """
    ids = ','.join(track_ids)
    if not ids:
        # Nothing to look up; do not send an empty `ids=` query.
        return

    url = 'https://api.spotify.com/v1/audio-features?ids={ids}'.format(ids=ids)
    headers = {'Authorization': auth_type + ' ' + auth_token}
    request = urllib.request.Request(url, headers=headers, method='GET')

    max_attempts = 10
    for _ in range(max_attempts):
        try:
            with urllib.request.urlopen(request) as f:
                response = json.loads(f.read().decode('utf-8'))
        except urllib.error.HTTPError as e:
            if e.code != 429:
                # Not a rate limit (e.g. bad token or bad request): retrying
                # will not help, so surface the real error to the caller.
                raise
            retry_after = e.headers.get('Retry-After') if e.headers is not None else None
            try:
                delay = int(retry_after)
            except (TypeError, ValueError):
                # Header missing or not a whole number of seconds.
                delay = 1
            print("Rate limited. Pausing for", delay)
            time.sleep(delay + 0.5)
            continue

        for track in response['audio_features']:
            if track is None:
                # The API returns null in place of a track it could not find.
                continue
            tracks.update_one({'_id': track['id']}, {'$set': track})
        return

    # Best effort: keep the notebook running, but do not fail silently.
    print("Giving up on audio features after", max_attempts, "rate-limited attempts")

In [None]:
def get_artists(artist_name):
    """Search Spotify for artists whose name equals `artist_name`, ignoring case.

    The search request is sent without an Authorization header.

    Args:
        artist_name: the artist name to search for.

    Returns:
        A list of dicts, one per exact (case-insensitive) name match, each with
        'name' and 'id', plus 'image' (URL of the first listed image) when the
        artist has any images.
    """
    params = urllib.parse.urlencode({'q': artist_name, 'type': 'artist'})
    search_url = 'https://api.spotify.com/v1/search?{}'.format(params)
    with urllib.request.urlopen(search_url) as resp:
        payload = json.loads(resp.read().decode('utf-8'))

    matches = []
    for candidate in payload['artists']['items']:
        # The search is fuzzy; keep only exact name matches.
        if candidate['name'].lower() != artist_name.lower():
            continue
        entry = {'name': candidate['name'], 'id': candidate['id']}
        if candidate['images']:
            entry['image'] = candidate['images'][0]['url']
        matches.append(entry)
    return matches

In [None]:
# Discogs search endpoint template (reference note, not executable code):
# /database/search?q={query}&{?type,title,release_title,credit,artist,anv,label,genre,style,country,year,format,catno,barcode,track,submitter,contributor}

In [45]:
def get_artist(artist_name):
    """Collect every Discogs artist search result for `artist_name`.

    Requests the first page of the Discogs database search (type 'artist'),
    then keeps following the 'next' link in each response's pagination block
    until a page has none. The token is read from `config['discogs']['token']`.

    Args:
        artist_name: the search query.

    Returns:
        A list with the 'results' entries of all pages, in page order.
    """
    params = urllib.parse.urlencode({'q': artist_name, 'type': 'artist'})
    next_url = 'https://api.discogs.com/database/search?{}'.format(params)
    auth_headers = {'Authorization': 'Discogs token=' + config['discogs']['token']}
    found = []
    while next_url:
        page_request = urllib.request.Request(next_url, headers=auth_headers, method='GET')
        with urllib.request.urlopen(page_request) as resp:
            page = json.loads(resp.read().decode('utf-8'))
        found.extend(page['results'])
        # The last page has no 'next' link, which ends the loop.
        next_url = page['pagination']['urls'].get('next')
    return found

In [44]:
# Fetch every Discogs artist search hit for 'Nirvana' and count them.
nivs = get_artist('Nirvana')
len(nivs)

{'next': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=2', 'last': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=5'}
{'first': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=1', 'next': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=3', 'last': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=5', 'prev': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=1'}
{'first': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=1', 'next': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=4', 'last': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=5', 'prev': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&type=artist&page=2'}
{'first': 'https://api.discogs.com/database/search?q=Nirvana&per_page=50&ty

201

In [47]:
# Narrow the hits to those whose title contains 'Nirvana' (case-sensitive).
[hit for hit in nivs if 'Nirvana' in hit['title']]

[{'id': 125246,
  'resource_url': 'https://api.discogs.com/artists/125246',
  'thumb': 'https://api-img.discogs.com/6AS7RIgqIBFEuGusd3tG_z2J2rs=/150x150/smart/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/A-125246-1105986304.jpg.jpg',
  'title': 'Nirvana',
  'type': 'artist',
  'uri': '/artist/125246-Nirvana'},
 {'id': 307513,
  'resource_url': 'https://api.discogs.com/artists/307513',
  'thumb': 'https://api-img.discogs.com/5skyOqEGgSOnt9lhPHhjP4uCACE=/150x150/smart/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/A-307513-1270236942.jpeg.jpg',
  'title': 'Nirvana (2)',
  'type': 'artist',
  'uri': '/artist/307513-Nirvana-2'},
 {'id': 1087206,
  'resource_url': 'https://api.discogs.com/artists/1087206',
  'thumb': 'https://api-img.discogs.com/Wgp-oaSc03_RK9skZYyCp3YrQOU=/150x150/smart/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/A-1087206-1266869411.jpeg.jpg',
  'title': 'Nirvana 2002',
  'type': 'artist',
  'uri

In [48]:
# Fetch every Discogs artist search hit for 'The Beatles' and count them.
tbs = get_artist('The Beatles')
len(tbs)

647

In [56]:
# Keep only hits titled exactly "The Beatles", optionally followed by a
# numeric disambiguator such as " (2)".
# The pattern is a raw string: \s, \( and \d are regex escapes, not Python
# string escapes, and are invalid escape sequences in a normal string literal.
atbs = [a for a in tbs if re.match(r'^The Beatles(\s\(\d+\))?$', a['title'])]
len(atbs)

3

In [57]:
# Show the matching search hits in full.
atbs

[{'id': 82730,
  'resource_url': 'https://api.discogs.com/artists/82730',
  'thumb': 'https://api-img.discogs.com/v1NIz7CyzwLHJsnSGIjg6sCL5FI=/150x150/smart/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/A-82730-1449581547-9306.jpeg.jpg',
  'title': 'The Beatles',
  'type': 'artist',
  'uri': '/artist/82730-The-Beatles'},
 {'id': 2517607,
  'resource_url': 'https://api.discogs.com/artists/2517607',
  'thumb': '',
  'title': 'The Beatles (2)',
  'type': 'artist',
  'uri': '/artist/2517607-The-Beatles-2'},
 {'id': 4290435,
  'resource_url': 'https://api.discogs.com/artists/4290435',
  'thumb': '',
  'title': 'The Beatles (3)',
  'type': 'artist',
  'uri': '/artist/4290435-The-Beatles-3'}]