

## Types of data

* *slug*_songs.json: List of all the songs for that band. It contains metadata about the songs such as number of times played
* *slug*_years.json: List of every year in which shows were played, with metadata about those shows.
* *year*_shows.json: Every show from that year, with its sources nested similarly to ReListen. Placed in the *shows* folder.


* *slug*_shows.csv: Every song, for every source, for every show, for every year. It contains everything except some metadata (TODO: specify which metadata is excluded).

In [1]:
import json
import os
import requests
from pathlib import Path
import sys
import time
import urllib

# Relisten API endpoint and the artist (by slug) whose data is downloaded.
api_url = "https://api.relisten.net/api/v2/"
slug = "grateful-dead"

# Save location: everything goes under "<slug>_data/", with the per-year show
# files in its "shows" sub-directory.
parent_dir = "%s_data" % slug
songs_file = Path(parent_dir) / ("%s_songs.json" % slug)
years_file = Path(parent_dir) / ("%s_years.json" % slug)
shows_dir = Path(parent_dir) / "shows"

# Create the whole output tree up front.  parents=True also creates
# "<slug>_data/" when it does not exist yet, and exist_ok=True makes re-running
# this cell harmless.  Real failures (e.g. permissions) are raised here instead
# of being silently swallowed and surfacing later when a file is opened.
shows_dir.mkdir(parents=True, exist_ok=True)

# Accumulators filled in by the later cells.
years = []
shows_dict = {}
combined_table = []


In [2]:
# Download songs
#
# Fetches the artist's song list from the API and saves it, pretty-printed,
# to songs_file.

req_url = urllib.parse.urljoin(api_url, "artists/%s/songs" % slug)
r = requests.get(req_url)
print(r)
print(r.url)

# Stop on an HTTP error (after the status has been printed above) so that an
# error payload is never parsed and written over a previously saved file.
r.raise_for_status()

parsed = json.loads(r.text)
print(len(parsed))

with open(songs_file, 'w') as f:
    json.dump(parsed, f, indent=4)


<Response [200]>
https://api.relisten.net/api/v2/artists/grateful-dead/songs
557


In [3]:
# Download years
#
# Fetches the list of years for the artist, saves it, pretty-printed, to
# years_file, and records the years as integers in the global `years` list.

req_url = urllib.parse.urljoin(api_url, "artists/%s/years" % slug)
r = requests.get(req_url)
print(r)
print(r.url)

# Stop on an HTTP error (after the status has been printed above) so that an
# error payload is never parsed and written over a previously saved file.
r.raise_for_status()

parsed = json.loads(r.text)
print(len(parsed))

with open(years_file, 'w') as f:
    json.dump(parsed, f, indent=4)

# Replace the list contents in place rather than appending: re-running this
# cell would otherwise duplicate every year and make the show download fetch
# each year more than once.
years[:] = [int(c["year"]) for c in parsed]

<Response [200]>
https://api.relisten.net/api/v2/artists/grateful-dead/years
31


In [5]:
# Get show information, broken up by year
#
# For every year in `years`: fetch that year's show listing, fetch each listed
# show individually to get its sources, attach those sources to the listing
# entry, and write the result to "<year>_shows.json" inside shows_dir.
# After each year, the time spent on that year and the total elapsed time are
# printed (in seconds).
#
# A request that does not return HTTP 200 is reported and skipped; its body is
# not parsed as if it were valid data.

start = time.time()
last_time = start
for y in years:
    print("%d: " % y, end="")
    req_url = urllib.parse.urljoin(api_url, "artists/%s/years/%d" % (slug, y))
    r = requests.get(req_url)
    if r.status_code != 200:
        print("Error getting data for year: %d" % y)
        # Nothing usable for this year: skip it, but keep the per-year timer
        # correct for the next iteration.
        last_time = time.time()
        continue

    year = json.loads(r.text)
    # `shows` is the same object as `year`; the sources are merged into it
    # in place below and it is what gets written to disk.
    shows = year

    for s, listing in enumerate(year["shows"]):
        display_date = listing["display_date"]
        req_url = urllib.parse.urljoin(
            api_url, "artists/%s/years/%d/%s" % (slug, y, display_date)
        )
        r = requests.get(req_url)
        if r.status_code != 200:
            print("Error getting data for year: %d; show: %s" % (y, display_date))
            print("Status: %d" % r.status_code)
            # Leave this show's entry without sources and move on.
            continue

        show = json.loads(r.text)
        shows["shows"][s]["sources"] = show["sources"]

    # Written without indentation.
    with open(shows_dir / ("%d_shows.json" % y), "w") as f:
        json.dump(shows, f)

    # Take one timestamp so both reported durations share the same end point.
    now = time.time()
    print("%5.2f, %5.2f" % (now - last_time, now - start))
    last_time = now

1965:  0.46,  0.46
1966:  4.89,  5.35
1967:  3.15,  8.50
1968:  5.81, 14.30
1969: 17.25, 31.56
1970: 16.84, 48.40
1971: 14.32, 62.72
1972: 14.86, 77.58
1973: 14.45, 92.04
1974:  8.43, 100.47
1975:  3.39, 103.86
1976:  8.12, 111.97
1977: 13.23, 125.21
1978: 15.69, 140.90
1979: 15.31, 156.20
1980: 17.76, 173.96
1981: 19.53, 193.49
1982: 12.62, 206.11
1983: 15.41, 221.52
1984: 16.01, 237.53
1985: 16.73, 254.26
1986:  9.79, 264.05
1987: 18.41, 282.46
1988: 17.09, 299.55
1989: 17.02, 316.57
1990: 17.24, 333.81
1991: 16.43, 350.24
1992: 10.66, 360.90
1993: 15.02, 375.92
1994: 13.44, 389.36
1995:  8.35, 397.71


In [None]:
# Reshape downloaded files into single table

