## Setup

### Package Import

In [None]:
# Install external packages for Google Colab
if 'google.colab' in str(get_ipython()):
    !pip install rasterfairy-py3 spotipy umap-learn -q

In [None]:
# Import packages
from getpass import getpass        # secure text entry
from io import BytesIO             # reading binary data
from IPython.display \
  import clear_output              # clearing cell output
import numpy as np                 # mathematics
import os                          # setting environment variables
import pandas as pd                # storing audio features as dataframe
# TODO: import the Image module from the PIL package
"___________________"              # image export
import plotly.express as px        # basic interactive visualisation
import plotly.graph_objects as go  # advanced plotting functionality
import rasterfairy                 # rasterfying point clouds
import requests                    # web requests
from sklearn.preprocessing \
  import StandardScaler            # feature standardisation
# TODO: import the spotipy package
"____________"                     # Python interface to Spotify API
import umap                        # dimensionality reduction

### Parameters

In [None]:
# Parameters
# NB: Use 'vr1q8on6ji60ceo0kregujkkx' for playlists from WDSS
# TODO: supply a username
USERNAME = '_________________________'  # user to extract public playlists from
# RESOLUTION is used both as an index into each album's list of artwork images
# and as the argument to `resolution_map` (600, 300 or 64 pixels)
RESOLUTION = 2  # 0 (extreme); 1 (high); 2 (low)
# Any value other than the two listed raises a ValueError during rasterfication
ORIENTATION = 'landscape'  # landscape; portrait
MIN_RATIO = 0.5  # minimum ratio between shortest and longest dimension

### Helper Functions

In [None]:
# Define helper functions
def depaginate(first_page):
  """
  Yield every item from a paginated Spotify API response.

  Large responses from the Spotify API are split over several pages. Starting
  from `first_page`, this generator yields each entry of the page's `items`
  list and then asks the module-level client `sp` for the following page,
  stopping as soon as a falsy page (e.g. `None`) is encountered.
  """
  current = first_page
  while True:
    if not current:
      return
    yield from current['items']
    current = sp.next(current)

def resolution_map(resolution):
  """Look up the side length in pixels for a resolution level (0, 1 or 2)."""
  pixel_sizes = [600, 300, 64]  # extreme, high, low
  return pixel_sizes[resolution]

def mel_to_harm(key):
  """Map an absolute key (0-11) to its position on the circle of fifths."""
  # Multiplying by seven and wrapping at twelve gives the circle-of-fifths slot
  fifths_steps = key * 7
  return fifths_steps % 12

def create_hover_text(track_df, features=None):
  """
  Build the per-track hover label used by the plots.

  The label always lists the track name, artist and album. When `features` is
  supplied, one "<feature>: <value>" line per feature is appended inside the
  `<extra>` section; leave it out to omit the audio features.
  """
  hover = "Name: " + track_df['name'] + "<br>"
  hover = hover + "Artist: " + track_df['artist'] + "<br>"
  hover = hover + "Album: " + track_df['album'] + "<br><extra>"
  for feature in (features or ()):
    # Values are converted to text so they can be joined onto the label
    values = track_df[feature].apply(str)
    hover = hover + feature + ": " + values + "<br>"
  hover = hover + "</extra>"
  return hover

def find_valid_sample_count(track_df, min_ratio):
  """
  Find a sample count that can form a rectangle satisfying a minimum ratio.

  For difficult numbers of observations (e.g. a prime), only long rectangles
  can be formed in the final visualisation. We therefore keep decreasing the
  number of points until we can form a rectangle that satisfies a minimum
  ratio between side lengths.

  Parameters:
    track_df: any sized collection of tracks; only `len(track_df)` is used.
    min_ratio: smallest acceptable ratio between the two side lengths.

  Returns the largest count `n <= len(track_df)` whose first rectangle
  arrangement meets `min_ratio`.

  Raises a ValueError if no positive count satisfies the ratio (for example
  when `track_df` is empty).
  """
  n = len(track_df)
  while n > 0:
    # Assumes the first arrangement is ordered (shorter side, longer side),
    # which is how the ratio has always been computed here - TODO confirm
    i, j = rasterfairy.getRectArrangements(n)[0]
    # Compare against the argument rather than the global MIN_RATIO so that
    # the function honours the value the caller actually passed in
    if i / j >= min_ratio:
      return n
    n -= 1
  raise ValueError(
      f"no sample count up to {len(track_df)} satisfies min_ratio={min_ratio}"
  )

### API Connection

In [None]:
# Authentication environment variables
# The prompts are skipped when both variables are already set, unless
# `overwrite_existing` is switched on
overwrite_existing = False  # set to True to overwrite existing values
if overwrite_existing or 'SPOTIPY_CLIENT_ID' not in os.environ \
    or 'SPOTIPY_CLIENT_SECRET' not in os.environ:
  # `getpass` is used for secure text entry of the credential
  os.environ['SPOTIPY_CLIENT_ID'] = getpass("What is your client ID?\n")
  # TODO: ask for the client secret and assign it to the
  # `SPOTIPY_CLIENT_SECRET` environment variable (base this on the line above)
  "____________________________________________________________________________"

In [None]:
# Initialise client
# No credentials are passed explicitly. NOTE(review): this presumably relies on
# the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables set in
# the previous cell - confirm against the spotipy documentation
auth_manager = spotipy.SpotifyClientCredentials()
# `sp` is the module-level client used by `depaginate` and the cells below
sp = spotipy.Spotify(auth_manager=auth_manager)

## Data Scraping

### Collect Playlists

In [None]:
# Create list to store key playlist information (one dict per playlist)
playlist_info = []

# Loop through all playlists, following the pagination of the response
for playlist in depaginate(sp.user_playlists(USERNAME)):
  # Add playlist information to list
  playlist_info.append({
      'name': playlist['name'],
      'size': playlist['tracks']['total'],
      # TODO: add the `id` field to the playlist information
      # (it is read back as `playlist_info[...]['id']` when collecting tracks)
      "___________________",
  })

In [None]:
# Display playlist choices
# Zero-pad each index to the width of the largest one so the list lines up
max_digits = len(str(len(playlist_info)))
for position, entry in enumerate(playlist_info, start=1):
  padded = str(position).zfill(max_digits)
  print(f"({padded}) {entry['name']} [{entry['size']} tracks]")

### Playlist Selection

In [None]:
# Get user to select a playlist
if not playlist_info:
  # Without this guard the prompt below could never be answered validly
  raise ValueError("no playlists available to select from")

valid = False  # placeholder to start off loop

while not valid:
  # Ask user for input, ignoring accidental surrounding whitespace
  choice_idx = input("Select a playlist index from the list above:\n").strip()
  # Check if the input was valid. `isdecimal` is used rather than `isnumeric`
  # because the latter accepts characters such as '½' that `int` rejects
  valid = choice_idx.isdecimal() and 0 < int(choice_idx) <= len(playlist_info)
  if not valid:
    # Replace the previous prompt/message rather than stacking them up
    clear_output(wait=True)
    print(f"Selection must be an integer between 1 and {len(playlist_info)}")

# Python returns input as text so convert to an integer
choice_idx = int(choice_idx)
# The displayed list is one-based, hence the `- 1` when indexing
print(f"\nYou selected '{playlist_info[choice_idx-1]['name']}'")

### Collect Track Details

In [None]:
# Get the ID of the chosen playlist (the displayed indices are one-based)
choice_id = playlist_info[choice_idx-1]['id']
# Create list to store key track information
# TODO: create an empty list called `track_info` (see Collect Playlists)
"_____________"

# Loop through all playlist items
# TODO: call the `sp.playlist_items(...)` command on the chosen id and
# loop through the depaginated results (again, see Collect Playlists)
for item in "______________________________________":
  # Extract track information, ignoring item metadata (e.g. date added)
  track = item['track']
  track_info.append({
      'name': track['name'],
      # Only the first listed artist is kept
      'artist': track['artists'][0]['name'],
      'album': track['album']['name'],
      # NB: some tracks are missing artworks for all/some resolutions. If this
      # is an issue, the simplest solution is to remove them from the playlist
      'art_url': track['album']['images'][RESOLUTION]['url'],
      'id': track['id'],
  })

### Collect Audio Features

In [None]:
# List features to collect. See: https://spoti.fi/2Rhrtye
# Only keys named here are copied from the API response into `track_info`
# TODO: add valence and tempo to the end of the list of included features
included = (
    'duration_ms', 'key', 'mode', 'time_signature',
    'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 
    'loudness', 'speechiness', '_______', '_____', 
)

# Audio feature requests have to be performed in batches of at most 100 IDs
for offset in range(0, len(track_info), 100):
  # The slice holds the same dict objects as `track_info`, so updating them
  # below updates the original records
  batch = track_info[offset:offset+100]
  # Collect all features for this batch of tracks
  features = sp.audio_features(t['id'] for t in batch)
  # Keep only the features named in `included`
  features = [
      {name: value for name, value in t.items() if name in included}
      for t in features
  ]
  # Merge each feature dict into the track record at the same position
  for i, track in enumerate(batch):
    track.update(features[i])

In [None]:
# Convert to dataframe: one row per track, one column per collected field
track_df = pd.DataFrame(track_info)

## Pre-processing

### Sampling

In [None]:
# Find a sample count that satisfies the minimum ratio
# TODO: find n, the maximum valid sample count by running the
# `find_valid_sample_count(...)` function with the two arguments `track_df`
# and `MIN_RATIO` (separate the arguments with a comma)
n = "__________________________________________"

# Take random sample of chosen size, using a fixed seed so that the same
# tracks are kept on every run. The index is reset so that row labels run from
# 0 to n - 1, which later cells use to look up each track's grid position
print(f"Randomly removing {len(track_df) - n} tracks to leave {n} remaining")
track_df = track_df.sample(n, random_state=1729).reset_index(drop=True)

In [None]:
# Extract feature columns into own dataframe by dropping the descriptive
# (non-audio-feature) columns; `track_df` itself is left unchanged
# TODO: drop the column containing the url for the cover art
# (see Collect Track Details to find its name)
features_df = track_df.drop(
    ['name', 'artist', 'album', '_______', 'id'], axis=1
)

### Feature Engineering

In [None]:
# Transform key from polar coordinates using melodic distance
# (`key / 6 * pi` spreads the twelve keys evenly around a full circle, so the
# x/y pair places each key on the unit circle)
features_df['key_melodic_x'] = np.cos(features_df['key'] / 6 * np.pi)
# TODO: Create a column `key_melodic_y` in a similar way to the x version
# but using the sine function rather than cosine
"___________________________________________________________________"

# Transform key from polar coordinates using harmonic distance
# (`mel_to_harm` first re-orders the keys by the circle of fifths)
features_df['key_harmonic'] = features_df['key'].apply(mel_to_harm)
# TODO: Create `key_harmonic_x` and `key_harmonic_y` by transforming
# `key_harmonic` in the same way that we transformed `key` above
"_____________________________________________________________________________"
"_____________________________________________________________________________"

# Remove redundant columns
# TODO: Drop the columns `key` and `key_harmonic` (see the previous code cell)
features_df.drop("_____________________", axis=1, inplace=True)

# Handle missing keys
# NB: calling `fillna(..., inplace=True)` on an `iloc` slice only modifies a
# temporary copy, so the filled values are assigned back to the dataframe
features_df.iloc[:, -4:] = features_df.iloc[:, -4:].fillna(0)

### Standardisation

In [None]:
# TODO: transform the `features_df` using a standardisation scaler
scaled_features = StandardScaler().fit_transform("_________")

# The key is spread over the last four columns (assumed to be the x/y pairs of
# the melodic and harmonic encodings added during feature engineering), so
# weight each of these by a factor of one fourth
scaled_features[:,-4:] /= 4

## Dimensionality Reduction

### Embedding

In [None]:
# Embed the audio features into two dimensions
embedding = umap.UMAP(
    n_components=2,
    # Embedding hyperparameters used to balance local and global structure
    # See:
    #   https://pair-code.github.io/understanding-umap/
    #   https://umap-learn.readthedocs.io/en/latest/parameters.html
    # TODO: Set initial hyperparameter values to be 10 for `n_neighbors` and
    # 0.5 for `min_dist` (NB: the keyword uses the US spelling `n_neighbors`)
    "____________",
    "__________",
    metric='euclidean',
    # Random seed used for reproducibility
    random_state=1729).fit_transform(scaled_features)

In [None]:
# View embedding
x = embedding[:, 0]
# TODO: use the column with index 1 for the y variable
"_________________"

fig = go.Figure()

# One point per track, positioned at its two embedding coordinates
fig.add_trace(go.Scatter(
    x=x, y=y,
    # Show the pre-built label from `text` when hovering over a point
    hovertemplate = '%{text}',
    # Add `included` as a second argument to show audio features on hover
    text=create_hover_text(track_df)
))

# Plot markers rather than lines
fig.update_traces(mode='markers')

fig.show()

### Rasterfication

In [None]:
# Rasterfy embedding
grid, dims = rasterfairy.transformPointCloud2D(
    embedding[:, :], proportionThreshold=MIN_RATIO
)

# Fix grid type: the positions are used as array indices in later cells
grid = grid.astype(int)

# Fix orientation (portrait by default)
if ORIENTATION == 'landscape':
  # Swap the two axes of both the positions and the grid dimensions
  grid = grid[:, ::-1]
  dims = dims[::-1]
elif ORIENTATION != 'portrait':
  raise ValueError("invalid orientation")

In [None]:
# View rasterfied embedding
# Column 0 of `grid` is plotted horizontally and column 1 vertically
x = grid[:, 0]
y = grid[:, 1]

fig = go.Figure()

fig.add_trace(go.Scatter(
    # TODO: Fill in the missing scatter plot parameters (see the plot above)
    "_______",
    # Show the pre-built label from `text` when hovering over a point
    hovertemplate = '%{text}',
    # Add `included` as a second argument to show audio features on hover
    text="_________________________"
))

# TODO: plot markers rather than lines (again, see above)
"_______________________________"

# TODO: show the plot
"_______"

## Visualisation

### Create Image

In [None]:
# Convert resolution level to number of pixels
# NOTE(review): `px` rebinds the name used for the `plotly.express` import at
# the top of the notebook, so `px` no longer refers to plotly after this cell
px = resolution_map(RESOLUTION)
width = px * dims[0]
# TODO: create a variable `height` using the 2nd element of `dims`
"___________________"

# Create empty grid to store cover art (rows x columns x 3 colour channels)
art_grid = np.empty((height, width, 3))

# Download each cover and paste it into its cell of the grid. The row label
# `i` doubles as the row position in `grid`, which relies on `track_df` having
# a 0..n-1 index
for i, row in track_df.iterrows():
  res = requests.get(row['art_url'])
  # Fail with a clear HTTP error rather than an image-decoding error
  res.raise_for_status()
  img = Image.open(BytesIO(res.content))
  # `Image.ANTIALIAS` was removed in Pillow 10; `Image.LANCZOS` is the same
  # filter under its current name
  img = img.resize((px, px), Image.LANCZOS).convert('RGB')
  # Top-left pixel of this track's cell (grid column 1 is the image row)
  top, left = px * grid[i, 1], px * grid[i, 0]
  art_grid[top:top + px, left:left + px] = np.array(img)

### Interactive Visualisation

In [None]:
fig = go.Figure()

# Add invisible markers in the corners to help with autoscaling
fig.add_trace(go.Scatter(
    x=[0, width], y=[0, height],
    mode='markers', marker_opacity=0
))

# Add background image, positioned at (0, height) and stretched to cover
# `width` by `height` in axis units
fig.add_layout_image({
        'x': 0, 'sizex': width, 'xref': 'x',
        'y': height, 'sizey': height, 'yref': 'y',
        'opacity': 1.0, 'layer': 'below', 'sizing': 'stretch',
        # NB: the image itself is not flipped here; the row reversal happens
        # when filling `labels` below
        'source': Image.fromarray(art_grid.astype(np.uint8))
})

# Add labels through an invisible heatmap
# Heatmap coordinates sit at the centre of each cover (half a cell in)
x = [px // 2 + px * i for i in range(dims[0])]
y = [px // 2 + px * j for j in range(dims[1])]
# Add `included` as a second argument to show audio features on hover
hover_text = create_hover_text(track_df)
# `labels` is indexed (row, column), hence the reversed `dims`
labels = np.empty(dims[::-1], dtype='object')
for i in range(len(track_df)):
    # Images are plotted downwards so reverse first axis (the row index)
    labels[dims[1] - 1 - grid[i, 1], grid[i, 0]] = hover_text[i]
z = np.random.uniform(size=dims[::-1])  # random values for invisible heatmap

fig.add_trace(go.Heatmap(
    x=x, y=y, z=z, hovertemplate = '%{text}', text=labels,
    opacity=0, showscale=False
))

# Hide the x axis and limit it to the width of the image
fig.update_xaxes(
    visible=False,
    range=[0, width]
)
fig.update_yaxes(
    # TODO: set axis visibility and range (using `height`)
    "___________",
    "_______________",
    # Fix aspect ratio
    scaleanchor='x'
)

# Size the figure to match the image exactly, with no surrounding margins
fig.update_layout(
    width=width,
    height=height,
    margin={"l": 0, "r": 0, "t": 0, "b": 0},
    plot_bgcolor='black'
)

# Disable the autosize on double click because it adds unwanted margins
fig.show(config={'doubleClick': 'reset'})

### Image Export

In [None]:
# Save image to files - download from the sidebar
# The pixel grid is cast to 8-bit integers before being written out as a PNG
Image.fromarray(art_grid.astype(np.uint8)).save('wall_of_music.png')