# Pulling User Listening Data

In [1]:
import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

from helper_functions import *

In [2]:
# Just a few constants...
# ----------------------|
# The app credentials are read from the environment so that no secret is
# stored in the notebook. Export SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# (the variable names spotipy itself documents) before starting the kernel.
# NOTE: any client secret that was ever committed in this notebook should be
# treated as leaked and rotated in the Spotify developer dashboard.
try:
  CLIENT_ID = os.environ["SPOTIPY_CLIENT_ID"]
  CLIENT_SECRET = os.environ["SPOTIPY_CLIENT_SECRET"]
except KeyError as missing_var:
  # Fail right here with a readable message instead of deep inside the OAuth flow.
  raise RuntimeError(
    f"Environment variable {missing_var} is not set; "
    "export your Spotify app credentials before running this notebook."
  ) from None
REDIRECT_URL = "http://localhost:9001/callback"
SCOPE = "user-library-read user-top-read playlist-modify-public"

# Set up authentication
# --------------------|
# `sp` is the authenticated client that every later cell passes to the helpers.
sp = spotipy.Spotify(auth_manager = SpotifyOAuth(
  client_id = CLIENT_ID,
  client_secret = CLIENT_SECRET,
  redirect_uri = REDIRECT_URL,
  scope = SCOPE
))

In [3]:
# Top artists: fetch -> data frame -> pickle
# ------------------------------------------|
print("\nGetting and Filtering Top Artist Data...\n")

# Initial response from the top-artists endpoint; offset_api_limit receives it
# together with the client (presumably to collect the remaining results --
# see helper_functions to confirm).
artists_first_page = sp.current_user_top_artists()
top_artists = offset_api_limit(sp, artists_first_page)

# Turn the collected artists into a data frame and persist it next to the notebook.
top_artists_df = get_artists_df(top_artists)
top_artists_df.to_pickle("./top_artists.pkl")

print("Artist Data Saved!")


Getting and Filtering Top Artist Data...

Artist Data Saved!


In [None]:
# Top tracks: fetch -> data frame -> audio features -> pickle
# -----------------------------------------------------------|
print("\nGetting and Filtering Top Track Data...\n")

# Initial response from the top-tracks endpoint, handed to offset_api_limit
# along with the client.
tracks_first_page = sp.current_user_top_tracks()
top_tracks = offset_api_limit(sp, tracks_first_page)

# Stage 1: the plain track table. Stage 2: the table returned by
# get_track_audio_df, which is the one that gets saved.
top_tracks_base_df = get_tracks_df(top_tracks)
top_tracks_df = get_track_audio_df(sp, top_tracks_base_df)

top_tracks_df.to_pickle("./top_tracks.pkl")

print("Track Data Saved!")

In [None]:
# Load playlist data
# -----------------|
print("\nGetting Playlist Data...\n")

# Cache location for the playlist frame. This is the same file the following
# cell reloads with pd.read_pickle, so a fresh "Restart & Run All" reads back
# exactly what was fetched here instead of a stale or missing file.
PLAYLISTS_PKL = "./data/playlists.pkl"

# Create the playlists data frame
playlist_tracks_df = get_all_playlist_tracks_df(sp, sp.current_user_playlists())
# Add the audio features
playlists_df = get_track_audio_df(sp, playlist_tracks_df)

# Save the data in a .pkl file (create the data directory on first run)
os.makedirs(os.path.dirname(PLAYLISTS_PKL), exist_ok = True)
playlists_df.to_pickle(PLAYLISTS_PKL)

print("Playlist Data Saved!")

In [None]:
# Reload the playlist frame from its pickle so the cells below can run without
# querying the API again. Only unpickle files you created yourself.
playlists_pkl_path = "./data/playlists.pkl"
playlists_df = pd.read_pickle(playlists_pkl_path)

In [None]:
# Generating the sample recommendations
print("\nGetting Sample Recommendations...\n")

# Names of the playlists whose tracks are used as seeds.
search_space = [
    'temp2',
    'temp',
    'waiter',
    "Zack’s Cake",
    'Just Once More',
    'West End Coffee',
    'Tour Dates',
    'Shea Butter Shampoo',
    'Biting the Bottleneck',
    'Scare Tactics',
    'I should have seen the view from halfway down',
    'Never quite steady',
    'Tautology',
    "Don’t forget the bouquet!",
    'Chillis Booth',
    '15mg',
    'Gradient Descent',
    'Market Watch',
    'Project ⊗',
    'The Iceman !',
    'The intermediary 🚧',
    'Smoothie King',
    'Bus No. 5509',
    'Driving down eldridge',
    'The Cedars of Edina',
]

# Keep only the rows that belong to one of the seed playlists.
is_seed_playlist = playlists_df["playlist_name"].isin(search_space)
filtered_playlists = playlists_df[is_seed_playlist]

# One entry per distinct track id, in order of first appearance.
seed_track_ids = filtered_playlists['id'].drop_duplicates().tolist()

# Request the recommendations.
# Change indexing and call multiple times because of rate-limits.
# NOTE(review): the same literal 11 is passed to get_recs and merge_temp_files;
# presumably they must agree -- confirm in helper_functions.
recs = get_recs(sp, seed_track_ids, 11)

# Merge the temporary files and keep an intermediary copy of the new
# recommendations on disk.
new_recs_df = merge_temp_files(11)
new_recs_df.to_pickle("./data/new_recommendations.pkl")

# Append the previous recommendations. New rows come first, so the default
# keep='first' of drop_duplicates retains the new copy of a repeated id.
old_recs_df = pd.read_pickle("./data/old_recommendations.pkl")
combined_recs_df = pd.concat([new_recs_df, old_recs_df], ignore_index = True)
unique_recs_df = combined_recs_df.drop_duplicates(subset = ['id'])

# Add the audio features.
# Note: Batching required, refer to the below cells
recs_df = get_track_audio_df(sp, unique_recs_df)

# Write the final frame out to a .pkl file.
recs_df.to_pickle("./recommendations.pkl")

In [3]:
# Manual shortcut, kept commented out: rebuild recs_df from the two pickles
# saved under ./data instead of requesting recommendations again.
# Uncomment to run; the last line displays the shape of the combined frame.
# old_recs_df = pd.read_pickle("./data/old_recommendations.pkl")
# new_recs_df = pd.read_pickle("./data/new_recommendations.pkl")

# recs_df = pd.concat([new_recs_df, old_recs_df], ignore_index = True)
# recs_df = recs_df.drop_duplicates(subset = ['id'])

# recs_df.shape

(16522, 19)

In [4]:
# Run this in batches manually by indexing
# i.e., recs_df.iloc[i:i+batch_size] = get_track_audio_df(sp, recs_df.iloc[i:i+batch_size])
# The two lines below are the last batch (rows 16500 to the end), kept commented out.
# NOTE(review): the file name is a plain string, not an f-string, so "{25}"
# would end up literally in the name -- confirm this matches what
# merge_temp_files expects before re-running.
# tmp_df = get_track_audio_df(sp, recs_df.iloc[16500:recs_df.shape[0], :])
# tmp_df.to_pickle("./data/tmp_recs_audio_{25}.pkl")

In [3]:
# Final manual step, kept commented out: call merge_temp_files with 25
# (presumably the number of temporary batch files -- confirm in
# helper_functions) and save the result as ./data/recommendations.pkl.
# full_recs_df = merge_temp_files(25)
# full_recs_df.to_pickle("./data/recommendations.pkl")