###  API wrappers - Create your collection of songs & audio features  
**Instructions**

To move forward with the project, you need to create a collection of songs with their audio features - as large as possible!

These are the songs that we will cluster. And, later, when the user inputs a song, we will find the cluster to which the song belongs and recommend a song from the same cluster. The more songs you have, the more accurate and diverse recommendations you'll be able to give. Although... you might want to make sure the collected songs are "curated" in a certain way. Try to find playlists of songs that are diverse, but also that meet certain standards.

The process of sending hundreds or thousands of requests can take some time - it's normal if you have to wait a few minutes (or, if you're ambitious, even hours) to get all the data you need.

An idea for collecting as many songs as possible is to start with all the songs of a big, diverse playlist and then go to every artist present in the playlist and grab every song of every album of that artist. The number of songs you'll be collecting per playlist will grow exponentially!

### exploratory

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time
import pandas as pd
from pandas import json_normalize

In [None]:
# retrieve secrets
def read_secrets(file_path):
    """Read the Spotify client ID and client secret from a text file.

    The first line must hold the client ID and the second line the client
    secret, each written as ``label:value``.

    Args:
        file_path: Path to the secrets file.

    Returns:
        A ``(client_id, client_secret)`` tuple of strings with surrounding
        whitespace removed.

    Raises:
        IndexError: If the file has fewer than two lines, or one of the two
            lines contains no ``:`` separator.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    def value_of(line):
        # Split on the FIRST colon only, so a value that itself contains ':'
        # is kept whole; strip so "label: value" does not yield " value".
        return line.split(':', 1)[1].strip()

    client_id = value_of(lines[0])
    client_secret = value_of(lines[1])
    return client_id, client_secret

# Read client ID and client secret from secrets.txt
client_id, client_secret = read_secrets('secrets.txt')

In [None]:
# Initialize Spotipy client
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

# Function to retrieve all track IDs from a playlist
def get_playlist_tracks(playlist_id):
    """Return the IDs of all tracks in a playlist, following pagination.

    Args:
        playlist_id: Spotify ID of the playlist to read.

    Returns:
        A list of track IDs. Playlist entries that have no track object or
        no track ID are skipped.
    """
    results = sp.playlist_tracks(playlist_id)
    items = list(results['items'])
    # Keep requesting pages until the API reports there is no next page.
    while results['next']:
        results = sp.next(results)
        items.extend(results['items'])

    track_ids = []
    for item in items:
        track = item['track']
        # The API can return a null track object, or a track with a null ID
        # (e.g. local files). A None ID cannot be looked up later, so drop it.
        if track is None or track['id'] is None:
            continue
        track_ids.append(track['id'])
    return track_ids

# Function to retrieve all albums for an artist
def get_artist_albums(artist_id):
    """Collect every album entry of an artist across all result pages.

    Args:
        artist_id: Spotify ID of the artist.

    Returns:
        A list with the album items of every page returned by
        ``sp.artist_albums`` (requested with ``album_type='album'``).
    """
    page = sp.artist_albums(artist_id, album_type='album')
    collected = list(page['items'])
    # Follow the 'next' links until the last page has been consumed.
    while page['next']:
        page = sp.next(page)
        collected += page['items']
    return collected

# Function to retrieve all tracks from an album
def get_album_tracks(album_id):
    """Return the track IDs of an album, reading every result page.

    Args:
        album_id: Spotify ID of the album.

    Returns:
        A list containing the ``'id'`` of each track item, in page order.
    """
    page = sp.album_tracks(album_id)
    items = list(page['items'])
    # Follow the 'next' links until the last page has been consumed.
    while page['next']:
        page = sp.next(page)
        items += page['items']
    track_ids = []
    for item in items:
        track_ids.append(item['id'])
    return track_ids


# Function to retrieve all albums for an artist with retries
def get_artist_albums_with_retry(artist_id, max_retries=3):
    """Fetch an artist's albums, retrying with exponential backoff on failure.

    Args:
        artist_id: Spotify ID of the artist.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The list returned by ``get_artist_albums(artist_id)``.

    Raises:
        Exception: If every attempt failed. The error of the last attempt is
            attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return get_artist_albums(artist_id)
        except Exception as e:  # broad on purpose: any failure is retried
            last_error = e
        if attempt == max_retries:
            # No attempt left: do not announce a retry or wait before failing.
            break
        print(f"Error retrieving albums for artist {artist_id}. Retrying...")
        time.sleep(2 ** attempt)  # Exponential backoff
    raise Exception(f"Failed after {max_retries} retries.") from last_error

# Function to retrieve all tracks from an album with retries
def get_album_tracks_with_retry(album_id, max_retries=3):
    """Fetch an album's track IDs, retrying with exponential backoff on failure.

    Args:
        album_id: Spotify ID of the album.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The list returned by ``get_album_tracks(album_id)``.

    Raises:
        Exception: If every attempt failed. The error of the last attempt is
            attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return get_album_tracks(album_id)
        except Exception as e:  # broad on purpose: any failure is retried
            last_error = e
        if attempt == max_retries:
            # No attempt left: do not announce a retry or wait before failing.
            break
        print(f"Error retrieving tracks for album {album_id}. Retrying...")
        time.sleep(2 ** attempt)  # Exponential backoff
    raise Exception(f"Failed after {max_retries} retries.") from last_error

# Function to get audio features for a list of tracks
def get_audio_features(track_ids):
    """Fetch audio features for many tracks, requesting them in batches.

    Args:
        track_ids: Sequence of track IDs.

    Returns:
        A single list with the results of all ``sp.audio_features`` calls,
        concatenated in request order.
    """
    batch_size = 50
    collected = []
    for start in range(0, len(track_ids), batch_size):
        chunk = track_ids[start:start + batch_size]
        collected += sp.audio_features(chunk)
    return collected

In [None]:
# initial playlist ID
playlist_id = '37i9dQZF1DXbTxeAdrVG2l'

# Retrieve track IDs from the seed playlist
# NOTE: this rebinds `playlist_id`. From here on it holds the list of track IDs
# returned by get_playlist_tracks, not the playlist ID string assigned above.
playlist_id = get_playlist_tracks(playlist_id)

In [None]:
len(playlist_id)#

In [7]:
# Expand the seed tracks: for every seed track, take its first listed artist
# and collect the track IDs of every album of that artist.
all_track_ids = []
seen_artist_ids = set()
for track_id in playlist_id:
    track_info = sp.track(track_id)
    artist_id = track_info['artists'][0]['id']
    # The same artist can appear on several seed tracks. Fetching their albums
    # again would only add duplicate IDs and cost many extra API requests.
    if artist_id in seen_artist_ids:
        continue
    seen_artist_ids.add(artist_id)
    albums = get_artist_albums_with_retry(artist_id)
    for album in albums:
        album_id = album['id']
        album_tracks = get_album_tracks_with_retry(album_id)
        all_track_ids.extend(album_tracks)
    # Short pause between artists to keep the request rate down.
    time.sleep(0.5)

In [None]:
len(all_track_ids)

In [None]:
# Remove duplicates while keeping first-seen order, so the resulting list (and
# anything written from it) is the same on every run. A plain set() gives no
# ordering guarantee.
all_track_ids = list(dict.fromkeys(all_track_ids))
len(all_track_ids)

In [None]:
#all_track_ids

In [None]:
# Retrieve audio features for all tracks
# (get_audio_features passes the IDs to sp.audio_features in batches of 50)
audio_features = get_audio_features(all_track_ids)

In [None]:
# Map every track ID to its audio-feature entry; the two lists are paired by
# position, and a repeated ID keeps the entry seen last.
audio_feature_list = dict(zip(all_track_ids, audio_features))

In [None]:
len(audio_feature_list)

In [None]:
# Build the DataFrame: the dict keys (track IDs) start out as columns, so
# transpose to get one row per track, then move the index into a column.
track_df = pd.DataFrame(audio_feature_list).transpose()
track_df = track_df.reset_index()

# The first column is the former index; name it 'track_id' and keep the rest.
track_df.columns = ['track_id', *track_df.columns[1:]]

track_df

In [None]:
# Write the collected features to CSV without the row index.
# NOTE(review): the seed playlist ID assigned earlier in this notebook is the
# same ID that is labelled "all out of 1990" further down, while this file is
# named '2000s.csv'. Presumably the seed ID was changed between runs; confirm
# the file name matches the playlist that was actually collected.
track_df.to_csv('2000s.csv', index=False)

In [None]:
# Load the per-decade CSV files. '1990s.csv' is not written anywhere in the
# visible code; presumably it comes from an earlier run (TODO confirm).
df_1990s = pd.read_csv('1990s.csv')
df_2000s = pd.read_csv('2000s.csv')

# Stack both frames row-wise and renumber the index from 0.
# NOTE(review): rows present in both files are not de-duplicated here.
combined_df = pd.concat([df_1990s, df_2000s], ignore_index=True)
combined_df

### refactored version

In [None]:
import pandas as pd
from spotipy_playlist import get_songs_from_playlists

In [None]:
from importlib import reload
import spotipy_playlist

# Pick up edits made to spotipy_playlist.py without restarting the kernel.
reload(spotipy_playlist)
# reload() does not update names that were bound earlier with
# `from spotipy_playlist import ...`, so rebind the function explicitly.
# Otherwise later cells keep calling the stale, pre-reload version.
from spotipy_playlist import get_songs_from_playlists

In [None]:
# Seed playlist IDs, one per decade.
# all out of 1990
# NOTE: the variable name is misspelled ("nintety"); it is kept because a later
# cell refers to it by this name.
nintety = '37i9dQZF1DXbTxeAdrVG2l'
# all out of 2000
two_thousand = '37i9dQZF1DX4o1oenSJRJd'
# all out of 2010
twenty_ten = '37i9dQZF1DX5Ejj0EkURtP'

# All three seeds together, for collecting every decade in a single call.
playlists = [nintety, two_thousand, twenty_ten]
playlists

In [None]:
# Retrieve a DataFrame for the given playlist, which also grabs all songs by
# each artist; the result is also written to CSV (spotify_songs.csv).
# NOTE(review): unlike the calls in the following cells, no second argument is
# passed here. Presumably the default output name is used; confirm in
# spotipy_playlist.
ninety_df = get_songs_from_playlists(nintety)

In [None]:
# Same collection for the 2000s seed playlist. The second argument is
# presumably the output name (TODO confirm against spotipy_playlist).
two_thousand_df = get_songs_from_playlists(two_thousand, 'two_thousands')

In [None]:
# Same collection for the 2010s seed playlist. The second argument is
# presumably the output name (TODO confirm against spotipy_playlist).
twenty_ten_df = get_songs_from_playlists(twenty_ten, 'twenty_tens')

In [None]:
# Collect all three decades in one call by passing the list of playlist IDs.
# Assumes get_songs_from_playlists accepts a list as well as a single ID
# string, as used in the cells above (TODO confirm against spotipy_playlist).
ninety_till_twenty_ten = get_songs_from_playlists(playlists, 'nineties_till_twenty_tens')