# Winters, Alexander (V00970263)
# Music Recommendation System Part 1

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
import numpy as np
# Seed NumPy's global random generator with a fixed value so that any
# NumPy-based randomness in this notebook is repeatable between runs.
np.random.seed(1337)

In [3]:
import pandas as pd

## Authenticate with Spotify's API

In [4]:
# Spotify application credentials (client ID and client secret).
# Both are intentionally blank here; fill them in before running.
cid = secret = ''

In [5]:
# App-level authentication: no user login is involved, only the
# application's own client ID / secret.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [6]:
# Column layouts shared by both playlists
artist_columns = ['id', 'name', 'genres', 'popularity']
track_columns = [
    'id', 'name', 'popularity', 'duration_ms', 'artists', 'id_artists',
    'release_date', 'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'time_signature',
]

# One empty artist frame and one empty track frame per playlist
playlist_one_artist_df, playlist_two_artist_df = (
    pd.DataFrame(columns=artist_columns) for _ in range(2)
)
playlist_one_tracks_df, playlist_two_tracks_df = (
    pd.DataFrame(columns=track_columns) for _ in range(2)
)

## Get Track Information 

In [7]:
def get_track_info(playlist_uri, offset, idx, artist_df, tracks_df):
    """Append one page of a playlist's tracks and artists to the given frames.

    Fetches the page of playlist items that starts at ``offset`` through the
    module-level ``sp`` client. For every usable track it looks up each
    credited artist and the track's audio features, then writes one row per
    track into ``tracks_df`` and one row per artist credit into ``artist_df``.
    Both frames are modified in place and also returned.

    Args:
        playlist_uri: Playlist identifier passed to ``sp.playlist_tracks``.
        offset: Position of the first playlist item to fetch.
        idx: Row label in ``tracks_df`` for the first track written here.
        artist_df: Frame receiving rows with keys id, name, genres, popularity.
        tracks_df: Frame receiving the track and audio-feature rows.

    Returns:
        ``(artist_df, tracks_df, j, i)`` where ``j`` is the number of playlist
        items consumed from this page (add it to ``offset`` for the next
        call) and ``i`` is the next free row label in ``tracks_df``.

    Notes:
        * The ``artists`` / ``id_artists`` cells hold lists covering every
          credited artist, not just the last one.
        * Items without a track object or without a track id are skipped
          but still counted in ``j`` so paging stays aligned.
        * Missing audio features are stored as ``None`` instead of raising.
    """
    i = idx  # next row label in tracks_df
    j = 0    # playlist items consumed from this page

    # artist_df holds one row per artist credit, so it cannot share the
    # per-track label ``i``; continue after its current highest label.
    # (assumes an integer-labelled index, as produced by this function)
    if len(artist_df.index) == 0:
        artist_label = 0
    else:
        artist_label = artist_df.index.max() + 1

    # Avoid re-requesting an artist that appears on several tracks of the page.
    artist_cache = {}

    for item in sp.playlist_tracks(playlist_uri, offset=offset)['items']:
        # Count every item, even skipped ones, so the caller's offset
        # advances past the whole page.
        j += 1

        track = item.get('track')
        if not track or not track.get('id'):
            # Nothing to look up without a track object and its id.
            continue

        track_id = track['id']

        artist_names = []
        artist_ids = []
        for artist in track['artists']:
            artist_id = artist['id']
            if artist_id not in artist_cache:
                artist_cache[artist_id] = sp.artist(artist_id)
            artist_info = artist_cache[artist_id]

            artist_names.append(artist['name'])
            artist_ids.append(artist_id)

            artist_df.loc[artist_label] = {
                'id': artist_id,
                'name': artist['name'],
                'genres': artist_info['genres'],
                'popularity': artist_info['popularity'],
            }
            artist_label += 1

        # audio_features returns a list with one entry per requested id; the
        # entry (or the whole result) may be None when no analysis exists.
        features = sp.audio_features(track_id)
        features = (features[0] if features else None) or {}

        tracks_df.loc[i] = {
            'id': track_id,
            'name': track['name'],
            'popularity': track['popularity'],
            # Fall back to the track object's own duration if features are missing.
            'duration_ms': features.get('duration_ms', track.get('duration_ms')),
            'artists': artist_names,
            'id_artists': artist_ids,
            'release_date': track['album']['release_date'],
            'danceability': features.get('danceability'),
            'energy': features.get('energy'),
            'key': features.get('key'),
            'loudness': features.get('loudness'),
            'mode': features.get('mode'),
            'speechiness': features.get('speechiness'),
            'acousticness': features.get('acousticness'),
            'instrumentalness': features.get('instrumentalness'),
            'liveness': features.get('liveness'),
            'valence': features.get('valence'),
            'tempo': features.get('tempo'),
            'time_signature': features.get('time_signature'),
        }
        i += 1

    return artist_df, tracks_df, j, i


In [8]:
# Share links for two deliberately dissimilar playlists used as test data
playlist_link_one = 'https://open.spotify.com/playlist/51bV1ikI9qzJGm0d9ix21Y?si=ea3cdb71a17f425f'
playlist_link_two = 'https://open.spotify.com/playlist/5jJBKcZxlxBjH2mYY5fVgT?si=90e74b2e92d94bde'

# The playlist identifier is the last path segment of the link with the
# '?si=...' query string stripped off.
playlist_one_uri = playlist_link_one.rsplit('/', 1)[-1].partition('?')[0]
playlist_two_uri = playlist_link_two.rsplit('/', 1)[-1].partition('?')[0]

In [None]:
# Get the length of playlist_one by walking every page of its items
playlist_one_results = sp.playlist_tracks(playlist_one_uri)
num_playlist_one_tracks = len(playlist_one_results['items'])
while playlist_one_results['next']:
    playlist_one_results = sp.next(playlist_one_results)
    num_playlist_one_tracks += len(playlist_one_results['items'])

# Pull the playlist one page at a time; each call reports how many items it
# consumed (used to advance the offset) and the next free track row label.
offset_total = 0
idx = 0
while offset_total < num_playlist_one_tracks:
    playlist_one_artist_df, playlist_one_tracks_df, offset, idx = get_track_info(
        playlist_one_uri, offset_total, idx,
        playlist_one_artist_df, playlist_one_tracks_df,
    )
    if offset == 0:
        # The page came back empty (e.g. the playlist shrank after it was
        # counted); stop rather than request the same offset forever.
        break
    offset_total += offset

In [10]:
# Get the length of playlist_two by walking every page of its items
playlist_two_results = sp.playlist_tracks(playlist_two_uri)
num_playlist_two_tracks = len(playlist_two_results['items'])
while playlist_two_results['next']:
    playlist_two_results = sp.next(playlist_two_results)
    num_playlist_two_tracks += len(playlist_two_results['items'])

# Pull the playlist one page at a time; each call reports how many items it
# consumed (used to advance the offset) and the next free track row label.
offset_total = 0
idx = 0
while offset_total < num_playlist_two_tracks:
    playlist_two_artist_df, playlist_two_tracks_df, offset, idx = get_track_info(
        playlist_two_uri, offset_total, idx,
        playlist_two_artist_df, playlist_two_tracks_df,
    )
    if offset == 0:
        # The page came back empty (e.g. the playlist shrank after it was
        # counted); stop rather than request the same offset forever.
        break
    offset_total += offset

## Save Datasets Locally

In [None]:
# Write playlist one's track rows to a CSV in the working directory,
# leaving out the DataFrame's row labels.
playlist_one_tracks_df.to_csv('playlist_one_tracks.csv', index=False)

In [None]:
# Keep only the first row for each artist name; the frame is modified in place.
# NOTE(review): this de-duplicates on 'name', so distinct artists that share
# a name collapse into one row — confirm 'id' is not the intended key.
playlist_one_artist_df.drop_duplicates(subset='name', keep='first', inplace=True)

In [None]:
# Print a summary of playlist one's artist frame as a quick sanity check.
playlist_one_artist_df.info()

In [None]:
# Write playlist one's artist rows to a CSV in the working directory,
# leaving out the DataFrame's row labels.
playlist_one_artist_df.to_csv('playlist_one_artists.csv', index=False)

In [13]:
# Write playlist two's track rows to a CSV in the working directory,
# leaving out the DataFrame's row labels.
playlist_two_tracks_df.to_csv('playlist_two_tracks.csv', index=False)

In [15]:
# Keep only the first row for each artist name; the frame is modified in place.
# NOTE(review): this de-duplicates on 'name', so distinct artists that share
# a name collapse into one row — confirm 'id' is not the intended key.
playlist_two_artist_df.drop_duplicates(subset='name', keep='first', inplace=True)

In [17]:
# Write playlist two's artist rows to a CSV in the working directory,
# leaving out the DataFrame's row labels.
playlist_two_artist_df.to_csv('playlist_two_artists.csv', index=False)