# Music Taste Discovery

Importing confidential variables

In [3]:
from spotify_cred import client_id, client_secret, user_id

Importing the libraries needed to fetch the data, clean it, and make it usable for analysis

In [2]:
import json                                                            # For saving/loading the raw API response on disk
import os                                                              # For reading the API token from the environment

import requests                                                        # For making HTTP requests to the Spotify API

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials                    # To access authorised Spotify (public) data

In [None]:
import pandas as pd                                                    # To analyse/visualize data in a tabular format

In [None]:
# Credentials manager built from the app's client id / secret (imported from spotify_cred)
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)

# Spotify client object through which the API is accessed
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [7]:
num_playlist = 22                                                      # Number of playlists you want to pull

output_type = "application/json"                                       # Output format requested from the Spotify API
content_type = "application/json"                                      # Format declared for the request itself

# Bearer token for the Web API calls made with `requests`.
# It is read from the SPOTIFY_AUTH_TOKEN environment variable so the secret
# never has to be pasted into (and saved with) the notebook. When the variable
# is not set, this falls back to the previous empty-string placeholder.
auth_token = os.environ.get("SPOTIFY_AUTH_TOKEN", "")

# Headers for the requests.get() method
query_headers = {
    "Accept": output_type,
    "Content-Type": content_type,
    "Authorization": f'Bearer {auth_token}',
}

### Playlists

In [None]:
# Endpoint url of interest (user playlists)
endpoint_url = f'https://api.spotify.com/v1/users/{user_id}/playlists'

# Basic url query: ask for at most `num_playlist` playlists
query = f'{endpoint_url}?limit={num_playlist}'

# API call. The timeout (seconds) stops a stalled connection from hanging the kernel.
response = requests.get(query, headers=query_headers, timeout=10)

# Stop here on a 4xx/5xx answer (e.g. a missing or expired token) rather than
# letting an error payload flow into the cells below.
response.raise_for_status()

response

In [None]:
json_response = response.json()                                        # Decode the JSON body of the response with .json()

In [None]:
# Write the output response to file so the following cells can work from disk.
# UTF-8 with ensure_ascii=False keeps non-ASCII characters readable in the
# saved file instead of \u-escaping them.
with open('kc941 playlists.json', 'w', encoding='utf-8') as file:
    json.dump(json_response, file, ensure_ascii=False, indent=4)

In [None]:
# Load the saved playlists response back from disk.
# The file is written as UTF-8, so it is read with the same encoding rather
# than the platform default; the `with` block closes the file even if
# parsing fails.
with open('kc941 playlists.json', 'r', encoding='utf-8') as f:
    # json.load returns the parsed JSON content
    spotify_playlists = json.load(f)

In [None]:
# One entry per playlist in the response:
# playlist id becomes the key, [uri, name] becomes the value
playlist_simplified_dict = {
    playlist['id']: [playlist['uri'], playlist['name']]
    for playlist in spotify_playlists['items']
}

# Converting the playlist dictionary to a DataFrame (one row per playlist id)
playlist_df = pd.DataFrame.from_dict(
    playlist_simplified_dict,
    orient='index',
    columns=['uri', 'playlist_name'],
)

# After from_dict the index holds the playlist ids: copy them into a regular
# column, then replace the index with plain 0..n-1 integers
playlist_df['id'] = playlist_df.index
playlist_df.index = list(range(len(playlist_df)))

playlist_df

### Playlist Tracks

In [None]:
# Testing with just 1 playlist
dance_away = sp.playlist_tracks(playlist_id = '5bDiAmx1M4aFOiQlix5Oa2')

# A single call only returns the first page of tracks. Follow the `next`
# links and append every further page to dance_away['items'], so longer
# playlists are not silently truncated.
tracks_page = dance_away
while tracks_page['next']:
    tracks_page = sp.next(tracks_page)
    dance_away['items'].extend(tracks_page['items'])

In [None]:
# track id -> [uri, name] for every usable track in the playlist
tracks_dict = {}

for item in dance_away['items']:
    track = item.get('track')
    # A playlist entry may carry no track object (e.g. the track is no longer
    # available) or a track without an id (e.g. a local file). Such entries
    # cannot be looked up later, so they are skipped instead of raising a
    # TypeError or creating a `None` key.
    if track is None or track.get('id') is None:
        continue
    tracks_dict[track['id']] = [track['uri'], track['name']]

# Converting the tracks dictionary to a DataFrame (one row per track id)
tracks_df = pd.DataFrame.from_dict(tracks_dict, orient = 'index', columns = ['uri', 'track_name'])

# The index holds the track ids: copy them into a column, then use a plain 0..n-1 index
tracks_df['id'] = tracks_df.index
tracks_df.index = list(range(len(tracks_df)))

tracks_df

### Track Features and Track Info

In [None]:
# track id -> single-element list holding that track's audio-features entry
# (same shape as one sp.audio_features(<id>) call per track would produce)
track_features_dict = {}

# Ask for the features in batches rather than one HTTP request per track.
# 100 is the maximum number of ids spotipy documents for one audio_features call.
track_ids = list(tracks_dict)
for start in range(0, len(track_ids), 100):
    batch_ids = track_ids[start:start + 100]
    # Results are paired with the requested ids by position
    for track_id, features in zip(batch_ids, sp.audio_features(batch_ids)):
        track_features_dict[track_id] = [features]

# One row per track id, with the raw features entry in a single 'features' column
track_features_df = pd.DataFrame.from_dict(track_features_dict, orient ='index', columns = ['features'])

# The index holds the track ids: copy them into a column, then use a plain 0..n-1 index
track_features_df['id'] = track_features_df.index
track_features_df.index = list(range(len(track_features_df)))

track_features_df

In [None]:
# Spread each entry of the 'features' column into one column per key
features_expanded = track_features_df['features'].apply(pd.Series)

# Drop the two original columns and put the expanded ones in their place;
# any 'id' / 'uri' column from here on comes from the expanded features
track_features_df_2 = pd.concat(
    [track_features_df.drop(columns=['features', 'id']), features_expanded],
    axis=1,
)

# Keep only the identifiers and the audio features of interest
feature_columns = [
    'id', 'uri',
    'acousticness', 'danceability', 'energy',
    'instrumentalness', 'liveness', 'loudness',
    'speechiness', 'tempo', 'valence',
]
track_features_df_3 = track_features_df_2[feature_columns]

# Attach the features to the track list; the left join keeps every row of tracks_df
track_details_df = tracks_df.merge(
    track_features_df_3,
    how='left',
    on=['id', 'uri'],
)

track_details_df