In [None]:
import csv
import json
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

### Spotify Credentials

In [None]:
# Spotify API credentials are read from the environment so that secrets never
# have to be typed into (or committed with) the notebook. When a variable is
# unset the value falls back to the empty string, exactly as before.
# NOTE(review): SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are the variable names
# spotipy documents for its own credential lookup - confirm for your version.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', '')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', '')

In [None]:
# Spotify API authorization: build the shared client used by every lookup
# function below, authenticated through a SpotifyClientCredentials manager
# created from client_id / client_secret.
spotify_object = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

In [None]:
def spotify_api(artist_name, search_query):
    '''
    @ Spotify song details function

    Search Spotify for a track by artist name and track title and describe
    the first hit.

    Parameters:
        artist_name  -- artist name placed after "artist:" in the query
        search_query -- track title placed after " track:" in the query

    Returns:
        [song_id, uri_id, release_date, type, explicit_content, popularity]
        taken from the first search result, or None when the search returns
        no items.
    '''
    query = "artist:" + artist_name + " track:" + search_query
    response = spotify_object.search(q=query, type="track")

    matches = response['tracks']['items']
    if len(matches) == 0:
        return None

    # Only the first (top) hit is used
    top_hit = matches[0]
    return [
        top_hit['id'],                     # song id
        top_hit['uri'],                    # uri id
        top_hit['album']['release_date'],  # release date
        top_hit['type'],                   # type
        top_hit['explicit'],               # explicit content
        top_hit['popularity'],             # popularity
    ]
    

In [None]:
# Audio-feature fields returned by spotify_track_audio_attributes, in order.
_AUDIO_FEATURE_KEYS = (
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo',
)


def spotify_track_audio_attributes(id):
    '''
    @ Spotify track audio analysis function

    Fetch the audio features of one track through spotify_object.audio_features.

    Parameters:
        id -- Spotify track id (as returned in spotify_api(...)[0])

    Returns:
        [danceability, energy, loudness, speechiness, acousticness,
         instrumentalness, liveness, valence, tempo]
        or None when no audio features are available for the track.
    '''
    song_attributes = spotify_object.audio_features(id)

    # The previous guard (`len(...) != 0 or song_attributes != 'None'`) was
    # always true, so an empty response or a null entry crashed on the first
    # lookup instead of reaching the `return None` branch. Handle a missing /
    # empty response and a None first entry explicitly.
    # NOTE(review): Spotify reports tracks without audio features as a null
    # entry in the result list - confirm against the API docs.
    if not song_attributes or song_attributes[0] is None:
        return None

    features = song_attributes[0]
    return [features[key] for key in _AUDIO_FEATURE_KEYS]

### Combine library and playlist data

In [None]:
def _append_song_row(songs_data, artist, track_name):
    '''Look up one song on Spotify and append its row to songs_data; skip it when either lookup returns None.'''
    # api_data -> [song_id, uri_id, release_date, type, explicit, popularity]
    api_data = spotify_api(artist, track_name)
    if api_data is None:
        return

    # track_attribute -> [danceability, energy, loudness, speechiness,
    #                     acousticness, instrumentalness, liveness, valence, tempo]
    track_attribute = spotify_track_audio_attributes(api_data[0])
    if track_attribute is None:
        # No audio features for this song: skip it rather than crash on indexing
        return

    song_id, uri_id, release_date, track_type, explicit, popularity = api_data
    songs_data.append((
        song_id, artist, track_name, uri_id, release_date,
        explicit, track_type, popularity,
        *track_attribute[:9],
    ))


def json_extract(data, songs_data):
    '''
    @ JSON parsing function

    Read each JSON file listed in `data`, look every song up on Spotify and
    append one tuple per resolved song to `songs_data`.

    Two file layouts are recognised:
      * a top-level 'tracks' list whose entries have 'artist' and 'track' keys
      * a top-level 'playlists' list whose entries have an 'items' list, each
        item holding a 'track' mapping with 'artistName' and 'trackName'
    Files with neither key are ignored.

    Parameters:
        data       -- iterable of JSON file paths
        songs_data -- list that receives the rows (mutated in place)

    Returns:
        songs_data, with rows of
        (song_id, artist, track_name, uri_id, release_date, explicit, type,
         popularity, danceability, energy, loudness, speechiness,
         acousticness, instrumentalness, liveness, valence, tempo).
        Songs that spotify_api or spotify_track_audio_attributes cannot
        resolve (None result) are skipped.
    '''
    for path in data:
        # Context manager guarantees the file handle is closed after parsing
        with open(path, encoding='utf8', errors='ignore') as f:
            jsonObject = json.load(f)

        if 'tracks' in jsonObject:
            print('tracks json')
            for entry in jsonObject['tracks']:
                _append_song_row(songs_data, entry['artist'], entry['track'])

        elif 'playlists' in jsonObject:
            print('playlists json')
            for playlist in jsonObject['playlists']:
                for item in playlist['items']:
                    # NOTE(review): items without a usable 'track' entry
                    # (missing or null) are skipped - presumably non-song
                    # entries; confirm against the export format.
                    track = item.get('track')
                    if not track:
                        continue
                    _append_song_row(songs_data, track['artistName'], track['trackName'])
    return songs_data

In [None]:
if __name__ == '__main__':
    # Collect one row per resolved song from the library and playlist exports
    data_df = []
    json_extract(
        ['./dataset/YourLibrary.json', './dataset/Playlist1.json'],
        data_df,
    )

    # Column names, in the same order as the tuples appended by json_extract
    header = [
        'song_id', 'artist_name', 'track_name', 'uri_id', 'release_date',
        'explicit', 'type', 'popularity',
        'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
        'instrumentalness', 'liveness', 'valence', 'tempo',
    ]

    # Write the header followed by every collected row
    with open('./dataset/songs_data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(data_df)

### Streaming history

In [None]:
def streaming_history_data(file, output):
    '''
    @ Streaming history data function

    Read a streaming-history JSON file (a list of entries with 'endTime',
    'artistName', 'trackName' and 'msPlayed' keys), look each played song up
    on Spotify and append one tuple per resolved play to `output`.

    Parameters:
        file   -- path of the streaming-history JSON file
        output -- list that receives the rows (mutated in place)

    Returns:
        output, with rows of
        (song_id, artist_name, track_name, uri_id, release_date, type, year,
         month, day, explicit, popularity, end_time, ms_played, danceability,
         energy, loudness, speechiness, acousticness, instrumentalness,
         liveness, valence, tempo).
        Plays whose song cannot be resolved by spotify_api or
        spotify_track_audio_attributes (None result) are skipped.
    '''
    # Context manager guarantees the file handle is closed after parsing
    with open(file, encoding='utf8', errors='ignore') as f:
        jsonObject = json.load(f)

    print('Process Started')
    # The same song usually appears many times in a listening history, so each
    # (artist, track) pair is looked up once and the result reused.
    lookup_cache = {}
    for entry in jsonObject:
        end_time = entry['endTime']
        # endTime is split on '-' and the third piece on ' ' to drop the
        # time of day, e.g. '2021-03-04 10:15' -> '2021', '03', '04'
        date_fields = end_time.split('-')
        year = date_fields[0]
        month = date_fields[1]
        day = date_fields[2].split(' ')[0]

        artist = entry['artistName']
        track_name = entry['trackName']
        cache_key = (artist, track_name)
        if cache_key in lookup_cache:
            song_details, song_details_attribute = lookup_cache[cache_key]
        else:
            # song_details -> [song_id, uri_id, release_date, type, explicit, popularity]
            song_details = spotify_api(artist, track_name)
            # song_details_attribute -> [danceability, energy, loudness,
            #   speechiness, acousticness, instrumentalness, liveness, valence, tempo]
            song_details_attribute = None
            if song_details is not None:
                song_details_attribute = spotify_track_audio_attributes(song_details[0])
            lookup_cache[cache_key] = (song_details, song_details_attribute)

        # Skip plays that could not be resolved instead of crashing on indexing
        if song_details is None or song_details_attribute is None:
            continue

        output.append((
            song_details[0], artist, track_name, song_details[1], song_details[2], song_details[3], year,
            month, day, song_details[4], song_details[5], end_time, entry['msPlayed'],
            song_details_attribute[0], song_details_attribute[1], song_details_attribute[2], song_details_attribute[3],
            song_details_attribute[4], song_details_attribute[5], song_details_attribute[6], song_details_attribute[7],
            song_details_attribute[8]
        ))
    print('Process Ended')
    return output

In [None]:
if __name__ == '__main__':
    # Collect one row per resolved play from the streaming-history export;
    # the function fills output_data in place and also returns it.
    output_data = []
    x = streaming_history_data('./dataset/StreamingHistory0.json', output_data)

    # Column names, in the same order as the tuples appended by streaming_history_data
    header = [
        'song_id', 'artist_name', 'track_name', 'uri_id', 'release_date', 'type',
        'year', 'month', 'day', 'explicit', 'popularity', 'end_time', 'ms_played',
        'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
        'instrumentalness', 'liveness', 'valence', 'tempo',
    ]

    # Write the header followed by every collected row
    with open('./dataset/streaming_history_data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(output_data)