In [1]:
import json
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import preprocessing

import os
from mutagen.id3 import ID3
from mutagen.mp3 import MP3
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

In [2]:
def _id3_text(audio, frame_id):
    """Return the first text value of an ID3 frame, or '' if the frame is missing or empty."""
    try:
        values = audio[frame_id].text
    except KeyError:
        return ''
    return values[0] if values else ''


def _pick_track(items, title, artist):
    """Choose which Spotify search hit to use for a local file.

    Returns a ``(uri, status)`` tuple where ``status`` is:
      * 'YES'   - a hit whose first artist name and track name both equal the tags,
      * 'MAYBE' - no exact hit; the first search result is used instead,
      * 'NO'    - the search returned nothing (``uri`` is '').
    """
    for item in items:
        artists = item['artists']
        if artists and artists[0]['name'] == artist and item['name'] == title:
            return item['uri'], 'YES'
    if items:
        # Fall back to the top-ranked hit (it may be named differently).
        return items[0]['uri'], 'MAYBE'
    return '', 'NO'


def analyze(folder_path, client_id='', client_secret=''):
    """Look up every MP3 in ``folder_path`` on Spotify and tabulate its audio features.

    For each file whose name ends in '.mp3' (case-insensitive) the ID3 title
    (TIT2), artist (TPE1) and album (TALB) are read, Spotify is searched for
    "<title> <artist>", and the audio features of the chosen track are fetched.

    Parameters
    ----------
    folder_path : str
        Directory containing the MP3 files (not searched recursively).
    client_id, client_secret : str, optional
        Spotify application credentials. They default to '' exactly as the
        values previously hard-coded here; pass real credentials (ideally
        read from the environment) rather than committing them.

    Returns
    -------
    pandas.DataFrame
        One row per MP3 with columns TITLE, ARTIST, ALBUM, URI, ANALYZED
        ('YES' / 'MAYBE' / 'NO', see ``_pick_track``) followed by one
        upper-cased column per audio-feature key.
        NOTE(review): if the feature payload carries its own 'uri' key the
        result will contain two 'URI' columns - confirm whether that is wanted.
    """
    # Match on the extension only: a substring test would also pick up
    # side-car files such as 'song.mp3.asd' that are not MP3 audio.
    mp3_files = [name for name in os.listdir(folder_path)
                 if name.lower().endswith('.mp3')]

    sp = None
    if mp3_files:
        # Make your own Spotify app at https://beta.developer.spotify.com/dashboard/applications
        # One client is enough for the whole folder; it is only built when
        # there is something to look up.
        credentials = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
        sp = spotipy.Spotify(client_credentials_manager=credentials)
        sp.trace = False

    records = []
    for name in mp3_files:
        audio = ID3(os.path.join(folder_path, name))
        title = _id3_text(audio, 'TIT2')
        artist = _id3_text(audio, 'TPE1')
        album = _id3_text(audio, 'TALB')
        print(artist,' - ',title)

        # Same "<title> <artist>" query as before; empty tags are left out so
        # a file without tags does not send a blank query.
        query = ' '.join(part for part in (title, artist) if part)
        items = sp.search(query)['tracks']['items'] if query else []

        uri, analyzed = _pick_track(items, title, artist)
        if analyzed == 'MAYBE':
            print(uri)
        elif analyzed == 'NO':
            print("* Cannot Find URI: ",artist,' - ',title)

        features = {}
        if uri:
            # A single URI is requested, so only the first entry of the
            # returned list is relevant. An empty list or a None entry
            # (presumably "no analysis available" - TODO confirm) becomes {}
            # so the flattening step below always receives a dict.
            batch = sp.audio_features(uri)
            features = (batch[0] if batch else None) or {}

        records.append([title, artist, album, uri, features, analyzed])

    df = pd.DataFrame(records, columns=['Title','Artist','Album','URI','Features','Analyzed'])

    # Expand the per-track feature dicts into one column per key.
    flat = pd.json_normalize(df['Features'].tolist()).add_prefix('Features.')
    df = df.join(flat).drop(['Features'], axis=1)

    # 'Features.danceability' -> 'DANCEABILITY', 'Title' -> 'TITLE', ...
    df.columns = [col.split('.')[-1].upper() for col in df.columns]
    return df

## Download Metadata 

In [4]:
# Analyze every playlist folder under the DJ library root.
# NOTE(review): absolute, machine-specific path - consider moving it to a config cell.
root_dj = "/Users/alper/Documents/DJ"
dict_analysis = {}  # playlist folder name -> DataFrame returned by analyze()
current_playlists = os.listdir(root_dj)

# '.DS_Store' is Finder metadata; 'Analysis' is the output folder that the
# CSV export cell creates inside root_dj, so it is not a playlist either.
skipped_entries = {'.DS_Store', 'Analysis'}

for playlist in current_playlists:
    folder_path = os.path.join(root_dj,playlist)

    # Only directories can be playlists: handing a plain file to analyze()
    # would make its os.listdir() call fail.
    if playlist in skipped_entries or not os.path.isdir(folder_path):
        continue

    print(folder_path)
    df = analyze(folder_path)
    df['PLAYLIST'] = playlist
    dict_analysis[playlist] = df

    # Five blank lines separate the log output of consecutive playlists.
    print('\n' * 4)

/Users/alper/Documents/DJ/House Slow
Cyril Hahn/Shy Girls/Henry Krinkle  -  Perfect Form - Henry Krinkle Remix
* Cannot Find URI:  Cyril Hahn/Shy Girls/Henry Krinkle  -  Perfect Form - Henry Krinkle Remix
Antaares/Elepani  -  Se Acaba el Tiempo
spotify:track:32eOsmgDHGkwPfwm3KEVTr
Marenberg Kollektiv  -  Sun Goes Down - Original Mix
Le Flex  -  Kiss Me
Satin Jackets  -  You Make Me Feel Good - Deep Mix
Tenesha The Wordsmith/Lauren Ritter/JB  -  Thirst Trap
spotify:track:0C3h3kUiyRIyNP9tVIZDZP
Monoplay  -  Trust Me
Paradis  -  Instantané





/Users/alper/Documents/DJ/House Deep Vocal
Framewerk  -  As If - Radio Edit
Housenick/Nikko Culture  -  SaturDay Night - Nikko Culture Remix
spotify:track:0PdYp0gyoCZAwz8XAXWvQK
Serge Devant/Hadley  -  Addicted (feat. Hadley) - Radio Edit
* Cannot Find URI:  Serge Devant/Hadley  -  Addicted (feat. Hadley) - Radio Edit
Studio Deep/Cotry/Ian Tosel  -  Over & Over - Ian Tosel Remix
* Cannot Find URI:  Studio Deep/Cotry/Ian Tosel  -  Over & Over - Ian 

## Save Metadata as .csv(s) 

In [27]:
# Write one CSV per playlist plus a combined 'Tracks.csv' into <root_dj>/Analysis.
root_dj = "/Users/alper/Documents/DJ"
root_analysis = os.path.join(root_dj,'Analysis')
os.makedirs(root_analysis,exist_ok=True)

list_all_dfs = []
for key, df in dict_analysis.items():
    df.to_csv(os.path.join(root_analysis,key+".csv"))
    # Only tables that actually contain audio-feature columns go into the
    # combined file; a playlist where nothing was found has no such column.
    if "DANCEABILITY" in df.columns:
        list_all_dfs.append(df.reset_index(drop=True))

if list_all_dfs:
    df_all = pd.concat(list_all_dfs)
    df_all.to_csv(os.path.join(root_analysis,"Tracks.csv"))
else:
    # pd.concat raises ValueError on an empty list; keep df_all defined and
    # leave any existing Tracks.csv untouched instead of crashing.
    df_all = pd.DataFrame()
    print("No playlist has audio features; Tracks.csv was not written")



In [56]:
# cosine / sigmoid similarity
# python dj.py