In [1]:
'''Feature Extraction Helper Function'''
import librosa
import numpy as np
import pandas as pd
from os import listdir
from time import time
from os.path import isfile, join
from mutagen.easyid3 import EasyID3
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy

client_credentials_manager = SpotifyClientCredentials(client_id='7ef6fb261a0145f7a5b9911971999c34', client_secret="842d5810f3864355b256f2b9ca3cbcb5")
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
sp.trace=True

def extract_id3_feature_for_song(feature_name, song_path):
    feature_value = None
    try:    
        mp3info = EasyID3(song_path)
        feature_value = mp3info[feature_name][0] if feature_name in mp3info and len(mp3info[feature_name]) > 0 else 'unknown'            
    except:
        feature_value = "unknown"
    return feature_value.strip()

In [5]:
'''
    Extract features using Spotify package
    input: path to mp3 files
    output: csv file containing extracted features
    
    This function reads the content in a directory and for each mp3 file detected
    reads the file and extracts relevant features using spotify
'''

def extract_feature_spotify(path):
    id = 1  # Song ID
    feature_set = pd.DataFrame()  # Feature Matrix
    
    # Individual Feature Vectors
    songname_vector = pd.Series()
    genre = pd.Series()
    artist = pd.Series()
    title = pd.Series()
    album = pd.Series()
    sp_energy = pd.Series()
    sp_speechiness = pd.Series()
    sp_valence = pd.Series()
    sp_danceability = pd.Series()
    sp_acousticness = pd.Series()
    sp_instrumentalness = pd.Series()
    sp_speechiness = pd.Series()
    sp_tempo = pd.Series()
    sp_loudness = pd.Series()
    sp_liveness = pd.Series()
    
    # Traversing over each file in path
    file_data = [f for f in listdir(path) if isfile (join(path, f))]
    for line in file_data:
        if ( line[-1:] == '\n' ):
            line = line[:-1]
        # Reading Song
        songname = path + line
        song = line
        
#       Extracting Features
        songname_vector.at[id] = line  # song name
        genre.at[id] = extract_id3_feature_for_song('genre', songname)
        artist.at[id] = extract_id3_feature_for_song('artist', songname)
        title.at[id] = extract_id3_feature_for_song('title', songname)
        if title.at[id] == 'unknown':
            title.at[id] = song
        album.at[id] = extract_id3_feature_for_song('album', songname)
        print("title is ========== ",title.at[id])
        try:
            results = sp.search(q='artist:'+artist.at[id]+' track:'+title.at[id], type='track', limit=3)
            trackid = str(results['tracks']['items'][0]['id'])
            sp_features = sp.audio_features(trackid)[0]
            sp_energy.at[id] = sp_features['energy']
            sp_speechiness.at[id] = sp_features['speechiness']
            sp_valence.at[id] = sp_features['valence']
            sp_danceability.at[id] = sp_features['danceability']
            sp_acousticness.at[id] = sp_features['acousticness']
            sp_instrumentalness.at[id] = sp_features['instrumentalness']
            sp_speechiness.at[id] = sp_features['speechiness']
            sp_tempo.at[id] = sp_features['tempo']
            sp_loudness.at[id] = sp_features['loudness']
            sp_liveness.at[id] = sp_features['liveness']
        except Exception as e:
#             pass
            sp_features = 'NaN'
            sp_energy.at[id] = 'NaN'
            sp_speechiness.at[id] = 'NaN'
            sp_valence.at[id] = 'NaN'
            sp_danceability.at[id] = 'NaN'
            sp_acousticness.at[id] = 'NaN'
            sp_instrumentalness.at[id] = 'NaN'
            sp_speechiness.at[id] = 'NaN'
            sp_tempo.at[id] = 'NaN'
            sp_loudness.at[id] = 'NaN'
            sp_liveness.at[id] = 'NaN'

        print(songname + "---start")
        id = id+1
    
    # Concatenating Features into one csv and json format
    feature_set['song_name'] = songname_vector  # song name
    feature_set['genre'] = genre
    feature_set['artist'] = artist
    feature_set['title'] = title
    feature_set['album'] = album
    feature_set['sp_energy'] = sp_energy
    feature_set['sp_speechiness'] = sp_speechiness
    feature_set['sp_valence'] = sp_valence
    feature_set['sp_danceability'] = sp_danceability
    feature_set['sp_acousticness'] = sp_acousticness
    feature_set['sp_instrumentalness'] = sp_instrumentalness
    feature_set['sp_tempo'] = sp_tempo
    feature_set['sp_liveness'] = sp_liveness
    feature_set['sp_loudness'] = sp_loudness

    # Converting Dataframe into CSV Excel and JSON file
#     feature_set.to_csv('./spotify_trainingdata/trainingdata_no_liberosa_1new.csv')
#     feature_set.to_json('./spotify_trainingdata/trainingdata_no_liberosa_1new.json')
#     feature_set.to_csv('./spotify_trainingdata/trainingdata_no_liberosa_total.csv')
#     feature_set.to_json('./spotify_trainingdata/trainingdata_no_liberosa_total.json')
#     feature_set.to_csv('./testdata/testdata_spotify.csv')
#     feature_set.to_json('./testdata/testdata_spotify.json')
    feature_set.to_csv('./DemoData/demodata_spotify.csv')
    feature_set.to_json('./DemoData/demodata_spotify.json')
    
# Extracting Feature Function Call
# extract_feature('Songs/')
# extract_feature_spotify('songs-spotify/')
# extract_feature_spotify('trainningsongs/')
extract_feature_spotify('DemoSongs/')



headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http status 200
GET https://api.spotify.com/v1/search?q=artist%3AZee+Avi+track%3ADream+a+Little+Dream+of+Me&limit=3&offset=0&type=track
RESP {'tracks': {'href': 'https://api.spotify.com/v1/search?query=artist%3AZee+Avi+track%3ADream+a+Little+Dream+of+Me&type=track&offset=0&limit=3', 'items': [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6zGcYBjlNOMSVVrl7ZoGsH'}, 'href': 'https://api.spotify.com/v1/artists/6zGcYBjlNOMSVVrl7ZoGsH', 'id': '6zGcYBjlNOMSVVrl7ZoGsH', 'name': 'Zee Avi', 'type': 'artist', 'uri': 'spotify:artist:6zGcYBjlNOMSVVrl7ZoGsH'}], 'available_markets': ['AD', 'AR', 'AT', 'AU', 'BE', 'BG', 'BO', 'BR', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'EC', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HU', 'ID', 'IE', 'IL', 'IS', 'IT', 


headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http status 200
GET https://api.spotify.com/v1/audio-features/?ids=6SkoBaMd0lzoQjID5aV2d9
RESP {'audio_features': [{'danceability': 0.458, 'energy': 0.629, 'key': 5, 'loudness': -6.949, 'mode': 1, 'speechiness': 0.0441, 'acousticness': 0.366, 'instrumentalness': 7.52e-06, 'liveness': 0.102, 'valence': 0.416, 'tempo': 147.728, 'type': 'audio_features', 'id': '6SkoBaMd0lzoQjID5aV2d9', 'uri': 'spotify:track:6SkoBaMd0lzoQjID5aV2d9', 'track_href': 'https://api.spotify.com/v1/tracks/6SkoBaMd0lzoQjID5aV2d9', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6SkoBaMd0lzoQjID5aV2d9', 'duration_ms': 209560, 'time_signature': 4}]}

DemoSongs/Jessie J - Flashlight.mp3---start

headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http


headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http status 200
GET https://api.spotify.com/v1/audio-features/?ids=0puf9yIluy9W0vpMEUoAnN
RESP {'audio_features': [{'danceability': 0.703, 'energy': 0.789, 'key': 0, 'loudness': -3.423, 'mode': 0, 'speechiness': 0.0928, 'acousticness': 0.292, 'instrumentalness': 0, 'liveness': 0.377, 'valence': 0.761, 'tempo': 150.058, 'type': 'audio_features', 'id': '0puf9yIluy9W0vpMEUoAnN', 'uri': 'spotify:track:0puf9yIluy9W0vpMEUoAnN', 'track_href': 'https://api.spotify.com/v1/tracks/0puf9yIluy9W0vpMEUoAnN', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0puf9yIluy9W0vpMEUoAnN', 'duration_ms': 199387, 'time_signature': 4}]}

DemoSongs/Bang Bang - Jessie J,Ariana Grande,Nicki Minaj.mp3---start


In [4]:
'''
    Extract features using librosa package
    input: path to mp3 files
    output: csv file containing extracted features
    
    This function reads the content in a directory and for each mp3 file detected
    reads the file and extracts relevant features using librosa library for audio
    signal processing
'''
def extract_feature_librosa(path):
    id = 1  # Song ID
    feature_set = pd.DataFrame()  # Feature Matrix
    
    # Individual Feature Vectors
    songname_vector = pd.Series()
    tempo_vector = pd.Series()
    total_beats = pd.Series()
    average_beats = pd.Series()
    chroma_stft_mean = pd.Series()
    chroma_stft_std = pd.Series()
    chroma_stft_var = pd.Series()
    chroma_cq_mean = pd.Series()
    chroma_cq_std = pd.Series()
    chroma_cq_var = pd.Series()
    chroma_cens_mean = pd.Series()
    chroma_cens_std = pd.Series()
    chroma_cens_var = pd.Series()
    mel_mean = pd.Series()
    mel_std = pd.Series()
    mel_var = pd.Series()
    mfcc_mean = pd.Series()
    mfcc_std = pd.Series()
    mfcc_var = pd.Series()
    mfcc_delta_mean = pd.Series()
    mfcc_delta_std = pd.Series()
    mfcc_delta_var = pd.Series()
    rmse_mean = pd.Series()
    rmse_std = pd.Series()
    rmse_var = pd.Series()
    cent_mean = pd.Series()
    cent_std = pd.Series()
    cent_var = pd.Series()
    spec_bw_mean = pd.Series()
    spec_bw_std = pd.Series()
    spec_bw_var = pd.Series()
    contrast_mean = pd.Series()
    contrast_std = pd.Series()
    contrast_var = pd.Series()
    rolloff_mean = pd.Series()
    rolloff_std = pd.Series()
    rolloff_var = pd.Series()
    poly_mean = pd.Series()
    poly_std = pd.Series()
    poly_var = pd.Series()
    tonnetz_mean = pd.Series()
    tonnetz_std = pd.Series()
    tonnetz_var = pd.Series()
    zcr_mean = pd.Series()
    zcr_std = pd.Series()
    zcr_var = pd.Series()
    harm_mean = pd.Series()
    harm_std = pd.Series()
    harm_var = pd.Series()
    perc_mean = pd.Series()
    perc_std = pd.Series()
    perc_var = pd.Series()
    frame_mean = pd.Series()
    frame_std = pd.Series()
    frame_var = pd.Series()
    genre = pd.Series()
    artist = pd.Series()
    title = pd.Series()
    album = pd.Series()
    
    # Traversing over each file in path
    file_data = [f for f in listdir(path) if isfile (join(path, f))]
    for line in file_data:
        if ( line[-1:] == '\n' ):
            line = line[:-1]

        # Reading Song
        songname = path + line
        song = line
        y, sr = librosa.load(songname, duration=60)
        S = np.abs(librosa.stft(y))
        
#       Extracting Features
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        poly_features = librosa.feature.poly_features(S=S, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)
        
        mfcc = librosa.feature.mfcc(y=y, sr=sr)#investigate mfcc, extract features from mfcc
        mfcc_delta = librosa.feature.delta(mfcc)
    
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
        frames_to_time = librosa.frames_to_time(onset_frames[:20], sr=sr)
        
        
        # Transforming Features
        songname_vector.at[id] = line  # song name
        tempo_vector.at[id] = tempo #temp
        total_beats.at[id] = sum(beats) # beats
        average_beats.at[id] = np.average(beats)
        chroma_stft_mean.at[id]= np.mean(chroma_stft)  # chroma stft
        chroma_stft_std.at[id] = np.std(chroma_stft)
        chroma_stft_var.at[id] = np.var(chroma_stft)
        chroma_cq_mean.at[id] = np.mean(chroma_cq)  # chroma cq
        chroma_cq_std.at[id] = np.std(chroma_cq)
        chroma_cq_var.at[id] = np.var(chroma_cq)
        chroma_cens_mean.at[id] = np.mean(chroma_cens)  # chroma cens
        chroma_cens_std.at[id] = np.std(chroma_cens)
        chroma_cens_var.at[id] = np.var(chroma_cens)
        mel_mean.at[id] = np.mean(melspectrogram)  # melspectrogram
        mel_std.at[id] = np.std(melspectrogram)
        mel_var.at[id] = np.var(melspectrogram)
        mfcc_mean.at[id] = np.mean(mfcc)  # mfcc
        mfcc_std.at[id] = np.std(mfcc)
        mfcc_var.at[id] = np.var(mfcc)
        mfcc_delta_mean.at[id] = np.mean(mfcc_delta)  # mfcc delta
        mfcc_delta_std.at[id] = np.std(mfcc_delta)
        mfcc_delta_var.at[id] = np.var(mfcc_delta)
        rmse_mean.at[id] = np.mean(rmse)  # rmse
        rmse_std.at[id] = np.std(rmse)
        rmse_var.at[id] = np.var(rmse)
        cent_mean.at[id] = np.mean(cent)  # cent
        cent_std.at[id] = np.std(cent)
        cent_var.at[id] = np.var(cent)
        spec_bw_mean.at[id] = np.mean(spec_bw)  # spectral bandwidth
        spec_bw_std.at[id] = np.std(spec_bw)
        spec_bw_var.at[id] = np.var(spec_bw)
        contrast_mean.at[id] = np.mean(contrast)  # contrast
        contrast_std.at[id] = np.std(contrast)
        contrast_var.at[id] = np.var(contrast)
        rolloff_mean.at[id] = np.mean(rolloff)  # rolloff
        rolloff_std.at[id] = np.std(rolloff)
        rolloff_var.at[id] = np.var(rolloff)
        poly_mean.at[id] = np.mean(poly_features)  # poly features
        poly_std.at[id] = np.std(poly_features)
        poly_var.at[id] = np.var(poly_features)
        tonnetz_mean.at[id] = np.mean(tonnetz)  # tonnetz
        tonnetz_std.at[id] = np.std(tonnetz)
        tonnetz_var.at[id] = np.var(tonnetz)
        zcr_mean.at[id] = np.mean(zcr)  # zero crossing rate
        zcr_std.at[id] = np.std(zcr)
        zcr_var.at[id] = np.var(zcr)
        harm_mean.at[id] = np.mean(harmonic)  # harmonic
        harm_std.at[id] = np.std(harmonic)
        harm_var.at[id] = np.var(harmonic)
        perc_mean.at[id] = np.mean(percussive)  # percussive
        perc_std.at[id] = np.std(percussive)
        perc_var.at[id] = np.var(percussive)
        frame_mean.at[id] = np.mean(frames_to_time)  # frames
        frame_std.at[id] = np.std(frames_to_time)
        frame_var.at[id] = np.var(frames_to_time)
        
        
        print(songname + "---start")
        id = id+1
    
    # Concatenating Features into one csv and json format
    feature_set['song_name'] = songname_vector  # song name
    feature_set['tempo'] = tempo_vector  # tempo 
    feature_set['total_beats'] = total_beats  # beats
    feature_set['average_beats'] = average_beats
    feature_set['chroma_stft_mean'] = chroma_stft_mean  # chroma stft
    feature_set['chroma_stft_std'] = chroma_stft_std
    feature_set['chroma_stft_var'] = chroma_stft_var
    feature_set['chroma_cq_mean'] = chroma_cq_mean  # chroma cq
    feature_set['chroma_cq_std'] = chroma_cq_std
    feature_set['chroma_cq_var'] = chroma_cq_var
    feature_set['chroma_cens_mean'] = chroma_cens_mean  # chroma cens
    feature_set['chroma_cens_std'] = chroma_cens_std
    feature_set['chroma_cens_var'] = chroma_cens_var
    feature_set['melspectrogram_mean'] = mel_mean  # melspectrogram
    feature_set['melspectrogram_std'] = mel_std
    feature_set['melspectrogram_var'] = mel_var
    feature_set['mfcc_mean'] = mfcc_mean  # mfcc
    feature_set['mfcc_std'] = mfcc_std
    feature_set['mfcc_var'] = mfcc_var
    feature_set['mfcc_delta_mean'] = mfcc_delta_mean  # mfcc delta
    feature_set['mfcc_delta_std'] = mfcc_delta_std
    feature_set['mfcc_delta_var'] = mfcc_delta_var
    feature_set['rmse_mean'] = rmse_mean  # rmse
    feature_set['rmse_std'] = rmse_std
    feature_set['rmse_var'] = rmse_var
    feature_set['cent_mean'] = cent_mean  # cent
    feature_set['cent_std'] = cent_std
    feature_set['cent_var'] = cent_var
    feature_set['spec_bw_mean'] = spec_bw_mean  # spectral bandwidth
    feature_set['spec_bw_std'] = spec_bw_std
    feature_set['spec_bw_var'] = spec_bw_var
    feature_set['contrast_mean'] = contrast_mean  # contrast
    feature_set['contrast_std'] = contrast_std
    feature_set['contrast_var'] = contrast_var
    feature_set['rolloff_mean'] = rolloff_mean  # rolloff
    feature_set['rolloff_std'] = rolloff_std
    feature_set['rolloff_var'] = rolloff_var
    feature_set['poly_mean'] = poly_mean  # poly features
    feature_set['poly_std'] = poly_std
    feature_set['poly_var'] = poly_var
    feature_set['tonnetz_mean'] = tonnetz_mean  # tonnetz
    feature_set['tonnetz_std'] = tonnetz_std
    feature_set['tonnetz_var'] = tonnetz_var
    feature_set['zcr_mean'] = zcr_mean  # zero crossing rate
    feature_set['zcr_std'] = zcr_std
    feature_set['zcr_var'] = zcr_var
    feature_set['harm_mean'] = harm_mean  # harmonic
    feature_set['harm_std'] = harm_std
    feature_set['harm_var'] = harm_var
    feature_set['perc_mean'] = perc_mean  # percussive
    feature_set['perc_std'] = perc_std
    feature_set['perc_var'] = perc_var
    feature_set['frame_mean'] = frame_mean  # frames
    feature_set['frame_std'] = frame_std
    feature_set['frame_var'] = frame_var
    
    
    # Converting Dataframe into CSV Excel and JSON file
#     feature_set.to_csv('./testdata/testdata_librosa.csv')
#     feature_set.to_json('./testdata/testdata_librosa.json')
    feature_set.to_csv('./DemoData/demodata_librosa.csv')
    feature_set.to_json('./DemoData/demodata_librosa.json')
    
# Extracting Feature Function Call
# extract_feature('Songs/')
# extract_feature_librosa('songsforpilotstudy/')
# extract_feature_librosa('trainningsongs/')
extract_feature_librosa('DemoSongs/')




DemoSongs/Dream a Little Dream of Me - Zee Avi.mp3---start
DemoSongs/Jessie J - Flashlight.mp3---start
DemoSongs/birdy - tee shirt.mp3---start
DemoSongs/Bang Bang - Jessie J,Ariana Grande,Nicki Minaj.mp3---start


In [6]:
'''
    Feature extraction using both librosa and spotify
    input: path to mp3 files
    output: csv file containing extracted features
    
    This function reads the content in a directory and for each mp3 file detected
    reads the file and extracts relevant features using librosa and spotify library for audio
    signal processing
'''
def extract_feature(path):
    id = 1  # Song ID
    feature_set = pd.DataFrame()  # Feature Matrix
    
    # Individual Feature Vectors
    songname_vector = pd.Series()
    tempo_vector = pd.Series()
    total_beats = pd.Series()
    average_beats = pd.Series()
    chroma_stft_mean = pd.Series()
    chroma_stft_std = pd.Series()
    chroma_stft_var = pd.Series()
    chroma_cq_mean = pd.Series()
    chroma_cq_std = pd.Series()
    chroma_cq_var = pd.Series()
    chroma_cens_mean = pd.Series()
    chroma_cens_std = pd.Series()
    chroma_cens_var = pd.Series()
    mel_mean = pd.Series()
    mel_std = pd.Series()
    mel_var = pd.Series()
    mfcc_mean = pd.Series()
    mfcc_std = pd.Series()
    mfcc_var = pd.Series()
    mfcc_delta_mean = pd.Series()
    mfcc_delta_std = pd.Series()
    mfcc_delta_var = pd.Series()
    rmse_mean = pd.Series()
    rmse_std = pd.Series()
    rmse_var = pd.Series()
    cent_mean = pd.Series()
    cent_std = pd.Series()
    cent_var = pd.Series()
    spec_bw_mean = pd.Series()
    spec_bw_std = pd.Series()
    spec_bw_var = pd.Series()
    contrast_mean = pd.Series()
    contrast_std = pd.Series()
    contrast_var = pd.Series()
    rolloff_mean = pd.Series()
    rolloff_std = pd.Series()
    rolloff_var = pd.Series()
    poly_mean = pd.Series()
    poly_std = pd.Series()
    poly_var = pd.Series()
    tonnetz_mean = pd.Series()
    tonnetz_std = pd.Series()
    tonnetz_var = pd.Series()
    zcr_mean = pd.Series()
    zcr_std = pd.Series()
    zcr_var = pd.Series()
    harm_mean = pd.Series()
    harm_std = pd.Series()
    harm_var = pd.Series()
    perc_mean = pd.Series()
    perc_std = pd.Series()
    perc_var = pd.Series()
    frame_mean = pd.Series()
    frame_std = pd.Series()
    frame_var = pd.Series()
    genre = pd.Series()
    artist = pd.Series()
    title = pd.Series()
    album = pd.Series()
    sp_energy = pd.Series()
    sp_speechiness = pd.Series()
    sp_valence = pd.Series()
    sp_danceability = pd.Series()
    sp_acousticness = pd.Series()
    sp_instrumentalness = pd.Series()
    sp_speechiness = pd.Series()
    sp_tempo = pd.Series()
    sp_loudness = pd.Series()
    sp_liveness = pd.Series()
    
    # Traversing over each file in path
    file_data = [f for f in listdir(path) if isfile (join(path, f))]
    for line in file_data:
        if ( line[-1:] == '\n' ):
            line = line[:-1]

        # Reading Song
        songname = path + line
        song = line
        y, sr = librosa.load(songname, duration=60)
        S = np.abs(librosa.stft(y))
        
#         Extracting Features
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        poly_features = librosa.feature.poly_features(S=S, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)
        
        mfcc = librosa.feature.mfcc(y=y, sr=sr)#investigate mfcc, extract features from mfcc
        mfcc_delta = librosa.feature.delta(mfcc)
    
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
        frames_to_time = librosa.frames_to_time(onset_frames[:20], sr=sr)
               
        # Transforming Features
        songname_vector.at[id] = line  # song name
        tempo_vector.at[id] = tempo #temp
        total_beats.at[id] = sum(beats) # beats
        average_beats.at[id] = np.average(beats)
        chroma_stft_mean.at[id]= np.mean(chroma_stft)  # chroma stft
        chroma_stft_std.at[id] = np.std(chroma_stft)
        chroma_stft_var.at[id] = np.var(chroma_stft)
        chroma_cq_mean.at[id] = np.mean(chroma_cq)  # chroma cq
        chroma_cq_std.at[id] = np.std(chroma_cq)
        chroma_cq_var.at[id] = np.var(chroma_cq)
        chroma_cens_mean.at[id] = np.mean(chroma_cens)  # chroma cens
        chroma_cens_std.at[id] = np.std(chroma_cens)
        chroma_cens_var.at[id] = np.var(chroma_cens)
        mel_mean.at[id] = np.mean(melspectrogram)  # melspectrogram
        mel_std.at[id] = np.std(melspectrogram)
        mel_var.at[id] = np.var(melspectrogram)
        mfcc_mean.at[id] = np.mean(mfcc)  # mfcc
        mfcc_std.at[id] = np.std(mfcc)
        mfcc_var.at[id] = np.var(mfcc)
        mfcc_delta_mean.at[id] = np.mean(mfcc_delta)  # mfcc delta
        mfcc_delta_std.at[id] = np.std(mfcc_delta)
        mfcc_delta_var.at[id] = np.var(mfcc_delta)
        rmse_mean.at[id] = np.mean(rmse)  # rmse
        rmse_std.at[id] = np.std(rmse)
        rmse_var.at[id] = np.var(rmse)
        cent_mean.at[id] = np.mean(cent)  # cent
        cent_std.at[id] = np.std(cent)
        cent_var.at[id] = np.var(cent)
        spec_bw_mean.at[id] = np.mean(spec_bw)  # spectral bandwidth
        spec_bw_std.at[id] = np.std(spec_bw)
        spec_bw_var.at[id] = np.var(spec_bw)
        contrast_mean.at[id] = np.mean(contrast)  # contrast
        contrast_std.at[id] = np.std(contrast)
        contrast_var.at[id] = np.var(contrast)
        rolloff_mean.at[id] = np.mean(rolloff)  # rolloff
        rolloff_std.at[id] = np.std(rolloff)
        rolloff_var.at[id] = np.var(rolloff)
        poly_mean.at[id] = np.mean(poly_features)  # poly features
        poly_std.at[id] = np.std(poly_features)
        poly_var.at[id] = np.var(poly_features)
        tonnetz_mean.at[id] = np.mean(tonnetz)  # tonnetz
        tonnetz_std.at[id] = np.std(tonnetz)
        tonnetz_var.at[id] = np.var(tonnetz)
        zcr_mean.at[id] = np.mean(zcr)  # zero crossing rate
        zcr_std.at[id] = np.std(zcr)
        zcr_var.at[id] = np.var(zcr)
        harm_mean.at[id] = np.mean(harmonic)  # harmonic
        harm_std.at[id] = np.std(harmonic)
        harm_var.at[id] = np.var(harmonic)
        perc_mean.at[id] = np.mean(percussive)  # percussive
        perc_std.at[id] = np.std(percussive)
        perc_var.at[id] = np.var(percussive)
        frame_mean.at[id] = np.mean(frames_to_time)  # frames
        frame_std.at[id] = np.std(frames_to_time)
        frame_var.at[id] = np.var(frames_to_time)
        
        genre.at[id] = extract_id3_feature_for_song('genre', songname)
        artist.at[id] = extract_id3_feature_for_song('artist', songname)
        title.at[id] = extract_id3_feature_for_song('title', songname)
        if title.at[id] == 'unknown':
            title.at[id] = song
        album.at[id] = extract_id3_feature_for_song('album', songname)
        
        try:
            results = sp.search(q='artist:'+artist.at[id]+' track:'+title.at[id], type='track', limit=3)
            trackid = str(results['tracks']['items'][0]['id'])
            sp_features = sp.audio_features(trackid)[0]
            sp_energy.at[id] = sp_features['energy']
            sp_speechiness.at[id] = sp_features['speechiness']
            sp_valence.at[id] = sp_features['valence']
            sp_danceability.at[id] = sp_features['danceability']
            sp_acousticness.at[id] = sp_features['acousticness']
            sp_instrumentalness.at[id] = sp_features['instrumentalness']
            sp_speechiness.at[id] = sp_features['speechiness']
            sp_tempo.at[id] = sp_features['tempo']
            sp_loudness.at[id] = sp_features['loudness']
            sp_liveness.at[id] = sp_features['liveness']
        except Exception as e:
#             pass
            sp_features = 'NaN'
            sp_energy.at[id] = 'NaN'
            sp_speechiness.at[id] = 'NaN'
            sp_valence.at[id] = 'NaN'
            sp_danceability.at[id] = 'NaN'
            sp_acousticness.at[id] = 'NaN'
            sp_instrumentalness.at[id] = 'NaN'
            sp_speechiness.at[id] = 'NaN'
            sp_tempo.at[id] = 'NaN'
            sp_loudness.at[id] = 'NaN'
            sp_liveness.at[id] = 'NaN'

        print(songname + "---start")
        id = id+1
    
    # Concatenating Features into one csv and json format
    feature_set['song_name'] = songname_vector  # song name
    feature_set['tempo'] = tempo_vector  # tempo 
    feature_set['total_beats'] = total_beats  # beats
    feature_set['average_beats'] = average_beats
    feature_set['chroma_stft_mean'] = chroma_stft_mean  # chroma stft
    feature_set['chroma_stft_std'] = chroma_stft_std
    feature_set['chroma_stft_var'] = chroma_stft_var
    feature_set['chroma_cq_mean'] = chroma_cq_mean  # chroma cq
    feature_set['chroma_cq_std'] = chroma_cq_std
    feature_set['chroma_cq_var'] = chroma_cq_var
    feature_set['chroma_cens_mean'] = chroma_cens_mean  # chroma cens
    feature_set['chroma_cens_std'] = chroma_cens_std
    feature_set['chroma_cens_var'] = chroma_cens_var
    feature_set['melspectrogram_mean'] = mel_mean  # melspectrogram
    feature_set['melspectrogram_std'] = mel_std
    feature_set['melspectrogram_var'] = mel_var
    feature_set['mfcc_mean'] = mfcc_mean  # mfcc
    feature_set['mfcc_std'] = mfcc_std
    feature_set['mfcc_var'] = mfcc_var
    feature_set['mfcc_delta_mean'] = mfcc_delta_mean  # mfcc delta
    feature_set['mfcc_delta_std'] = mfcc_delta_std
    feature_set['mfcc_delta_var'] = mfcc_delta_var
    feature_set['rmse_mean'] = rmse_mean  # rmse
    feature_set['rmse_std'] = rmse_std
    feature_set['rmse_var'] = rmse_var
    feature_set['cent_mean'] = cent_mean  # cent
    feature_set['cent_std'] = cent_std
    feature_set['cent_var'] = cent_var
    feature_set['spec_bw_mean'] = spec_bw_mean  # spectral bandwidth
    feature_set['spec_bw_std'] = spec_bw_std
    feature_set['spec_bw_var'] = spec_bw_var
    feature_set['contrast_mean'] = contrast_mean  # contrast
    feature_set['contrast_std'] = contrast_std
    feature_set['contrast_var'] = contrast_var
    feature_set['rolloff_mean'] = rolloff_mean  # rolloff
    feature_set['rolloff_std'] = rolloff_std
    feature_set['rolloff_var'] = rolloff_var
    feature_set['poly_mean'] = poly_mean  # poly features
    feature_set['poly_std'] = poly_std
    feature_set['poly_var'] = poly_var
    feature_set['tonnetz_mean'] = tonnetz_mean  # tonnetz
    feature_set['tonnetz_std'] = tonnetz_std
    feature_set['tonnetz_var'] = tonnetz_var
    feature_set['zcr_mean'] = zcr_mean  # zero crossing rate
    feature_set['zcr_std'] = zcr_std
    feature_set['zcr_var'] = zcr_var
    feature_set['harm_mean'] = harm_mean  # harmonic
    feature_set['harm_std'] = harm_std
    feature_set['harm_var'] = harm_var
    feature_set['perc_mean'] = perc_mean  # percussive
    feature_set['perc_std'] = perc_std
    feature_set['perc_var'] = perc_var
    feature_set['frame_mean'] = frame_mean  # frames
    feature_set['frame_std'] = frame_std
    feature_set['frame_var'] = frame_var
    feature_set['genre'] = genre
    feature_set['artist'] = artist
    feature_set['title'] = title
    feature_set['album'] = album
    feature_set['genre'] = genre
    feature_set['artist'] = artist
    feature_set['title'] = title
    feature_set['album'] = album
    feature_set['sp_energy'] = sp_energy
    feature_set['sp_speechiness'] = sp_speechiness
    feature_set['sp_valence'] = sp_valence
    feature_set['sp_danceability'] = sp_danceability
    feature_set['sp_acousticness'] = sp_acousticness
    feature_set['sp_instrumentalness'] = sp_instrumentalness
    feature_set['sp_speechiness'] = sp_speechiness
    feature_set['sp_tempo'] = sp_tempo
    feature_set['sp_liveness'] = sp_liveness
    feature_set['sp_loudness'] = sp_loudness

    # Converting Dataframe into CSV Excel and JSON file
#     feature_set.to_csv('./trainingdata_both/trainingdata_both_withnan.csv')
#     feature_set.to_json('./trainingdata_both/trainingdata_both_withnan.json')
#     feature_set.to_csv('./testdata/testdata_both_withnan.csv')
#     feature_set.to_json('./testdata/testdata_both_withnan.json')
#     feature_set.to_csv('./testdata/testdata_both.csv')
#     feature_set.to_json('./testdata/testdata_both.json')
    feature_set.to_csv('./DemoData/demodata_both.csv')
    feature_set.to_json('./DemoData/demodata_both.json')
       
# Extracting Feature Function Call
# extract_feature('Songs/')
# extract_feature('songsforpilotstudy/')
# extract_feature('songs-spotify/')
# extract_feature('trainningsongs/')
extract_feature('DemoSongs/')



headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http status 200
GET https://api.spotify.com/v1/search?q=artist%3AZee+Avi+track%3ADream+a+Little+Dream+of+Me&limit=3&offset=0&type=track
RESP {'tracks': {'href': 'https://api.spotify.com/v1/search?query=artist%3AZee+Avi+track%3ADream+a+Little+Dream+of+Me&type=track&offset=0&limit=3', 'items': [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6zGcYBjlNOMSVVrl7ZoGsH'}, 'href': 'https://api.spotify.com/v1/artists/6zGcYBjlNOMSVVrl7ZoGsH', 'id': '6zGcYBjlNOMSVVrl7ZoGsH', 'name': 'Zee Avi', 'type': 'artist', 'uri': 'spotify:artist:6zGcYBjlNOMSVVrl7ZoGsH'}], 'available_markets': ['AD', 'AR', 'AT', 'AU', 'BE', 'BG', 'BO', 'BR', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'EC', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HU', 'ID', 'IE', 'IL', 'IS', 'IT', 


headers {'Authorization': 'Bearer BQAyvF4Cnf6v2GhwMhj4cHtBH5An7PD6ZsKmdkD5VzGyYPCZNPLfBWGGeVF22O1aDbnqUk_msG7VrUdcaBI', 'Content-Type': 'application/json'}
http status 200
GET https://api.spotify.com/v1/search?q=artist%3ABirdy+track%3ATee+Shirt&limit=3&offset=0&type=track
RESP {'tracks': {'href': 'https://api.spotify.com/v1/search?query=artist%3ABirdy+track%3ATee+Shirt&type=track&offset=0&limit=3', 'items': [{'album': {'album_type': 'single', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2WX2uTcsvV5OnS0inACecP'}, 'href': 'https://api.spotify.com/v1/artists/2WX2uTcsvV5OnS0inACecP', 'id': '2WX2uTcsvV5OnS0inACecP', 'name': 'Birdy', 'type': 'artist', 'uri': 'spotify:artist:2WX2uTcsvV5OnS0inACecP'}], 'available_markets': ['AD', 'AR', 'AT', 'BE', 'BG', 'BO', 'BR', 'CA', 'CH', 'CL', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'DO', 'EC', 'EE', 'ES', 'FI', 'FR', 'GB', 'GR', 'GT', 'HK', 'HN', 'HU', 'ID', 'IE', 'IL', 'IS', 'IT', 'JP', 'LT', 'LU', 'LV', 'MC', 'MT', 'MY', 'NI