In [15]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import yaml
import pandas as pd
from pathlib import Path
import pickle
from util import save_csv, save_pickle

In [5]:
util.save_csv

<function util.save_csv(data, fid)>

In [23]:
class Spotify_query(object):
    def __init__(self, genre):
        # Basic directories
        self.base_dir = Path('.')
        self.data_dir = self.base_dir.joinpath("data")
        self.credential()
        self.genre=genre
        
    def credential(self):
        '''Authorization Code Flow'''
        cred = self.base_dir.joinpath('credential.yaml')
        with open(cred, "r") as f: 
            cred = yaml.load(f, Loader=yaml.CLoader)
        client_credentials_manager = SpotifyClientCredentials(
            client_id = cred['id'],
            client_secret = cred['secret'])
        self.sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
        
    def get_artists_genre(self):
        """
        Query all artists info from a certain genre.
        Save the artist's name, id, followers into a dataframe.
        """
        artist = []
        
        # query the total number of artists
        at = self.sp.search(q='genre:'+self.genre, limit=50, type='artist')['artists']
        print('Total number of artists for '+self.genre+': '+str(at['total']))
        
        # query all artists info
        if at['total']>50:
            for i in range(0,at['total'],50):
                artist+=self.sp.search(q='genre:'+self.genre,limit=50, type='artist', offset=i)['artists']['items']
                
        # create the df of artist id, name, popularity
        artist_df = []
        for i in artist:
            d = {
                'id': i['id'],
                'name': i['name'],
                'followers': i['followers']['total'],
                'at_popularity': i['popularity']
            }
            artist_df.append(d)
        artist_df = pd.DataFrame(artist_df)
        
        # save the artist info
        fid = self.data_dir.joinpath(f'artists_genre-{self.genre}.tsv')
        save_csv(artist_df, fid)
        
        return artist_df
    
    def get_albums_artists(self, artist_df):
        """
        Query all album info from a certain artist.
        Save the artist's name, id, album_id into a dataframe.
        """
        album_df = []
        for index, row in artist_df.iterrows():
            # get all album info of an artist
            album_ar = []
            al = self.sp.artist_albums(row['id'], limit=50)
            print('Total number of albums for '+row['name']+': '+str(al['total']))
            if al['total'] > 50:
                for i in range(0, al['total'], 50):
                    album_ar += self.sp.artist_albums(row['id'], limit=50, offset=i)['items']
            else:
                album_ar += self.sp.artist_albums(row['id'], limit=50)['items']
            # create the df of artist id, album id    
            for i in album_ar:
                d = {
                    'id': row['id'],
                    'name': row['name'],
                    'album_id': i['id']
                }
                album_df.append(d)    
        album_df = pd.DataFrame(album_df)
        # save data
        fid = self.data_dir.joinpath(f'artists_album_genre-{self.genre}.tsv')
        save_csv(album_df, fid)
        
        return album_df
    
    def get_songs_album(self, album_df):
        """
        Query all song basic and audio info from a certain album.
        Save the result into a dataframe.
        """
        song_df = []
        # get song info of each album
        for index, row in album_df.iterrows():
            song_al_df = []

            # get the song ids of each album
            song_al = self.sp.album_tracks(row['album_id'])['items']
            for i in song_al:
                d = {
                    'id': row['id'],
                    'name': row['name'],
                    'album_id': row['id'],
                    'release_date': row['release_date'],
                    'song_id': i['id']
                }
                song_al_df.append(d)
            song_al_df = pd.DataFrame(song_al_df)

            # get the basic info of each song of each album
            tr_al_df = []
            song_id_list = song_al_df['song_id'].to_list()
            tr_al = self.sp.tracks(song_id_list)['tracks']
            for i in tr_al:
                d = {
                    'song_id': i['id'],
                    'popularity': i['popularity']
                }
                tr_al_df.append(d)
            tr_al_df = pd.DataFrame(tr_al_df)

            # get the audio features of each song of each album
            ft_al = self.sp.audio_features(song_id_list)
            # remove potential none item in the list
            ft_al = list(filter(None.__ne__, ft_al))
            if not ft_al:
                ft_al_df = pd.DataFrame({"song_id": song_id_list})
            else:
                ft_al_df = pd.DataFrame(ft_al)[['danceability', 'energy', 'key', 'loudness', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'id', 'duration_ms']]
                ft_al_df = ft_al_df.rename(columns={"id": "song_id"})

            # merge tables
            song_al_df = song_al_df.merge(tr_al_df, how='left', on='song_id')
            song_al_df = song_al_df.merge(ft_al_df, how='left', on='song_id')
            song_df.append(song_al_df)
            print('Get all songs info of '+row['name']+', '+row['album_id'])
        song_df = pd.concat(song_df)

        fid = self.data_dir.joinpath(f'songs_genre-{self.genre}.pkl')
        save_pickle(song_df, fid)
    

In [24]:
spf = Spotify_query(genre = 'mandopop')
artist=spf.get_artists_genre()


Total number of artists for mandopop: 569


In [25]:
albums = spf.get_albums_artists(artist)

Total number of albums for Jay Chou: 118
Total number of albums for JJ Lin: 96
Total number of albums for Eason Chan: 202
Total number of albums for G.E.M.: 74
Total number of albums for Eric Chou: 33
Total number of albums for Jacky Cheung: 245
Total number of albums for Faye Wong: 145
Total number of albums for Jolin Tsai: 75
Total number of albums for Mayday: 123
Total number of albums for Stefanie Sun: 52
Total number of albums for Ren Ran: 62
Total number of albums for Joker Xue: 60
Total number of albums for Fish Leong: 87
Total number of albums for Hebe Tien: 69
Total number of albums for Karen Mok: 173
Total number of albums for Leehom Wang: 62
Total number of albums for A-Mei Chang: 73
Total number of albums for Teresa Teng: 374
Total number of albums for Mao Buyi: 43
Total number of albums for Tanya Chua: 51
Total number of albums for Ronghao Li: 34
Total number of albums for Andy Lau: 153
Total number of albums for S.H.E: 82
Total number of albums for Sandy Lam: 176
Total nu

Questions:
What audio features contribute most for a successful artist in mandopop?
1. Overall popularity defined by followers
2. Overall audio features change over years
3. SVR analysis of each feature

In [40]:
song_df = []
# get song info of each album
for index, row in album_df.iterrows():
    song_al_df = []
    
    # get the song ids of each album
    song_al = sp.album_tracks(row['album_id'])['items']
    for i in song_al:
        d = {
            'id': row['id'],
            'name': row['name'],
            'album_id': row['id'],
            'song_id': i['id']
        }
        song_al_df.append(d)
    song_al_df = pd.DataFrame(song_al_df)
    
    # get the basic info of each song of each album
    tr_al_df = []
    song_id_list = song_al_df['song_id'].to_list()
    tr_al = sp.tracks(song_id_list)['tracks']
    for i in tr_al:
        d = {
            'song_id': i['id'],
            'popularity': i['popularity']
        }
        tr_al_df.append(d)
    tr_al_df = pd.DataFrame(tr_al_df)
    
    # get the audio features of each song of each album
    ft_al = sp.audio_features(song_id_list)
    # remove potential none item in the list
    ft_al = list(filter(None.__ne__, ft_al))
    if not ft_al:
        ft_al_df = pd.DataFrame({"song_id": song_id_list})
    else:
        ft_al_df = pd.DataFrame(ft_al)[['danceability', 'energy', 'key', 'loudness', 'speechiness',
           'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'id', 'duration_ms']]
        ft_al_df = ft_al_df.rename(columns={"id": "song_id"})
    
    # merge tables
    song_al_df = song_al_df.merge(tr_al_df, how='left', on='song_id')
    song_al_df = song_al_df.merge(ft_al_df, how='left', on='song_id')
    song_df.append(song_al_df)
    print('Get all songs info of '+row['name']+', '+row['album_id'])
song_df = pd.concat(song_af)

fid = data_dir.joinpath(f'songs_genre-{genre}.pkl')
save_pickle(song_df, fid)


Get all songs info of Jay Chou, 5lqE2qT3nGLW6FGoJYfwUT
Get all songs info of Jay Chou, 3mqG5FjCm7XuCx5IHN3ZSg
Get all songs info of Jay Chou, 5clrYp291Dg0XL6rgvOgoB
Get all songs info of Jay Chou, 0ByfRfYXHHhGjsXEabfDmf
Get all songs info of Jay Chou, 35nLe33atE2S9Oo4fBRWll
Get all songs info of Jay Chou, 0AMaK4fzYnhvi4HTNVvKet
Get all songs info of Jay Chou, 1aYDr6muZPmwYvstDnIbeH
Get all songs info of Jay Chou, 1TN7aPDawSKY2yJgGRuBtD
Get all songs info of Jay Chou, 2afPHXn7m4KQECRsVn41us
Get all songs info of Jay Chou, 2mfQMCY9GF34zQ8bqhIrmQ
Get all songs info of Jay Chou, 5Ld4kLIL16CwUAx7IbxtIQ
Get all songs info of Jay Chou, 6Py0A0ft3OlxvS6FgVvdJN
Get all songs info of Jay Chou, 2P1g6pMd57brcxfpBcb1cM
Get all songs info of Jay Chou, 77IElSKPvc2zdOh46FZgbe
Get all songs info of Jay Chou, 2kMP7E1hqEbZekixQBMiPg
Get all songs info of Jay Chou, 4cj9Li1HSSvRGEXsDU46dS
Get all songs info of Jay Chou, 7oz7WhrdSfsC3GkAUPOzgL
Get all songs info of Jay Chou, 6pmapc89HjY4f16ARzrmBB
Get all so

ConnectionError: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))

In [46]:
spf.sp.album_tracks('5clrYp291Dg0XL6rgvOgoB')['items'][0]

{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2elBjNSdBE2Y3f0j1mjrql'},
   'href': 'https://api.spotify.com/v1/artists/2elBjNSdBE2Y3f0j1mjrql',
   'id': '2elBjNSdBE2Y3f0j1mjrql',
   'name': 'Jay Chou',
   'type': 'artist',
   'uri': 'spotify:artist:2elBjNSdBE2Y3f0j1mjrql'}],
 'available_markets': ['AD',
  'AE',
  'AG',
  'AL',
  'AM',
  'AO',
  'AR',
  'AT',
  'AU',
  'AZ',
  'BA',
  'BB',
  'BD',
  'BE',
  'BF',
  'BG',
  'BH',
  'BI',
  'BJ',
  'BN',
  'BO',
  'BR',
  'BS',
  'BT',
  'BW',
  'BY',
  'BZ',
  'CA',
  'CH',
  'CI',
  'CL',
  'CM',
  'CO',
  'CR',
  'CV',
  'CW',
  'CY',
  'CZ',
  'DE',
  'DJ',
  'DK',
  'DM',
  'DO',
  'DZ',
  'EC',
  'EE',
  'EG',
  'ES',
  'FI',
  'FJ',
  'FM',
  'FR',
  'GA',
  'GB',
  'GD',
  'GE',
  'GH',
  'GM',
  'GN',
  'GQ',
  'GR',
  'GT',
  'GW',
  'GY',
  'HK',
  'HN',
  'HR',
  'HT',
  'HU',
  'ID',
  'IE',
  'IL',
  'IN',
  'IS',
  'IT',
  'JM',
  'JO',
  'JP',
  'KE',
  'KG',
  'KH',
  'KI',
  'KM',
  'KN',
 

In [45]:
song_al_df = []
song_al = spf.sp.album_tracks('5clrYp291Dg0XL6rgvOgoB')['items']


for i in song_al:
    d = {
        'id': row['id'],
        'name': row['name'],
        'album_id': row['id'],
        'release_date': row['release_date'],
        'song_id': i['id'],
        'preview_url': i['preview_url']
    }
    song_al_df.append(d)
song_al_df = pd.DataFrame(song_al_df)
song_al_df

NameError: name 'row' is not defined

In [135]:
tr_al_df = []
song_id_list = song_al_df['song_id'].to_list()
tr_al = sp.tracks(song_id_list)['tracks']
for i in tr_al:
    d = {
        'song_id': i['id'],
        'popularity': i['popularity']
    }
    tr_al_df.append(d)
tr_al_df = pd.DataFrame(tr_al_df)

In [143]:
ft_al = sp.audio_features(song_al_df['song_id'].to_list())
ft_al_df = pd.DataFrame(ft_al)[['danceability', 'energy', 'key', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'duration_ms']]

ft_al_df = ft_al_df.rename(columns={"id": "song_id"})
ft_al_df

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,song_id,duration_ms
0,0.68,0.884,4,-6.508,0.0449,0.248,7e-06,0.0951,0.539,106.012,audio_features,7KYm8KPa0Vujh7kClLE4Qo,226213
1,0.522,0.829,1,-5.635,0.0317,0.00219,0.0,0.0793,0.247,117.989,audio_features,2dFvmGWAV9UW7vdDXhaxNL,266000
2,0.507,0.413,7,-7.49,0.0277,0.119,1.7e-05,0.0886,0.172,133.966,audio_features,5rQPgfJMJ7yhsEr0HwTA77,221267
3,0.63,0.922,9,-7.006,0.0332,0.277,0.0111,0.146,0.414,101.996,audio_features,3ofIiWBhx82EwJACDA49fQ,200720
4,0.549,0.939,7,-5.968,0.046,0.000106,0.0121,0.127,0.428,118.988,audio_features,1QwOvYi1IS5s79dBjotcup,201747
5,0.441,0.619,4,-6.609,0.0293,0.19,0.0,0.118,0.187,75.027,audio_features,7LyCtbxher5m97MImn5M3l,290133
6,0.662,0.924,7,-6.473,0.0417,0.00203,0.206,0.094,0.602,114.015,audio_features,2qm1nx1d1yAjYMZflBLNGC,195760
7,0.59,0.572,11,-7.658,0.0379,0.192,0.0,0.109,0.448,179.989,audio_features,1ivCIgrYZyE0BvItL4Z8lk,215147
8,0.566,0.851,5,-5.549,0.0284,0.00202,0.0,0.35,0.552,93.009,audio_features,5jur8XerpNRx0kEV4bSdOF,174053
9,0.411,0.671,2,-6.207,0.0319,0.0115,0.0,0.11,0.397,73.947,audio_features,4zWz6OmFpyXIgQCxAS5yQb,285616


['7KYm8KPa0Vujh7kClLE4Qo',
 '2dFvmGWAV9UW7vdDXhaxNL',
 '5rQPgfJMJ7yhsEr0HwTA77',
 '3ofIiWBhx82EwJACDA49fQ',
 '1QwOvYi1IS5s79dBjotcup',
 '7LyCtbxher5m97MImn5M3l',
 '2qm1nx1d1yAjYMZflBLNGC',
 '1ivCIgrYZyE0BvItL4Z8lk',
 '5jur8XerpNRx0kEV4bSdOF',
 '4zWz6OmFpyXIgQCxAS5yQb']

In [120]:
sp.tracks(song_al_df['song_id'].to_list())['tracks'][0]['popularity']
sp.tracks(song_al_df['song_id'].to_list())['tracks'][0]['duration_ms']
sp.tracks(song_al_df['song_id'].to_list())['tracks'][0]['id']

'7KYm8KPa0Vujh7kClLE4Qo'

In [118]:
sp.tracks(song_al_df['song_id'].to_list())['tracks'][0]

{'album': {'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2elBjNSdBE2Y3f0j1mjrql'},
    'href': 'https://api.spotify.com/v1/artists/2elBjNSdBE2Y3f0j1mjrql',
    'id': '2elBjNSdBE2Y3f0j1mjrql',
    'name': 'Jay Chou',
    'type': 'artist',
    'uri': 'spotify:artist:2elBjNSdBE2Y3f0j1mjrql'}],
  'available_markets': ['AD',
   'AE',
   'AG',
   'AL',
   'AM',
   'AO',
   'AR',
   'AT',
   'AU',
   'AZ',
   'BA',
   'BB',
   'BD',
   'BE',
   'BF',
   'BG',
   'BH',
   'BI',
   'BJ',
   'BN',
   'BO',
   'BR',
   'BS',
   'BT',
   'BW',
   'BY',
   'BZ',
   'CA',
   'CH',
   'CI',
   'CL',
   'CM',
   'CO',
   'CR',
   'CV',
   'CW',
   'CY',
   'CZ',
   'DE',
   'DJ',
   'DK',
   'DM',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FJ',
   'FM',
   'FR',
   'GA',
   'GB',
   'GD',
   'GE',
   'GH',
   'GM',
   'GN',
   'GQ',
   'GR',
   'GT',
   'GW',
   'GY',
   'HK',
   'HN',
   'HR',
   'HT',
   'HU',
   'ID',
   

In [46]:
track_df = []
for index, row in artist_df.iterrows():
    track = []
    # query the total number of tracks of a given artist
    tr = sp.search(q='artist:'+row['name'],limit=50,type='track')['tracks']
    print('Total number of tracks for '+row['name']+': '+str(tr['total']))
    # query all tracks info
    if tr['total']>50:
        for i in range(0, tr['total'], 50):
            track+=sp.search(q='artist:'+row['name'], limit=50, type='track', offset=i)['tracks']['items']
    else:
         track+=sp.search(q='artist:'+row['name'], limit=50, type='track')['tracks']['items']
    # create the df of track_id, 
    for i in track:
        d = {
            'id': row['id'],
            'track_id': i['id'],
            'duration_ms': i['duration_ms'],
            'release_date': i['album']['release_date'],
            'popularity': i['popularity']
        }
        track_df.append(d)
track_df = pd.DataFrame(track_df)
fid = data_dir.joinpath(f'tracks_genre-{genre}.tsv')
save_csv(track_df, fid)

Total number of tracks for Jay Chou: 390
Total number of tracks for JJ Lin: 387
Total number of tracks for Eason Chan: 1185
Total number of tracks for G.E.M.: 367
Total number of tracks for Eric Chou: 112
Total number of tracks for Jacky Cheung: 913
Total number of tracks for Jolin Tsai: 430
Total number of tracks for Faye Wong: 733
Total number of tracks for Mayday: 2471
Total number of tracks for Stefanie Sun: 344
Total number of tracks for Ren Ran: 806
Total number of tracks for Joker Xue: 116
Total number of tracks for Fish Leong: 315
Total number of tracks for Hebe Tien: 270
Total number of tracks for Karen Mok: 489
Total number of tracks for Leehom Wang: 374
Total number of tracks for A-Mei Chang: 411
Total number of tracks for Teresa Teng: 3757
Total number of tracks for Mao Buyi: 116
Total number of tracks for Tanya Chua: 199
Total number of tracks for Ronghao Li: 91
Total number of tracks for Andy Lau: 1202
Total number of tracks for S.H.E: 506
Total number of tracks for Sandy

KeyboardInterrupt: 

In [36]:
track = []
for i in range(0, tr['total'], 50):
    track+=sp.search(q='artist:'+'Eason Chan',limit=50,type='track')['tracks']['items']

In [48]:
track[0]['popularity']

55

In [30]:
artist_df

Unnamed: 0,id,name,followers
0,2elBjNSdBE2Y3f0j1mjrql,Jay Chou,2456028
1,7Dx7RhX0mFuXhCOUgB01uM,JJ Lin,1776088
2,2QcZxAgcs2I1q7CtCkl6MI,Eason Chan,1350569
3,7aRC4L63dBn3CiLDuWaLSI,G.E.M.,1574638
4,5fEQLwq1BWWQNR8GzhOIvi,Eric Chou,805933
...,...,...,...
565,7IhoiR1VotrcXWaviaSPNg,Nana Lee,10883
566,7lBz9amGhtwyMiwMUvlQwX,颜黎明,640
567,7vHny6pPDgjIHoVT9vNdne,10vacations,300
568,4ko19t9P3n3la4xALra1bk,Hagen Troy,61


In [9]:
def save_csv(data, fid):
    data.to_csv(fid, sep='\t', float_format='%.5f', na_rep='n/a', index=False)
    

In [10]:
def save_pickle(data, fid):
    open_file = open(fid, "wb")
    pickle.dump(data, open_file)
    open_file.close()
    

In [None]:
# outdir
out_dir = Path('/projects/hulacon/shared/nsd_results/yufei/codes_yufei/content/fc')
out_dir.mkdir(exist_ok=True, parents=True)
out_fid = out_dir.joinpath(
            f'sub-{sub_id}_fc.pkl')

open_file = open(out_fid, "wb")
pickle.dump(fc_features, open_file)
open_file.close()

Unnamed: 0,id,name,followers
0,2elBjNSdBE2Y3f0j1mjrql,Jay Chou,2456028
1,7Dx7RhX0mFuXhCOUgB01uM,JJ Lin,1776088
2,2QcZxAgcs2I1q7CtCkl6MI,Eason Chan,1350569
3,7aRC4L63dBn3CiLDuWaLSI,G.E.M.,1574638
4,5fEQLwq1BWWQNR8GzhOIvi,Eric Chou,805933
...,...,...,...
565,7IhoiR1VotrcXWaviaSPNg,Nana Lee,10883
566,7lBz9amGhtwyMiwMUvlQwX,颜黎明,640
567,7vHny6pPDgjIHoVT9vNdne,10vacations,300
568,4ko19t9P3n3la4xALra1bk,Hagen Troy,61


In [86]:
sp.audio_features(['56wVfJKtnwlSZtC4NVgIrf'])

[{'danceability': 0.512,
  'energy': 0.35,
  'key': 10,
  'loudness': -11.843,
  'mode': 1,
  'speechiness': 0.0311,
  'acousticness': 0.478,
  'instrumentalness': 1.15e-06,
  'liveness': 0.0855,
  'valence': 0.343,
  'tempo': 152.023,
  'type': 'audio_features',
  'id': '56wVfJKtnwlSZtC4NVgIrf',
  'uri': 'spotify:track:56wVfJKtnwlSZtC4NVgIrf',
  'track_href': 'https://api.spotify.com/v1/tracks/56wVfJKtnwlSZtC4NVgIrf',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/56wVfJKtnwlSZtC4NVgIrf',
  'duration_ms': 222333,
  'time_signature': 4}]

In [9]:
import librosa
from math import ceil

In [10]:
y, _ = librosa.load('/projects/hulacon/shared/nsd_results/yufei/spotify_analysis/data/preview_mp3/2o1mun3mrgfPLesJVJodQo.mp3')


class ParamsR:

    audio_len = 30
    srt = 22050 # default sample rate
    wsz = 4096 # samples per frame 
    mels = 128
    hop_length = wsz // 2 # determines the overlap between windows
    feat_len = int(ceil(srt * audio_len / float(hop_length)))

params = ParamsR




In [6]:
feat = librosa.feature.melspectrogram(
        y=y, sr=params.srt, n_fft=params.wsz,
        hop_length=params.hop_length, n_mels=params.mels)

In [7]:
feat

array([[1.1708965e+03, 6.9470453e+02, 1.4125255e+02, ..., 1.5560019e+02,
        1.3932365e+02, 7.3504898e+01],
       [6.6824556e+03, 3.8244651e+03, 8.0430762e+02, ..., 2.7518835e+02,
        8.9813702e+02, 1.3332767e+03],
       [1.5189962e+03, 8.5372607e+02, 2.5123970e+02, ..., 2.6697885e+02,
        3.6892621e+02, 9.9287561e+02],
       ...,
       [4.1808486e-02, 1.8216912e-01, 8.0377555e-01, ..., 5.2339263e-02,
        4.3328349e-02, 2.5055155e-01],
       [3.0951913e-02, 3.3815037e-02, 1.9981806e-01, ..., 2.0913120e-02,
        1.8973369e-02, 8.3663397e-02],
       [4.4055209e-03, 1.6785861e-03, 1.5131173e-02, ..., 9.5728209e-04,
        2.1050326e-03, 5.5263396e-03]], dtype=float32)

In [12]:
"""
script credit to https://github.com/OckhamsRazor/HSP_CNN
"""

import argparse
import glob
import multiprocessing
from functools import partial
from math import ceil
from os import mkdir, path
from shutil import rmtree
import librosa
import numpy as np
import warnings

# librosa.load will always raise a warning, ignore it
warnings.filterwarnings("ignore")

class ParamsR:

    audio_len = 30
    srt = 22050 # default sample rate
    wsz = 4096 # samples per frame 
    mels = 128
    hop_length = wsz // 2 # determines the overlap between windows
    feat_len = int(ceil(srt * audio_len / float(hop_length)))

params = ParamsR


def _feat_extract(fn, out_p):
    
    # get song id 
    sid = fn.split('/')[-1].split('.')[0]

    # Extract audio timeseries    
    y, sr = librosa.load(fn, sr=params.srt)
    song_len = len(y) / float(params.srt)

    if song_len < params.audio_len/2:
        print(sid)
        
    # Compute a melody-scaled spectrogram, shape = (n_mels, t)
    feat = librosa.feature.melspectrogram(
        y=y, sr=params.srt, n_fft=params.wsz,
        hop_length=params.hop_length, n_mels=params.mels)
    if song_len == params.audio_len:
        ret = feat
    elif song_len > params.audio_len: # select the middle 30s portion 
        start = feat.shape[1] // 2 - params.feat_len // 2
        end = feat.shape[1] // 2 + params.feat_len // 2
        if params.feat_len % 2 == 1:
            end += 1
        ret = feat[:, start:end]
    else: # padd it 
        ret = np.zeros((params.mels, params.feat_len))
        ret[:, :feat.shape[1]] = feat.copy()
    np.save(
        path.join(out_p, sid), ret.reshape(1, params.mels, params.feat_len))
    
    return 0  # success


def main():

    song_dir = './data/preview_mp3/'
    feat_dir = './mel_features/'
    if path.exists(feat_dir):
        rmtree(feat_dir)
    mkdir(feat_dir)

    songs = glob.glob('{}/*mp3'.format(song_dir))
    pool = multiprocessing.Pool(28)
    excpt = pool.map(partial(_feat_extract, out_p=feat_dir), songs[:])
    pool.close()
    pool.join()
    #for fn in songs: 
    #    _feat_extract(fn, out_p=feat_dir)


if __name__ == "__main__":
    main()

KeyboardInterrupt: 