In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, avg, when
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import DecisionTree

import time
import sys
import os
import glob
import hdf5_getters
import re

# (8 cores, 16gb per machine) x 5 = 40 cores

# New API
spark_session = SparkSession\
        .builder\
        .master("spark://192.168.1.79:7077") \
        .appName("SongHotness")\
        .config("spark.dynamicAllocation.enabled", True)\
        .config("spark.shuffle.service.enabled", True)\
        .config("spark.dynamicAllocation.executorIdleTimeout","30s")\
        .config("spark.executor.cores",1)\
        .getOrCreate()

# Old API (RDD)
spark_context = spark_session.sparkContext
sqlContext = SQLContext(spark_context)

def rowUnpack(df):
        results={}
        for i in df:
            results.update(i.asDict())
        return(results)

In [2]:
class Song:
    songCount = 0
    # songDictionary = {}

    def __init__(self, songID):
        self.id = songID
        Song.songCount += 1
        # Song.songDictionary[songID] = self

        self.albumName = None
        self.albumID = None
        self.artistID = None
        self.artistLatitude = None
        self.artistLocation = None
        self.artistLongitude = None
        self.artistName = None
        self.danceability = None
        self.duration = None
        self.energy = None
        self.genreList = []
        self.keySignature = None
        self.keySignatureConfidence = None
        self.lyrics = None
        self.popularity = None
        self.songhotttnesss = None
        self.tempo = None
        self.timeSignature = None
        self.timeSignatureConfidence = None
        self.title = None
        self.year = None

    def displaySongCount(self):
        print("Total Song Count %i" % Song.songCount)

    def displaySong(self):
        print("ID: %s" % self.id)  
    
    

In [3]:
    
"""
Original:

Alexis Greenstreet (October 4, 2015) University of Wisconsin-Madison

This code is designed to convert the HDF5 files of the Million Song Dataset
to a CSV by extracting various song properties.

The script writes to a "SongCSV.csv" in the directory containing this script.

Please note that in the current form, this code only extracts the following
information from the HDF5 files:
AlbumID, AlbumName, ArtistID, ArtistLatitude, ArtistLocation,
ArtistLongitude, ArtistName, Danceability, Duration, KeySignature,
KeySignatureConfidence, SongID, Tempo, TimeSignature,
TimeSignatureConfidence, Title, and Year.

This file also requires the use of "hdf5_getters.py", written by
Thierry Bertin-Mahieux (2010) at Columbia University

Credit:
This HDF5 to CSV code makes use of the following example code provided
at the Million Song Dataset website 
(Home>Tutorial/Iterate Over All Songs, 
http://labrosa.ee.columbia.edu/millionsong/pages/iterate-over-all-songs),
Which gives users the following code to get all song titles:

#import os
#import glob
#import hdf5_getters
#def get_all_titles(basedir,ext='.h5') :
#    titles = []
#   for root, dirs, files in os.walk(basedir):
#        files = glob.glob(os.path.join(root,'*'+ext))
#        for f in files:
#            h5 = hdf5_getters.open_h5_file_read(f)
#            titles.append( hdf5_getters.get_title(h5) )
#            h5.close()
#   return titles 
"""

"""This code has been modified for python3 compatibility by Karthik Nair(28 May 2019) of Uppsala University (github.com/karnair)"""
"""This code has been adapted for conversion of .h5 files to PySpark Dataframe by Karthik Nair(3 June 2019) of Uppsala University (github.com/karnair)"""



'This code has been adapted for conversion of .h5 files to PySpark Dataframe by Karthik Nair(3 June 2019) of Uppsala University (github.com/karnair)'

In [4]:
def h5toPysDf():
    
   
    song_number = []
    album_id = []
    artist_latitude = []
    artist_location = []
    artist_longitude = []
    artist_name =[]
    danceability = []
    duration = []
    energy = []
    key_signature = []
    key_signature_confidence = []
    song_id = []
    tempo = []
    song_hotttnesss = []
    time_signature = []
    time_signature_confidence = []
    title = []
    year = []
    
    
    
    csvRowString = ("SongID,AlbumID,ArtistLatitude,ArtistLocation,"+
            "ArtistLongitude,ArtistName,Danceability,Duration,Energy,KeySignature,"+
            "KeySignatureConfidence,SongHotttnesss,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "Title,Year")
    
    csvAttributeList = re.split('\W+', csvRowString)
    
    for i, v in enumerate(csvAttributeList):
        csvAttributeList[i] = csvAttributeList[i].lower()
        csvRowString = ""
    
    basedir = "/home/ubuntu/MillionSongSubset/data" # "." As the default means the current directory
    ext = ".h5"
    
    for root, dirs, files in os.walk(basedir):        
        files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            #song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            #song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = float(hdf5_getters.get_danceability(songH5File))
            song.duration = float(hdf5_getters.get_duration(songH5File))
            song.energy = float(hdf5_getters.get_energy(songH5File))
            # song.setGenreList()
            song.keySignature = float(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = float(hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = float(hdf5_getters.get_tempo(songH5File))
            song.songhotttnesss = float(hdf5_getters.get_song_hotttnesss(songH5File))
            song.timeSignature = float(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = float(hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            
            
            
            #csvRowString += str(song.songCount) + ","
            song_number.append(song.songCount)

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    #csvRowString += song.albumID
                    album_id.append(song.albumID)
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    artist_latitude.append(latitude)
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    artist_location.append(location) 
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    artist_longitude.append(longitude)   
                elif attribute == 'ArtistName'.lower():
                    artist_name.append(song.artistName)
                elif attribute == 'Danceability'.lower():
                    danceability.append(song.danceability)
                elif attribute == 'Duration'.lower():
                    duration.append(song.duration)
                elif attribute == 'Energy'.lower():
                    energy.append(song.energy)
                elif attribute == 'KeySignature'.lower():
                    key_signature.append(song.keySignature)
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence                                 
                    key_signature_confidence.append(song.keySignatureConfidence)
                elif attribute == 'SongID'.lower():
                    song_id.append(song.id)
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    tempo.append(song.tempo)
                elif attribute == 'SongHotttnesss'.lower():
                    song_hotttnesss.append(song.songhotttnesss)
                elif attribute == 'TimeSignature'.lower():
                    time_signature.append(song.timeSignature)
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence                                   
                    time_signature_confidence.append(song.timeSignatureConfidence)
                elif attribute == 'Title'.lower():
                    title.append(song.title)
                elif attribute == 'Year'.lower():
                    year.append(song.year)
                #"""else:
                 #   csvRowString += "Erm. This didn't work. Error. :( :(\n" """"

                #csvRowString += ","
            songH5File.close()
    pysp_df = sqlContext.createDataFrame(zip(song_number, album_id, artist_latitude, artist_location, artist_longitude, artist_name, danceability, duration, energy, key_signature, key_signature_confidence, song_id, tempo, song_hotttnesss, time_signature, time_signature_confidence, title, year), schema=['song_number', 'album_id', 'artist_latitude', 'artist_location', 'artist_longitude', 'artist_name', 'danceability', 'duration', 'energy', 'key_signature', 'key_signature_confidence', 'song_id', 'tempo', 'song_hotttnesss', 'time_signature', 'time_signature_confidence', 'title', 'year'])
    return pysp_df


In [None]:
#song_number, album_id, artist_latitude, artist_location, artist_longitude, artist_name, danceability, duration, energy, key_signature, key_signature_confidence, song_id, tempo, song_hotttnesss, time_signature, time_signature_confidence, title, year = main()
msd_df = h5toPysDf()

/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTUV128F933D382.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTJH128F1476BC7.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTSI128E0790E7B.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTDV128F934F6B1.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTQB128F92FAB03.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTFO128EF345397.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTZW12903CE7495.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTJG12903CD5B47.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTOM128F42309C2.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTUD128F42A10A3.h5
/home/ubuntu/MillionSongSubset/data/B/B/T/TRBBTLA128F4226907.h5
/home/ubuntu/MillionSongSubset/data/B/B/S/TRBBSFL128F931798C.h5
/home/ubuntu/MillionSongSubset/data/B/B/S/TRBBSBF128F14687A4.h5
/home/ubuntu/MillionSongSubset/data/B/B/S/TRBBSKC128F1496C4E.h5
/home/ubuntu/MillionSongSubset/data/B/B/S/TRBBSGI128F9309855.h5
/home/ubuntu/MillionSongSubset/data/B/B/

/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVFT128F42ABB02.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVBM128F4276D82.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVAH128F4270300.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVXL128F427C3F2.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVPM12903CDB094.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVTH128F92F3B6F.h5
/home/ubuntu/MillionSongSubset/data/B/B/V/TRBBVYO128F146AEDC.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRJN128F425C040.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRME128F4276C73.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRYI128F42370F5.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRFQ12903CE2997.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRVT128F428EA7A.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRIQ128F9353B5C.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBRQE12903C9E587.h5
/home/ubuntu/MillionSongSubset/data/B/B/R/TRBBREF128F933CCDD.h5
/home/ubuntu/MillionSongSubset/data/B/B/

/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBANU128F422B87B.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBADO12903CEE832.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBAHD128F428E0FE.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBANR128F4277C5B.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBASV128F427680E.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBAQJ128F92F1336.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBAAZ12903CAEA6D.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBADQ128F933698F.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBAIC12903D11771.h5
/home/ubuntu/MillionSongSubset/data/B/B/A/TRBBAKQ128F92F0A8E.h5
/home/ubuntu/MillionSongSubset/data/B/E/T/TRBETHV128F429722F.h5
/home/ubuntu/MillionSongSubset/data/B/E/T/TRBETVK128F4288B49.h5
/home/ubuntu/MillionSongSubset/data/B/E/T/TRBETSW12903CA6A6B.h5
/home/ubuntu/MillionSongSubset/data/B/E/T/TRBETRA128F9327DC6.h5
/home/ubuntu/MillionSongSubset/data/B/E/S/TRBESXG128F4243BFB.h5
/home/ubuntu/MillionSongSubset/data/B/E/

/home/ubuntu/MillionSongSubset/data/B/E/V/TRBEVTM128F92FE2B2.h5
/home/ubuntu/MillionSongSubset/data/B/E/V/TRBEVJC12903CCE75B.h5
/home/ubuntu/MillionSongSubset/data/B/E/V/TRBEVND128F93429DD.h5
/home/ubuntu/MillionSongSubset/data/B/E/V/TRBEVZN128F9330A0D.h5
/home/ubuntu/MillionSongSubset/data/B/E/V/TRBEVSH128F92D5CFC.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBEROX128F4270745.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERKY128F42290CA.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERHG128F933AC46.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERUM128F148AC93.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERCZ128E0787D93.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERLZ12903CC4747.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERHS128F92FF079.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERGT128F42A009F.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERJE128F425F098.h5
/home/ubuntu/MillionSongSubset/data/B/E/R/TRBERIX12903CEF907.h5
/home/ubuntu/MillionSongSubset/data/B/E/

/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBLK128F4264240.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBKU128F422A0E3.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBLJ12903CCF436.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBAQ128F93279F5.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBFY128F932C0E3.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBTE128E078E882.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBSN128F92FBA64.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBTU128F92FDC9D.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBSK128F92E5624.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBVM128F4262E9D.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBKA128F42947BF.h5
/home/ubuntu/MillionSongSubset/data/B/I/B/TRBIBOJ128F14618D4.h5
/home/ubuntu/MillionSongSubset/data/B/I/J/TRBIJLT12903CE7070.h5
/home/ubuntu/MillionSongSubset/data/B/I/J/TRBIJRN128F425F3DD.h5
/home/ubuntu/MillionSongSubset/data/B/I/J/TRBIJMU12903CF892B.h5
/home/ubuntu/MillionSongSubset/data/B/I/

/home/ubuntu/MillionSongSubset/data/B/C/S/TRBCSSP128F933B1D2.h5
/home/ubuntu/MillionSongSubset/data/B/C/S/TRBCSBO128F9332822.h5
/home/ubuntu/MillionSongSubset/data/B/C/S/TRBCSLC128F421BD25.h5
/home/ubuntu/MillionSongSubset/data/B/C/S/TRBCSSE12903CBE261.h5
/home/ubuntu/MillionSongSubset/data/B/C/S/TRBCSLF128E0795524.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBMG12903CDE2A4.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBXZ128F14628C3.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBVO128F92D4299.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBBR12903CA2FA7.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBQT128F426B79A.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBFV128E0795B94.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBMO128F92C77B9.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBSK12903CA2DD3.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBLV128F14ADF31.h5
/home/ubuntu/MillionSongSubset/data/B/C/B/TRBCBOY12903CC66C6.h5
/home/ubuntu/MillionSongSubset/data/B/C/

/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRJJ128F145D998.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRSH128F9327458.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRUH128F930DB67.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRLW128F425AC34.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRKZ128F4241B9E.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRFK128F935E655.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRCU12903CD3E19.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRCV128F933860B.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRVT128F4264E2A.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRDW128F422D8BA.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRMX128F92EBE1F.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRRZ12903CEBAC1.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRNO128F92E2AA2.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRVT128F93073E7.h5
/home/ubuntu/MillionSongSubset/data/B/C/R/TRBCRKF128F14AD398.h5
/home/ubuntu/MillionSongSubset/data/B/C/

/home/ubuntu/MillionSongSubset/data/B/F/T/TRBFTZS128F425EAAB.h5
/home/ubuntu/MillionSongSubset/data/B/F/T/TRBFTJP128F426AC3F.h5
/home/ubuntu/MillionSongSubset/data/B/F/T/TRBFTWM128F932535D.h5
/home/ubuntu/MillionSongSubset/data/B/F/T/TRBFTHM12903CCC8FD.h5
/home/ubuntu/MillionSongSubset/data/B/F/T/TRBFTGO128F92CF831.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSAI128F92F4AC0.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSPK128F92F3A22.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSNP128F14572B5.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSKB128F145C5B1.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSPA128F9340CDC.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSPZ12903CB5230.h5
/home/ubuntu/MillionSongSubset/data/B/F/S/TRBFSEX128F9300418.h5
/home/ubuntu/MillionSongSubset/data/B/F/B/TRBFBOG128F9356C6E.h5
/home/ubuntu/MillionSongSubset/data/B/F/B/TRBFBUO128F42A3DED.h5
/home/ubuntu/MillionSongSubset/data/B/F/B/TRBFBLS12903CBBABB.h5
/home/ubuntu/MillionSongSubset/data/B/F/

/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFKC128F92F67B0.h5
/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFQY128F1481E79.h5
/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFZN12903D08332.h5
/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFKN128F422F3CC.h5
/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFMO128F92CB1EF.h5
/home/ubuntu/MillionSongSubset/data/B/F/F/TRBFFNE128F92F6001.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVZZ128F92FD7B5.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVEP12903CF345D.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVZI128F147FCAF.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVOP12903CF0DFA.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVMO128F14537D3.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVAA128F423C356.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVTI12903CFD630.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVAS128F93576F0.h5
/home/ubuntu/MillionSongSubset/data/B/F/V/TRBFVKF128F92C8724.h5
/home/ubuntu/MillionSongSubset/data/B/F/

/home/ubuntu/MillionSongSubset/data/B/F/W/TRBFWZM128F4223E44.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAKP128F148B600.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAHF128F42A1F03.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAMR128F9306CC5.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFATS128F9319AEB.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAPA128F9353136.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAON128F9313C62.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFALE128EF341C77.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAGW12903CC81E2.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFASC128F424D427.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAWM128E0785A59.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFANQ12903CB8F48.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFANX128F42934C3.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAFH128F92D5762.h5
/home/ubuntu/MillionSongSubset/data/B/F/A/TRBFAWU12903CEC528.h5
/home/ubuntu/MillionSongSubset/data/B/F/

/home/ubuntu/MillionSongSubset/data/B/D/U/TRBDUWE128EF3583EA.h5
/home/ubuntu/MillionSongSubset/data/B/D/U/TRBDUYE12903CB246F.h5
/home/ubuntu/MillionSongSubset/data/B/D/U/TRBDUKK128F4241E90.h5
/home/ubuntu/MillionSongSubset/data/B/D/U/TRBDUMD128F9327534.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQLL128E078B757.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQVN128F425CF6D.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQYQ128F93214CB.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQAX12903CFD642.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQHA128F426EB28.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQUL128F14933F1.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQUF128F42AE54C.h5
/home/ubuntu/MillionSongSubset/data/B/D/Q/TRBDQIR12903CF0C9C.h5
/home/ubuntu/MillionSongSubset/data/B/D/F/TRBDFLD128F92D076F.h5
/home/ubuntu/MillionSongSubset/data/B/D/F/TRBDFKG128F4263AAF.h5
/home/ubuntu/MillionSongSubset/data/B/D/F/TRBDFCT128F4285E05.h5
/home/ubuntu/MillionSongSubset/data/B/D/

/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXOD128F422C47B.h5
/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXOG128F427031F.h5
/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXLZ12903CC8988.h5
/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXDO128F14A1CD1.h5
/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXIM128F427ABDA.h5
/home/ubuntu/MillionSongSubset/data/B/D/X/TRBDXKI128F9325211.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGRU128F934D684.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGNU128E078A4AF.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGQU128F146607A.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGOX128F428E2B9.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGMJ12903CCF69D.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGXH128F92E1661.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGLL128F9343CD1.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGMD128F92DA76F.h5
/home/ubuntu/MillionSongSubset/data/B/D/G/TRBDGSN128F4233EB8.h5
/home/ubuntu/MillionSongSubset/data/B/D/

/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLEZ128F427E5AF.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLBL128F42823F5.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLBZ128F9300586.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLBL12903D0137C.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLGE128F42A64E5.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLUB128F423E9BD.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLUF128F428E1F8.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLIH128F92FE649.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLRS128F92E9D69.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLIH128F1473B78.h5
/home/ubuntu/MillionSongSubset/data/B/H/L/TRBHLDQ128F423EF10.h5
/home/ubuntu/MillionSongSubset/data/B/H/U/TRBHUMZ128F427C822.h5
/home/ubuntu/MillionSongSubset/data/B/H/U/TRBHUOU128F423625E.h5
/home/ubuntu/MillionSongSubset/data/B/H/U/TRBHUCN128F932E8D7.h5
/home/ubuntu/MillionSongSubset/data/B/H/U/TRBHUBB128E0782209.h5
/home/ubuntu/MillionSongSubset/data/B/H/

/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHUB12903D09FEA.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHCU128F148AF19.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHNH128F92F6070.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHCS128F427F936.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHHC128F428428E.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHUC128F4294C6F.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHMI128F426779C.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHCI12903CAEEB5.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHRC128F427F353.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHFG128F93353FE.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHUF12903CAA46B.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHZF12903CBE8ED.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHDU128F92FD6B5.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHAC128F92D8379.h5
/home/ubuntu/MillionSongSubset/data/B/H/H/TRBHHGM12903CFD7C3.h5
/home/ubuntu/MillionSongSubset/data/B/H/

/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGELM128F424EDA2.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEUJ128F42B06B8.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEHK12903CEEFC0.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEGB128F9345BD0.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEIP12903CE8853.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEGG12903CDD71F.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEAZ12903CE8885.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEZA128F4286D23.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGERR12903CB8EE9.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGELF128E07915B8.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEZL12903CCC7E2.h5
/home/ubuntu/MillionSongSubset/data/B/G/E/TRBGEIB128F930403D.h5
/home/ubuntu/MillionSongSubset/data/B/G/N/TRBGNBC128F423DCD8.h5
/home/ubuntu/MillionSongSubset/data/B/G/N/TRBGNHU128F4220469.h5
/home/ubuntu/MillionSongSubset/data/B/G/N/TRBGNCL128F428F56F.h5
/home/ubuntu/MillionSongSubset/data/B/G/

/home/ubuntu/MillionSongSubset/data/B/G/Y/TRBGYFN128F42BC746.h5
/home/ubuntu/MillionSongSubset/data/B/G/Y/TRBGYHC12903D0626A.h5
/home/ubuntu/MillionSongSubset/data/B/G/Y/TRBGYCS128F4277187.h5
/home/ubuntu/MillionSongSubset/data/B/G/Y/TRBGYTF128F422F519.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDBT12903CDD795.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDSJ128F931BB2B.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDRZ128F92F1D4D.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDCS12903C9E51C.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDXO128EF3531E8.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDES128F4246FB0.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDOU128F93341A2.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDXT128F1487E86.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDCN12903CB8D6C.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDJY12903CEBF34.h5
/home/ubuntu/MillionSongSubset/data/B/G/D/TRBGDYF128F4233ED0.h5
/home/ubuntu/MillionSongSubset/data/B/G/

/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATRO128F4225566.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATKI128F4284D54.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATJY128F4248894.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATOU128F4236287.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATEY128F4285370.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATUH128F92E6BBC.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATEB128F4258804.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATNQ128F4227DD7.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATFM128F423640E.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATSC128F427D709.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATVW128E078530F.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATOI12903CEFF3A.h5
/home/ubuntu/MillionSongSubset/data/B/A/T/TRBATGR128F14AD2FA.h5
/home/ubuntu/MillionSongSubset/data/B/A/S/TRBASDY128F9327DB8.h5
/home/ubuntu/MillionSongSubset/data/B/A/S/TRBASHV128F92E5A7A.h5
/home/ubuntu/MillionSongSubset/data/B/A/

/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFNZ128F9308F67.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFFH12903CEF247.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFZB128F427268B.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFIP128F42438D8.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFWX12903CA9831.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFFS128EF357C1D.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFZB128F930969B.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFJQ128F4259FB5.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFQZ128F4282B20.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFFR128F425BC5B.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFBU128F427EFCE.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFRG12903CB0FF5.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFYR12903CA6588.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFBZ128F9325CA4.h5
/home/ubuntu/MillionSongSubset/data/B/A/F/TRBAFTU128F1458496.h5
/home/ubuntu/MillionSongSubset/data/B/A/

/home/ubuntu/MillionSongSubset/data/B/A/W/TRBAWKB128F931BAEE.h5
/home/ubuntu/MillionSongSubset/data/B/A/W/TRBAWQK128F42743A9.h5
/home/ubuntu/MillionSongSubset/data/B/A/W/TRBAWLE128F42919E2.h5
/home/ubuntu/MillionSongSubset/data/B/A/W/TRBAWHU128EF3563C5.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAASG128F92FBB7C.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAEJ128F4263AB7.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAOC128F42A6BEA.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAJQ128F4273A42.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAUV128F429553C.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAADN128F426B7A4.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAOT128F4261A18.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAXL12903CE78EE.h5
/home/ubuntu/MillionSongSubset/data/B/A/A/TRBAAGQ128F148D17E.h5
/home/ubuntu/MillionSongSubset/data/A/T/T/TRATTRI12903CAF946.h5
/home/ubuntu/MillionSongSubset/data/A/T/T/TRATTTA128F9306B7F.h5
/home/ubuntu/MillionSongSubset/data/A/T/

/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQSL128F932E2ED.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQDK128F1489B3E.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQWZ128F93133F8.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQHP128F421CDBA.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQUI128F9347C68.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQWS128F931E5EA.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQTC128F9358C8B.h5
/home/ubuntu/MillionSongSubset/data/A/T/Q/TRATQNV12903CD6111.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFBG128F9352D17.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFJI128F4252046.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFEN128F14A3A6E.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFYD12903CAE45A.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFFT128F4248FA9.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFDV128F42B62F7.h5
/home/ubuntu/MillionSongSubset/data/A/T/F/TRATFNU128F42466AF.h5
/home/ubuntu/MillionSongSubset/data/A/T/

/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGQQ128F9314185.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGRG128F145310B.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGAS128F425B7F0.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGQE128F93060E3.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGQP128F93306B0.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGUX12903CBA6AC.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGXT12903CFAA32.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGDZ128F9314B6C.h5
/home/ubuntu/MillionSongSubset/data/A/T/G/TRATGQO128F421B850.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWUA128F425764B.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWQQ128F426F1EC.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWTI128F933B617.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWHC128F42644DA.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWUD128F9309671.h5
/home/ubuntu/MillionSongSubset/data/A/T/W/TRATWYC12903CE60D6.h5
/home/ubuntu/MillionSongSubset/data/A/T/

/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLWL12903CA752D.h5
/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLUV128F423DA7B.h5
/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLTD128F92F9E1F.h5
/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLMY128E0782D6A.h5
/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLVK128F935046C.h5
/home/ubuntu/MillionSongSubset/data/A/S/L/TRASLVU128F933B160.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUKM128F422AA6F.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUTO12903CD5E07.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUIZ12903CA4ACE.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUTG128F935A445.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUBB12903CE6546.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUJK128E0789C12.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUDV128F42B86FB.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUUO12903CE3720.h5
/home/ubuntu/MillionSongSubset/data/A/S/U/TRASUPL128F9353B61.h5
/home/ubuntu/MillionSongSubset/data/A/S/

/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOQQ128F14A7968.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOCX128F4271AB9.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOPN128F9316701.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOHB128F92D88B6.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOVL128F427F7AB.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASONX128F429BE8C.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOTO128F9325223.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASODE128F930F19A.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOQY128F92FEC9F.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOHJ128F42512B1.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOQC12903CFFED7.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOFS128F4243346.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOUI128E078F152.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOHG128F4287A68.h5
/home/ubuntu/MillionSongSubset/data/A/S/O/TRASOEA128F934BF50.h5
/home/ubuntu/MillionSongSubset/data/A/S/

/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIOI12903CD8B9B.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABISL12903D08B0D.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIZN128F9317EDF.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIZY128F933C696.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIEL128F931E1F2.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIPA128F1458F79.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIOZ12903CF3554.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIJJ128F426A1C1.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABICV128F146AEF8.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIQP128F427D0DD.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIKX128F930CE4F.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIWU12903CDF3B7.h5
/home/ubuntu/MillionSongSubset/data/A/B/I/TRABIUA128F9349CDF.h5
/home/ubuntu/MillionSongSubset/data/A/B/C/TRABCUQ128E0783E2B.h5
/home/ubuntu/MillionSongSubset/data/A/B/C/TRABCEC128F426456E.h5
/home/ubuntu/MillionSongSubset/data/A/B/

/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKRG128F930641C.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKBS128F42823EC.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKAS128F42AB822.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKUJ128F4292445.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKJU128F422A7FE.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKMS128F92EAFAA.h5
/home/ubuntu/MillionSongSubset/data/A/B/K/TRABKZZ128F426A87F.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZPI12903CEB0EC.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZSI128F9329184.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZRB128F423EFA9.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZYK12903D000B2.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZKU12903CBFE15.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZOP128F9316E24.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZSL128F932A529.h5
/home/ubuntu/MillionSongSubset/data/A/B/Z/TRABZQU128F92ECDEF.h5
/home/ubuntu/MillionSongSubset/data/A/B/

/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEIK128F92F19EA.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJENI128F425B7F4.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEUV12903D15D6D.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJESP12903CBC0B0.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEYM12903CDEF6A.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEEW128F4280555.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJENI128F9300B02.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEDV128F93218B9.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEOD12903CC065D.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEZT128E0795DF3.h5
/home/ubuntu/MillionSongSubset/data/A/J/E/TRAJEYL128F931B518.h5
/home/ubuntu/MillionSongSubset/data/A/J/N/TRAJNYU128E07856A5.h5
/home/ubuntu/MillionSongSubset/data/A/J/N/TRAJNCR12903CEBAA1.h5
/home/ubuntu/MillionSongSubset/data/A/J/N/TRAJNBE128F4255C97.h5
/home/ubuntu/MillionSongSubset/data/A/J/N/TRAJNWY128F1453771.h5
/home/ubuntu/MillionSongSubset/data/A/J/

/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZRG128F930662C.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZJQ128F423F44F.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZKF12903CDA164.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZXZ128F4285158.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZCH128F933D2B7.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZWM128F4262BBD.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZYI128F424B412.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZIS128F4288E43.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZDD128F425AA57.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZDQ128F4260AC0.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZLM128F427CEB7.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZCR12903CC8D33.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZBD128F42A2502.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZII128F9358893.h5
/home/ubuntu/MillionSongSubset/data/A/J/Z/TRAJZZC128F424A4D0.h5
/home/ubuntu/MillionSongSubset/data/A/J/

/home/ubuntu/MillionSongSubset/data/A/E/J/TRAEJAC128F427FBDE.h5
/home/ubuntu/MillionSongSubset/data/A/E/J/TRAEJWK128F149EA60.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEEMP128F92F4B9C.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEEGO12903CF7D27.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEEBP128EF3673A7.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEELO128F425BD8F.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEEGA128F4272FF8.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEEOQ128F9309BC0.h5
/home/ubuntu/MillionSongSubset/data/A/E/E/TRAEELW128F933D255.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENEO128F426789F.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENGY128F4272846.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENGX12903CBDFF6.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENWI12903CECA29.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENJZ12903CC901D.h5
/home/ubuntu/MillionSongSubset/data/A/E/N/TRAENRA128F934B998.h5
/home/ubuntu/MillionSongSubset/data/A/E/

/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZAZ128F930608E.h5
/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZMX128F425C3CE.h5
/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZWE128F4244505.h5
/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZKA128F4282B79.h5
/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZLQ128F427F954.h5
/home/ubuntu/MillionSongSubset/data/A/E/Z/TRAEZFJ128F422A5C0.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHTU128F93353E5.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHYF128F148AE0C.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHBW12903CE1045.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHTT128F9303492.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHXW128F146F3C5.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHOI12903CD02C8.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHTK128F14A4706.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHLP128F933720D.h5
/home/ubuntu/MillionSongSubset/data/A/E/H/TRAEHMP128F92DCC42.h5
/home/ubuntu/MillionSongSubset/data/A/E/

/home/ubuntu/MillionSongSubset/data/A/N/J/TRANJMB128F42926E4.h5
/home/ubuntu/MillionSongSubset/data/A/N/J/TRANJSV128E0799689.h5
/home/ubuntu/MillionSongSubset/data/A/N/J/TRANJMO128F425C44D.h5
/home/ubuntu/MillionSongSubset/data/A/N/J/TRANJPA128F92CAE02.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEMQ128F42A10A8.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEQC12903D01CE6.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEHV12903CD1E33.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEQB128F4284978.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANELD128F4250FCF.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANELB128F92EF58A.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANELG128F424F264.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEYV128F4269032.h5
/home/ubuntu/MillionSongSubset/data/A/N/E/TRANEMF128F42888C8.h5
/home/ubuntu/MillionSongSubset/data/A/N/N/TRANNJC128F429F7E3.h5
/home/ubuntu/MillionSongSubset/data/A/N/N/TRANNCH128F14B1A75.h5
/home/ubuntu/MillionSongSubset/data/A/N/

In [6]:
#msd_df = sqlContext.createDataFrame(zip(song_number, album_id, artist_latitude, artist_location, artist_longitude, artist_name, danceability, duration, energy, key_signature, key_signature_confidence, song_id, tempo, song_hotttnesss, time_signature, time_signature_confidence, title, year), schema=['song_number', 'album_id', 'artist_latitude', 'artist_location', 'artist_longitude', 'artist_name', 'danceability', 'duration', 'energy', 'key_signature', 'key_signature_confidence', 'song_id', 'tempo', 'song_hotttnesss', 'time_signature', 'time_signature_confidence', 'title', 'year'])

In [7]:
msd_df.show()

+-----------+--------+---------------+--------------------+----------------+--------------------+------------+---------+------+-------------+------------------------+--------------------+-------+-------------------+--------------+-------------------------+--------------------+----+
|song_number|album_id|artist_latitude|     artist_location|artist_longitude|         artist_name|danceability| duration|energy|key_signature|key_signature_confidence|             song_id|  tempo|    song_hotttnesss|time_signature|time_signature_confidence|               title|year|
+-----------+--------+---------------+--------------------+----------------+--------------------+------------+---------+------+-------------+------------------------+--------------------+-------+-------------------+--------------+-------------------------+--------------------+----+
|          1|  511272|               |                 b''|                |          b'Dickies'|         0.0|179.59138|   0.0|          2.0|          

In [8]:
msd_df.printSchema()

root
 |-- song_number: long (nullable = true)
 |-- album_id: string (nullable = true)
 |-- artist_latitude: string (nullable = true)
 |-- artist_location: string (nullable = true)
 |-- artist_longitude: string (nullable = true)
 |-- artist_name: string (nullable = true)
 |-- danceability: double (nullable = true)
 |-- duration: double (nullable = true)
 |-- energy: double (nullable = true)
 |-- key_signature: double (nullable = true)
 |-- key_signature_confidence: double (nullable = true)
 |-- song_id: string (nullable = true)
 |-- tempo: double (nullable = true)
 |-- song_hotttnesss: double (nullable = true)
 |-- time_signature: double (nullable = true)
 |-- time_signature_confidence: double (nullable = true)
 |-- title: string (nullable = true)
 |-- year: string (nullable = true)



In [9]:
start_time = time.time()

In [10]:
msd_df = msd_df.na.fill(0)
msd_df1 = msd_df.filter(msd_df['song_hotttnesss'] > 0).agg(avg(col("song_hotttnesss"))).collect()

In [11]:
average = rowUnpack(msd_df1)
average = average['avg(song_hotttnesss)']

msd_df_corrected = msd_df.withColumn("song_hotttnesss", \
              when(msd_df["song_hotttnesss"] == 0, average).otherwise(msd_df["song_hotttnesss"]))

In [12]:
msd_df_corrected.show()

+-----------+--------+---------------+--------------------+----------------+--------------------+------------+---------+------+-------------+------------------------+--------------------+-------+-------------------+--------------+-------------------------+--------------------+----+
|song_number|album_id|artist_latitude|     artist_location|artist_longitude|         artist_name|danceability| duration|energy|key_signature|key_signature_confidence|             song_id|  tempo|    song_hotttnesss|time_signature|time_signature_confidence|               title|year|
+-----------+--------+---------------+--------------------+----------------+--------------------+------------+---------+------+-------------+------------------------+--------------------+-------+-------------------+--------------+-------------------------+--------------------+----+
|          1|  511272|               |                 b''|                |          b'Dickies'|         0.0|179.59138|   0.0|          2.0|          

In [13]:
msd_df_sub = msd_df_corrected.select("duration","key_signature","tempo","time_signature","song_hotttnesss")
#msd_df_sub.show()

In [14]:
msd_sort = msd_df_sub.orderBy('duration', ascending=True)
#msd_sort.show()

In [15]:
(training_data, test_data) = msd_sort.randomSplit([0.7, 0.3])
training_data =training_data.rdd.map(lambda x: LabeledPoint(x[4], x[:4]))
test_data =test_data.rdd.map(lambda x: LabeledPoint(x[4], x[:4]))

#training_data.take(5)
#test_data.take(5)

In [16]:
model = DecisionTree.trainRegressor(training_data, categoricalFeaturesInfo={},
                                    impurity='variance', maxDepth=5, maxBins=32)

In [17]:
model_tree = model.toDebugString()

predictions = model.predict(test_data.map(lambda x: x.features))
labelsAndPredictions = test_data.map(lambda lp: lp.label).zip(predictions)

testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() / float(test_data.count())
end_time = time.time()
exec_time = end_time - start_time
print('Test Mean Squared Error = ' + str(testMSE))


In [18]:
print("--- %s seconds ---" % exec_time)


--- 29.352632999420166 seconds ---


In [19]:
input_cols = ["duration","key_signature","tempo","time_signature"]

for i, feat in enumerate(input_cols):
    model_tree = model_tree.replace('feature ' + str(i), feat)

print('Learned regression tree model: \n')
print(model_tree)

Learned regression tree model: 

DecisionTreeModel regressor of depth 5 with 53 nodes
  If (tempo <= 84.288)
   If (time_signature <= 0.5)
    If (key_signature <= 0.5)
     Predict: 0.45948195643562095
    Else (key_signature > 0.5)
     If (duration <= 233.65179)
      Predict: 0.6034409947366194
     Else (duration > 233.65179)
      Predict: 0.693947934490814
   Else (time_signature > 0.5)
    If (key_signature <= 0.5)
     If (time_signature <= 3.5)
      If (duration <= 275.526075)
       Predict: 0.40774666297059986
      Else (duration > 275.526075)
       Predict: 0.4720855202729366
     Else (time_signature > 3.5)
      If (duration <= 245.4722)
       Predict: 0.467170884496594
      Else (duration > 245.4722)
       Predict: 0.5124946748064468
    Else (key_signature > 0.5)
     If (tempo <= 66.869)
      If (duration <= 206.95465000000002)
       Predict: 0.4545419509003757
      Else (duration > 206.95465000000002)
       Predict: 0.43767098447536906
     Else (tempo > 66

In [20]:
spark_context.stop()

In [21]:
#model.save(spark_context, "/home/ubuntu/MillionSongLDSA")
#sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeRegressionModel")