In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import *
from sklearn import metrics
from pymatreader import read_mat
import numpy as np
import json
import os
import csv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
cwd = os.getcwd()
jsonfiledir = os.path.join(cwd, 'EEGData')
jsonfiles = []


for file in os.listdir(jsonfiledir): #only json files in directory
    print('loading', file)
    filepth = os.path.join(jsonfiledir, file)
    f = open(filepth)
    data = json.load(f)
    data['sessionID'] = file
    jsonfiles.append(data)
    f.close()

loading 15576668519182930000.json
loading 17957179669740114000.json
loading 20199147052280082000.json
loading 20720091866675330000.json
loading 30329014825927670000.json
loading 31908791274438530000.json
loading 33881536642638900000.json
loading 37476885619405180000.json
loading 40263762851668330000.json
loading 51647007780956774000.json
loading 65578173022607020000.json
loading 70773977546393580000.json
loading 70949146837861695000.json
loading 71762937516704770000.json
loading 74708274640981460000.json
loading 80012270501270370000.json
loading 92471859140639980000.json
loading 9257898488.json
loading 9283782737282763.json


In [5]:
credentialsfile = os.path.join(cwd, 'spotifyclientcredentials.json')
with open(credentialsfile) as f:
    data = json.load(f)
    client_credentials_manager = SpotifyClientCredentials(client_id=data['client_id'],
                                                          client_secret=data['client_secret']) 
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [6]:
def getSpotifyFeatures(name):
    search = sp.search(q=name, type='track', limit=1)
    search_uri = search['tracks']['items'][0]['uri']
    search_features = sp.audio_features(search_uri)[0]
    search_songname = search['tracks']['items'][0]['name']
    search_artist = search['tracks']['items'][0]['artists'][0]['name']
    print(f'Found {search_songname} by {search_artist}')
    ret = [search_features['danceability'], search_features['energy'], search_features['key'], search_features['loudness'], \
           search_features['mode'], search_features['speechiness'], search_features['acousticness'], \
           search_features['instrumentalness'], search_features['liveness'], search_features['valence'], \
           search_features['tempo']]
    return ret

In [7]:
spotifyfeats = {'jazz': [], 'nature': [], 'classical': [], 'vocals': [], 'instrumental': [], 'electric': []}

csvfile = os.path.join(cwd, 'songnames.csv')
with open(csvfile, 'r', encoding='utf-8') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count > 0:
            vocalname = row[0]
            electricname = row[2]
            naturename = 'nature music'
            jazzname = row[6]
            classicalname = row[8]
            instrname = row[10]
            spotifyfeats['jazz'].append(getSpotifyFeatures(jazzname))
            spotifyfeats['nature'].append(getSpotifyFeatures(naturename))
            spotifyfeats['classical'].append(getSpotifyFeatures(classicalname))
            spotifyfeats['vocals'].append(getSpotifyFeatures(vocalname))
            spotifyfeats['instrumental'].append(getSpotifyFeatures(instrname))
            spotifyfeats['electric'].append(getSpotifyFeatures(electricname))
            
        line_count += 1

Found Take Five by The Dave Brubeck Quartet
Found Nature Music by Mother Nature Sound FX
Found Piano Concerto No. 24 in C Minor, K. 491: II. Larghetto by Wolfgang Amadeus Mozart
Found Don't Call Me Up by Mabel
Found Majesty by Apashe
Found Deltarune: Spamton Theme - FrostFM Version by Frostfm
Found Collard Greens And Black-Eyed Peas - Rudy Van Gelder/24 Bit Mastering/1998 Digital Remaster by Bud Powell
Found Nature Music by Mother Nature Sound FX
Found Marche militaire, Op. 51, No. 1 by Franz Schubert
Found Rather Be (feat. Jess Glynne) by Clean Bandit
Found cliché by mxmtoon
Found Sea Shanty Medley by Home Free
Found Naima by Louie Martin
Found Nature Music by Mother Nature Sound FX
Found Il barbiere di Siviglia (The Barber of Seville), Act I: Cavatina: Largo al factotum della citta by Gioachino Rossini
Found We Found Love by Rihanna
Found Chale by Eden Muñoz
Found Redial (From "Bomberman Hero") by Game & Sound
Found Yardbird Suite by Charlie Parker
Found Nature Music by Mother Nature

In [10]:
for d in jsonfiles:
    chandata = np.array((d['channel1'], \
                        d['channel2'], \
                        d['channel3'], \
                        d['channel4'], \
                        d['channel5'], \
                        d['channel6'], \
                        d['channel7'], \
                        d['channel8']))
    d['chandata'] = chandata
    d['chanavg'] = np.mean(chandata, axis=0)

In [12]:
NUM_POINTS = 120000
for d in jsonfiles:
    #interpolate to 120,000 points
    #d['chanavg']
    eeglen = len(d['chanavg'])
    x = [i * eeglen / NUM_POINTS for i in range(NUM_POINTS)] #x-coordinates to evaluate interp. values
    d['chanavginterp'] = np.interp(x, range(eeglen), d['chanavg'])

In [13]:
S = 10 #10 songs
for d in jsonfiles:
    d['datasplit'] = []
    for i in range(S):
        s, e = i * NUM_POINTS // S, (i+1) * NUM_POINTS // S
        d['datasplit'].append(d['chanavginterp'][s:e])

In [14]:
def getID(id):
    for entry in jsonfiles:
        if entry['sessionID'] == id:
            return entry
    return None

In [15]:
csvfile = os.path.join(cwd, 'UserSessionData.csv')
with open(csvfile, 'r', encoding='utf-8') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count > 0:
            title = row[0]
            genres = eval(row[2])
            songs = eval(row[3])
            ratings = eval(row[4])
            for i, rating in enumerate(ratings):
                ratings[i] = int(rating)
            #print(ratings)
            titlejson = title + '.json'
            print(titlejson)
            id = getID(titlejson)
            if id:
                id['ratings'] = ratings
                id['genres'] = genres
                id['songs'] = songs
        line_count += 1

33881536642638900000.json
80012270501270370000.json
74046263142686880000.json
15576668519182930000.json
17957179669740114000.json
74708274640981460000.json
30329014825927670000.json
37476885619405180000.json
70949146837861695000.json
19162543256201593000.json
92471859140639980000.json
40263762851668330000.json
71677240630889220000.json
31908791274438530000.json
51647007780956774000.json
40519162213428250000.json
65578173022607020000.json
15576668519182930000.json
20199147052280082000.json
44576528811972540000.json
26444285876842930000.json


In [16]:
jftrim = []
for x in jsonfiles:
    if 'ratings' in x.keys():
        jftrim.append(x)

In [17]:
S = 10
P = 14
NUM_POINTS = 120000
data = np.zeros(shape=(S*P, NUM_POINTS // S)) # S*P x NUM_POINTS = 10*19 x 120,000
ratings = np.zeros(S*P)

In [18]:
#EEG
ind = 0
for i in range(P):
    for j in range(S):
        data[ind] = jftrim[i]['datasplit'][j]
        ind += 1

In [19]:
ind = 0
for i in range(P):
    print(jftrim[i]['sessionID'])
    rt = jftrim[i]['ratings']
    print(rt)
    assert(len(rt) == 10)
    for j in range(S):
        ratings[ind] = rt[j]
        ind += 1
print(ratings)

15576668519182930000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
17957179669740114000.json
[2, 3, 3, 2, 1, 3, 2, 3, 2, 2]
20199147052280082000.json
[2, 2, 1, 2, 2, 2, 2, 2, 2, 2]
30329014825927670000.json
[3, 1, 1, 2, 2, 2, 2, 3, 1, 4]
31908791274438530000.json
[2, 3, 4, 2, 4, 2, 1, 1, 3, 4]
33881536642638900000.json
[4, 2, 2, 2, 2, 2, 2, 3, 4, 4]
37476885619405180000.json
[4, 3, 4, 2, 3, 4, 1, 3, 4, 1]
40263762851668330000.json
[3, 4, 2, 3, 2, 1, 2, 2, 3, 1]
51647007780956774000.json
[3, 4, 2, 2, 2, 2, 2, 3, 4, 1]
65578173022607020000.json
[3, 4, 3, 4, 4, 2, 2, 2, 2, 2]
70949146837861695000.json
[3, 3, 1, 4, 1, 3, 3, 4, 4, 4]
74708274640981460000.json
[2, 2, 2, 2, 2, 2, 2, 2, 3, 2]
80012270501270370000.json
[2, 4, 4, 2, 2, 1, 1, 2, 4, 3]
92471859140639980000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 2. 1. 3. 2. 3. 2. 2. 2. 2. 1. 2.
 2. 2. 2. 2. 2. 2. 3. 1. 1. 2. 2. 2. 2. 3. 1. 4. 2. 3. 4. 2. 4. 2. 1. 1.
 3. 4. 4. 2. 2. 2. 2. 2. 2. 3. 4. 4. 4. 3. 4. 2. 3. 4. 1

In [20]:
genreIds = {'jazz': 0, 'nature': 1, 'classical': 2, 'vocals': 3, 'instrumental': 4, 'electric': 5}

In [21]:
#genre+songs
data1 = np.zeros(shape=(S*P, NUM_POINTS // S + 2 + 11))
ind = 0
for i in range(P):
    print(jftrim[i]['sessionID'])
    gnr = jftrim[i]['genres']
    sng = jftrim[i]['songs']
    print(rt)
    assert(len(rt) == 10)
    for j in range(S):
        print([gnr[j],sng[j]])
        gId = genreIds[gnr[j]]
        data1[ind] = np.append(data[ind], [gId, sng[j]] + spotifyfeats[gnr[j]][sng[j]])
        ind += 1

15576668519182930000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
['classical', 6]
['electric', 13]
['nature', 9]
['jazz', 3]
['vocals', 6]
['instrumental', 2]
['instrumental', 2]
['nature', 9]
['electric', 9]
['vocals', 3]
17957179669740114000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
['instrumental', 1]
['classical', 13]
['jazz', 10]
['nature', 4]
['electric', 11]
['vocals', 5]
['instrumental', 12]
['nature', 5]
['jazz', 3]
['classical', 9]
20199147052280082000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
['classical', 1]
['nature', 5]
['vocals', 13]
['instrumental', 4]
['jazz', 1]
['electric', 11]
['classical', 2]
['instrumental', 3]
['electric', 6]
['jazz', 3]
30329014825927670000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
['instrumental', 11]
['electric', 7]
['classical', 9]
['jazz', 6]
['vocals', 10]
['nature', 14]
['instrumental', 7]
['nature', 10]
['jazz', 13]
['classical', 3]
31908791274438530000.json
[2, 2, 2, 2, 2, 2, 2, 2, 2, 4]
['jazz', 2]
['electric', 2]
['classical', 14]
['nature', 13]
['vocals', 11]


In [22]:
print(data1[0].shape)

(12013,)


In [23]:
x_train, x_test, y_train, y_test = \
    train_test_split(data1, ratings, test_size = 0.70, random_state = 5)

In [24]:
clf = RandomForestRegressor()
clf.fit(x_train, y_train)
y_pred_test = clf.predict(x_test)
diffs = []
for i in range(50):
    diffs.append( y_pred_test[i] - y_test[i] )
print(np.mean([x**2 for x in diffs]))

0.9719619999999999


In [25]:
from sklearn import neighbors
clf = neighbors.KNeighborsRegressor()
clf.fit(x_train, y_train)
y_pred_test = clf.predict(x_test)
diffs = []
for i in range(50):
    diffs.append( y_pred_test[i] - y_test[i] )
print(np.mean([x**2 for x in diffs]))

1.1079999999999999
