In [None]:
!pip3 install jams

In [7]:
import numpy as np
import os
import pprint

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

import scipy.io.wavfile as wav
import jams


## Process input

In [22]:
def timeKey(t):
    """Sort key that orders rows by their first element.

    Used to sort note rows of the form [time, duration, value] chronologically.
    """
    first_field = t[0]
    return first_field

def loadNoteData(data):
    """Collect the annotated notes of a JAMS file as time-sorted rows.

    Parameters
    ----------
    data : str, os.PathLike, or loaded JAMS object
        Either the path of a ``.jams`` file, or an object that already exposes
        ``annotations`` (e.g. the value returned by ``jams.load``), which is
        then used directly without touching the disk.
        If a path is given but no such file exists, the historical hard-coded
        example file is loaded instead and a warning is emitted; this keeps
        callers that relied on the argument being ignored working.

    Returns
    -------
    list of [time, duration, value]
        One row per observation found in the first six ``"note_midi"``
        annotations (presumably one annotation per guitar string -- confirm
        against the dataset), sorted by ascending ``time``.
    """
    default_path = '/content/JamsFiles/04_Rock1-90-C_solo.jams'

    if hasattr(data, 'annotations'):
        # Already-loaded JAMS object: no file access needed.
        jam = data
    else:
        path = data
        if not (isinstance(path, (str, os.PathLike)) and os.path.isfile(path)):
            import warnings
            warnings.warn(
                "loadNoteData: %r is not an existing file; falling back to %s"
                % (data, default_path)
            )
            path = default_path
        # The context manager guarantees the handle is closed even if parsing fails.
        with open(path, 'r') as f:
            jam = jams.load(f)

    midi_annotations = jam.annotations["note_midi"]
    notes = []
    for annotation_index in range(6):
        for observation in midi_annotations[annotation_index]["data"]:
            # Keep only the first three fields: time, duration, value.
            notes.append([observation[0], observation[1], observation[2]])

    # Stable sort on the time field so notes from all annotations interleave chronologically.
    notes.sort(key=lambda row: row[0])
    return notes

In [32]:
#Slide a window over one song (one frame every hopSize samples) and label each frame
def songProcess(audio,notes,sr,hopSize = 1024,winSize = 4096,features = None,labels = None):
  """Cut ``audio`` into fixed-length frames and label each frame with a MIDI note.

  Parameters
  ----------
  audio : array-like of samples
      Sliced as ``audio[o:o+winSize]`` for every frame offset ``o``.
  notes : iterable of (time, duration, value)
      ``time`` and ``duration`` are multiplied by ``sr`` to obtain sample
      positions; ``value`` is the (possibly fractional) MIDI pitch.
  sr : number
      Sample rate used to convert note times to sample positions.
  hopSize : int
      Distance in samples between the starts of consecutive frames.
  winSize : int
      Length in samples of every frame.
  features, labels : list or None
      Optional accumulators. Pass the lists returned by a previous call to
      concatenate several songs; when omitted, fresh lists are created for
      this call (they are NOT shared between calls).

  Returns
  -------
  (features, labels)
      ``features`` holds one raw frame per offset; frames that run past the
      end of ``audio`` are zero-padded to ``winSize`` so all rows have the same
      length. ``labels`` holds, per frame, the MIDI value rounded to the nearest
      integer of the note that sounds longest inside the frame, or 0 when no
      note overlaps it.
  """
  # Default arguments are evaluated once, so list defaults would silently
  # accumulate frames across unrelated calls; create the lists here instead.
  if features is None:
    features = []
  if labels is None:
    labels = []

  for frame_start in range(0, len(audio), hopSize):
    frame_end = frame_start + winSize
    frame = audio[frame_start:frame_end]

    # The last few windows overrun the signal; pad them with zeros (along the
    # sample axis only) so every feature row has exactly winSize samples.
    missing = winSize - len(frame)
    if missing > 0:
      frame = np.asarray(frame)
      pad_width = [(0, missing)] + [(0, 0)] * (frame.ndim - 1)
      frame = np.pad(frame, pad_width, mode='constant')

    note = _dominantNote(notes, sr, frame_start, frame_end)

    features.append(frame)            #Raw frame used for features. Maybe process somehow??
    labels.append(int(round(note)))   #quantize to the nearest midi value (int() alone would truncate)

  return features,labels


def _dominantNote(notes, sr, frame_start, frame_end):
  """Return the value of the note sounding longest within [frame_start, frame_end), else 0."""
  best_value = 0
  best_overlap = -1   # below any real overlap, so a note touching the frame edge still counts
  for time, duration, value in notes:
    note_start = time * sr
    note_end = (time + duration) * sr
    # A note is relevant if it starts in, ends in, or spans the frame.
    if note_start >= frame_end or note_end < frame_start:
      continue
    overlap = min(note_end, frame_end) - max(note_start, frame_start)
    # ">=": on an exact tie the later note in the list wins.
    if overlap >= best_overlap:
      best_overlap = overlap
      best_value = value
  return best_value

## Data classification using Neural Network

In [None]:
def neuralNetwork(XTrain, XTest, YTrain, YTest):
  """Fit a multi-layer perceptron on the training split and score it on the test split.

  Returns the accuracy (as computed by ``accuracy_score``) of the fitted
  model's predictions for ``XTest`` against ``YTest``.
  """
  # Hyper-parameters carried over from A1; they still need tuning for this dataset.
  classifier = MLPClassifier(
      solver='adam',
      hidden_layer_sizes=(71, 21),
      alpha=0.0001,
      learning_rate_init=0.001,
      max_iter=10000,
  )
  classifier.fit(XTrain, YTrain)

  predictions = classifier.predict(XTest)
  return accuracy_score(YTest, predictions)

## Data classification using SVM

In [None]:
def gaussianSVM(XTrain, XTest, YTrain, YTest):
  """Fit an RBF-kernel SVM on the training split and score it on the test split.

  Returns the accuracy (as computed by ``accuracy_score``) of the fitted
  model's predictions for ``XTest`` against ``YTest``.
  """
  # Hyper-parameters carried over from A2; they still need tuning for this dataset.
  rbf_settings = {'kernel': 'rbf', 'gamma': 0.1, 'C': 10}
  model = SVC(**rbf_settings)
  model.fit(XTrain, YTrain)

  predictions = model.predict(XTest)
  return accuracy_score(YTest, predictions)

In [None]:
def linearSVM(XTrain, XTest, YTrain, YTest):
  """Fit a linear SVM on the training split and score it on the test split.

  Returns the accuracy (as computed by ``accuracy_score``) of the fitted
  model's predictions for ``XTest`` against ``YTest``.
  """
  #Currently using successful values from A2, further adjustment with the processed dataset will be needed
  svc = LinearSVC(C = 0.0225)

  svc.fit(XTrain, YTrain)
  # The score must be returned: without `return` the function always yielded None,
  # unlike gaussianSVM and neuralNetwork.
  return accuracy_score(YTest, svc.predict(XTest))

## Driver

In [None]:
#TODO: load the song and jam file from respective folders.. Maybe use zip() to create a list of (song, jams) pairs?
song_path = r'/content/Datasets/audio_mono-mic'
#Hard coded for now.. This is the annotation file loadNoteData has actually been reading.
jam_path = r'/content/JamsFiles/04_Rock1-90-C_solo.jams'

#Window size used in previous works with this dataset was 0.2s
HOP_SIZE = 1024
WIN_SIZE = 4096
SEED = 42   #fixed seed so the train/test split is reproducible between runs

#Only wav files can be read by wav.read; sort for a deterministic processing order.
songs = sorted(name for name in os.listdir(song_path) if name.lower().endswith('.wav'))

#The same annotation file is used for every song for now, so load it once.
note_info = loadNoteData(jam_path)

#Explicit accumulators: every song's frames and labels are appended to these lists.
XData, YData = [], []
for song in songs:
  songPath = os.path.join(song_path,song)
  sampleRate,audio = wav.read(songPath)
  XData,YData = songProcess(audio,note_info,sampleRate,hopSize = HOP_SIZE,winSize = WIN_SIZE,features = XData,labels = YData)

#The classifiers need equally long rows: keep only frames that are a full window long.
full_frames = [k for k, frame in enumerate(XData) if len(frame) == WIN_SIZE]
XData = [XData[k] for k in full_frames]
YData = [YData[k] for k in full_frames]

if not XData:
  raise RuntimeError("No full-length frames were extracted from the wav files in " + song_path)

#Split data
XTrain, XTest, YTrain, YTest = train_test_split(XData, YData, test_size = 0.2, random_state = SEED)

#Call ML functions.. each takes (XTrain, XTest, YTrain, YTest) and reports test accuracy
print("Gaussian SVM accuracy:", gaussianSVM(XTrain, XTest, YTrain, YTest))
print("Linear SVM accuracy:", linearSVM(XTrain, XTest, YTrain, YTest))
print("Neural network accuracy:", neuralNetwork(XTrain, XTest, YTrain, YTest))

## Testing Ground

In [13]:
# Load the example annotation file for interactive inspection; the context
# manager closes the handle as soon as parsing is done.
with open('/content/JamsFiles/04_Rock1-90-C_solo.jams', 'r') as f:
    data = jams.load(f)

In [30]:
# Column header for the rows printed below.
print("\ttime\tduration\tnotes")
# Pretty-print the time-sorted note rows returned by loadNoteData.
pprint.pprint(loadNoteData(data))

	time	duration	notes
[[1.0279111111111092, 0.13351473922902812, 49.193803230707154],
 [1.304237641723354, 0.08707482993197146, 52.114584297971554],
 [1.3941242630385489, 0.23800453514738962, 53.06152785960328],
 [1.679385034013606, 0.13931972789115576, 56.14443073972567],
 [1.9566866213151926, 0.2902494331065739, 56.17702536590502],
 [2.3031265306122393, 0.09287981859410621, 53.08339188734166],
 [2.6073442176870714, 0.12190476190475863, 57.14782805032149],
 [2.9405641723355984, 0.12190476190475863, 60.91999064053075],
 [3.2740108843537357, 0.09868480725623385, 60.33037973363317],
 [3.5828317460317436, 0.5050340136054388, 61.522481510672925],
 [4.3526956916099735, 0.11029478458049624, 57.98474034208964],
 [4.642673015873008, 0.09287981859410621, 59.10588569730126],
 [4.9913351473922845, 0.7372335600907007, 61.11095551176679],
 [7.287298866213149, 0.2844444444444463, 63.46856030989381],
 [7.575303401360543, 0.7488435374149631, 61.16104776571266],
 [8.350292063492056, 0.4063492063492049, 