In [16]:
# Mount Google Drive so the dataset and saved weights under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Add the project's model directory on Drive to the import search path.
import sys
sys.path.append("/content/drive/MyDrive/lab/2021/MyTabCNN/model")

In [3]:
import numpy as np
import keras
import tensorflow

class DataGenerator(tensorflow.keras.utils.Sequence):
    """Keras ``Sequence`` that serves fixed-size batches of context windows.

    Every ID has the form ``"<file stem>_<frame index>"``. For each ID the
    generator opens ``<data_path><spec_repr>/<file stem>.npz``, zero-pads the
    ``"repr"`` array along its first axis by ``con_win_size // 2`` rows on each
    side and cuts ``con_win_size`` consecutive rows starting at the frame
    index, i.e. a window centred on that frame. The matching row of
    ``"labels"`` becomes the target.
    """

    # Number of feature bins per frame for every supported representation code.
    _FEATURE_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}

    def __init__(self, list_IDs, data_path="/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/", batch_size=128, shuffle=True, label_dim = (6,21), spec_repr="c", con_win_size=9):
        """Store the configuration and prepare the first epoch's sample order.

        Args:
            list_IDs: sample IDs, each ``"<file stem>_<frame index>"``.
            data_path: directory holding one sub-directory per ``spec_repr``.
            batch_size: number of samples in every batch.
            shuffle: reshuffle the sample order at the end of every epoch.
            label_dim: shape of one label (second and third axis of ``y``).
            spec_repr: representation code, one of ``"c"``, ``"m"``, ``"cm"``, ``"s"``.
            con_win_size: number of frames in one context window.

        Raises:
            ValueError: if ``spec_repr`` is not a supported code.
        """
        # Fail early: previously an unknown code left X_dim undefined and only
        # surfaced as an AttributeError when the first batch was requested.
        if spec_repr not in self._FEATURE_BINS:
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (spec_repr, sorted(self._FEATURE_BINS))
            )

        self.list_IDs = list_IDs
        self.data_path = data_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.con_win_size = con_win_size
        self.halfwin = con_win_size // 2

        self.X_dim = (self.batch_size, self._FEATURE_BINS[spec_repr], self.con_win_size, 1)
        self.y_dim = (self.batch_size, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        # positions of this batch inside the (possibly shuffled) index array
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the windows and labels for the given IDs.

        Returns:
            ``X`` with shape ``X_dim`` and ``y`` with shape ``y_dim``. Rows
            beyond ``len(list_IDs_temp)`` are left uninitialised.
        """
        X = np.empty(self.X_dim)
        y = np.empty(self.y_dim)

        data_dir = self.data_path + self.spec_repr + "/"
        padding = [(self.halfwin, self.halfwin), (0, 0)]

        # Remember only the most recently opened file: consecutive IDs of an
        # unshuffled generator usually share it, and memory use stays bounded.
        cached_name = None
        full_x = None
        labels = None

        for i, ID in enumerate(list_IDs_temp):
            # everything before the last underscore names the file,
            # the part after it is the frame index
            stem, _, frame = ID.rpartition("_")
            filename = stem + ".npz"
            frame_idx = int(frame)

            if filename != cached_name:
                # the context manager closes the archive instead of leaking the handle
                with np.load(data_dir + filename) as loaded:
                    full_x = np.pad(loaded["repr"], padding, mode='constant')
                    labels = loaded["labels"]
                cached_name = filename

            # window of con_win_size frames centred on frame_idx
            sample_x = full_x[frame_idx : frame_idx + self.con_win_size]
            X[i,] = np.expand_dims(np.swapaxes(sample_x, 0, 1), -1)

            y[i,] = labels[frame_idx]

        return X, y
        
        
        

In [9]:
def tab2pitch(tab):
    """Convert one frame of tablature into a 44-element pitch activation vector.

    Args:
        tab: per-string score vectors; the arg-max of each vector is the
            fret class of that string, with class 0 meaning "not played".

    Returns:
        A length-44 array holding 1 at ``class + open_pitch - 41`` for every
        played string and 0 elsewhere.
    """
    # presumably the MIDI numbers of the open strings in standard tuning
    open_string_pitches = (40, 45, 50, 55, 59, 64)
    activation = np.zeros(44)
    for string_idx, fret_scores in enumerate(tab):
        played_class = np.argmax(fret_scores, -1)
        if played_class <= 0:
            # class 0: the string is closed, nothing to mark
            continue
        activation[played_class + open_string_pitches[string_idx] - 41] = 1
    return activation

def tab2bin(tab):
    """Convert one frame of tablature into a binary (6, 20) string/fret grid.

    The "closed" class 0 is dropped: a string whose arg-max class is ``c > 0``
    gets a 1 in column ``c - 1``; closed strings stay all-zero.
    """
    grid = np.zeros((6, 20))
    for string_idx, fret_scores in enumerate(tab):
        played_class = np.argmax(fret_scores, -1)
        if played_class > 0:
            grid[string_idx][played_class - 1] = 1
    return grid

def pitch_precision(pred, gt):
    """Pitch precision pooled over all frames.

    Both inputs are sequences of tablature frames; each frame is turned into
    a pitch vector with ``tab2pitch``. The result is the number of pitches
    active in both prediction and ground truth divided by the number of
    predicted pitches.

    Returns:
        The precision, or 0.0 when nothing was predicted (instead of the NaN
        and RuntimeWarning a 0/0 division would produce).
    """
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    true_positives = np.sum(pitch_pred * pitch_gt)
    predicted_positives = np.sum(pitch_pred)
    if predicted_positives == 0:
        # keep a NumPy scalar so callers that divide by the result still get NaN, not an exception
        return np.float64(0.0)
    return true_positives / predicted_positives

def pitch_recall(pred, gt):
    """Pitch recall pooled over all frames.

    Both inputs are sequences of tablature frames; each frame is turned into
    a pitch vector with ``tab2pitch``. The result is the number of pitches
    active in both prediction and ground truth divided by the number of
    ground-truth pitches.

    Returns:
        The recall, or 0.0 when the ground truth contains no pitch (instead
        of the NaN and RuntimeWarning a 0/0 division would produce).
    """
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    true_positives = np.sum(pitch_pred * pitch_gt)
    actual_positives = np.sum(pitch_gt)
    if actual_positives == 0:
        # keep a NumPy scalar so callers that divide by the result still get NaN, not an exception
        return np.float64(0.0)
    return true_positives / actual_positives

def pitch_f_measure(pred, gt):
    """Harmonic mean of ``pitch_precision`` and ``pitch_recall``.

    Returns:
        ``2pr / (p + r)``, or 0.0 when precision and recall are both zero
        (the plain formula would divide by zero there).
    """
    p = pitch_precision(pred, gt)
    r = pitch_recall(pred, gt)
    if p + r == 0:
        return np.float64(0.0)
    return (2 * p * r) / (p + r)

def tab_precision(pred, gt):
    """Tablature (string/fret) precision pooled over all frames.

    Each frame is converted with ``tab2bin``, which drops the "closed" class
    so that only played string/fret positions count as positives. The result
    is the number of positions active in both prediction and ground truth
    divided by the number of predicted positions.

    Returns:
        The precision, or 0.0 when nothing was predicted (instead of the NaN
        and RuntimeWarning a 0/0 division would produce).
    """
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    true_positives = np.sum(tab_pred * tab_gt)
    predicted_positives = np.sum(tab_pred)
    if predicted_positives == 0:
        # keep a NumPy scalar so callers that divide by the result still get NaN, not an exception
        return np.float64(0.0)
    return true_positives / predicted_positives

def tab_recall(pred, gt):
    """Tablature (string/fret) recall pooled over all frames.

    Each frame is converted with ``tab2bin``, which drops the "closed" class
    so that only played string/fret positions count as positives. The result
    is the number of positions active in both prediction and ground truth
    divided by the number of ground-truth positions.

    Returns:
        The recall, or 0.0 when the ground truth has no played position
        (instead of the NaN and RuntimeWarning a 0/0 division would produce).
    """
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    true_positives = np.sum(tab_pred * tab_gt)
    actual_positives = np.sum(tab_gt)
    if actual_positives == 0:
        # keep a NumPy scalar so callers that divide by the result still get NaN, not an exception
        return np.float64(0.0)
    return true_positives / actual_positives

def tab_f_measure(pred, gt):
    """Harmonic mean of ``tab_precision`` and ``tab_recall``.

    Returns:
        ``2pr / (p + r)``, or 0.0 when precision and recall are both zero
        (the plain formula would divide by zero there).
    """
    p = tab_precision(pred, gt)
    r = tab_recall(pred, gt)
    if p + r == 0:
        return np.float64(0.0)
    return (2 * p * r) / (p + r)

def tab_disamb(pred, gt):
    """Ratio of tablature precision to pitch precision.

    Returns:
        ``tab_precision / pitch_precision``, or 0.0 when the pitch precision
        is zero (the plain ratio would divide by zero there).
    """
    tp = tab_precision(pred, gt)
    pp = pitch_precision(pred, gt)
    if pp == 0:
        return np.float64(0.0)
    return tp / pp

In [None]:
# Quick look at one pre-computed example: its label array and spectral frames.
anno_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/c/00_Jazz3-150-C_comp.npz"
# Copy both arrays out inside the context manager so the archive is closed again.
with np.load(anno_path) as load:
    label = load["labels"]
    # deliberately not called `repr`: that would shadow the builtin for the rest of the kernel
    repr_frames = load["repr"]

repr_frames.shape

(1103, 192)

In [22]:
from __future__ import print_function
import keras
import os
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Reshape, Activation
from keras.layers import Conv2D, MaxPooling2D, Conv1D, Lambda
from keras import backend as K
import tensorflow
import pandas as pd
import numpy as np
import datetime

class TabCNN:
    """CNN tablature estimator together with its data split, training and scoring helpers.

    The network maps one context window of spectral frames, shaped
    ``(bins, con_win_size, 1)``, to a ``(num_strings, num_classes)`` array
    with an independent softmax per string.
    """

    # Feature bins per frame for every supported ``spec_repr`` code.
    _FEATURE_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}
    # Score lists collected by evaluate(), in their original insertion order.
    _METRIC_KEYS = ("pp", "pr", "pf", "tp", "tr", "tf", "tdr")
    # Paths used when the corresponding method is called without an explicit path.
    _DEFAULT_WEIGHTS = "/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved/2022-06-04/weights.h5"
    _DEFAULT_SHOW_FILE = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/c/00_Jazz3-150-C_comp.npz"
    _DEFAULT_PREDICT_FILE = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/c/00_Rock1-90-C#_comp.npz"

    def __init__(self,
                 batch_size=128,
                 epochs=1,#8
                 con_win_size = 9,
                 spec_repr="c",
                 data_path="/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/",
                 id_file="id.csv",
                 save_path="saved/"):
        """Store the configuration and read the list of sample IDs.

        Args:
            batch_size: samples per batch for the generators and for show().
            epochs: number of training epochs.
            con_win_size: number of frames in one context window.
            spec_repr: representation code, one of ``"c"``, ``"m"``, ``"cm"``, ``"s"``.
            data_path: directory containing ``id_file`` and the data sub-directories.
            id_file: header-less CSV whose first column lists the sample IDs.
            save_path: prefix under which logs, weights and predictions are written.

        Raises:
            ValueError: if ``spec_repr`` is not a supported code.
        """
        # Fail early: an unknown code used to leave input_shape undefined and
        # only surfaced as an AttributeError inside build_model().
        if spec_repr not in self._FEATURE_BINS:
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (spec_repr, sorted(self._FEATURE_BINS))
            )

        self.batch_size = batch_size
        self.epochs = epochs
        self.con_win_size = con_win_size
        self.spec_repr = spec_repr
        self.data_path = data_path
        self.id_file = id_file
        self.save_path = save_path

        self.load_IDs()

        # Only the output locations are computed here; the directories are
        # created lazily by the methods that actually write, so constructing
        # the object has no side effect on disk.
        stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.save_folder = self.save_path + self.spec_repr + " " + stamp + "/"
        self.log_file = self.save_folder + "log.txt"
        # refined to a per-fold sub-directory by partition_data()
        self.split_folder = self.save_folder

        self.metrics = {key: [] for key in self._METRIC_KEYS}
        self.metrics["data"] = ["g0","g1","g2","g3","g4","g5","mean","std dev"]

        self.input_shape = (self._FEATURE_BINS[self.spec_repr], self.con_win_size, 1)

        # these probably won't ever change
        self.num_classes = 21
        self.num_strings = 6

    def load_IDs(self):
        """Read the sample IDs from the first column of ``data_path + id_file``."""
        csv_file = self.data_path + self.id_file
        self.list_IDs = list(pd.read_csv(csv_file, header=None)[0])

    def partition_data(self, data_split):
        """Split the IDs by guitarist and build both data generators.

        IDs whose leading number (the text before the first underscore)
        equals ``data_split`` form the validation set; all others are used
        for training.

        Returns:
            ``(training_generator, validation_generator)``.
        """
        self.data_split = data_split
        self.split_folder = self.save_folder + str(self.data_split) + "/"

        self.partition = {"training": [], "validation": []}
        for ID in self.list_IDs:
            guitarist = int(ID.split("_")[0])
            subset = "validation" if guitarist == data_split else "training"
            self.partition[subset].append(ID)

        shared_args = dict(data_path=self.data_path,
                           batch_size=self.batch_size,
                           spec_repr=self.spec_repr,
                           con_win_size=self.con_win_size)
        self.training_generator = DataGenerator(self.partition['training'],
                                                shuffle=True,
                                                **shared_args)
        self.validation_generator = DataGenerator(self.partition['validation'],
                                                  shuffle=False,
                                                  **shared_args)
        return self.training_generator, self.validation_generator

    def log_model(self):
        """Write the hyper-parameters and the model summary to ``log_file``."""
        os.makedirs(self.save_folder, exist_ok=True)
        with open(self.log_file,'w') as fh:
            fh.write("\nbatch_size: " + str(self.batch_size))
            fh.write("\nepochs: " + str(self.epochs))
            fh.write("\nspec_repr: " + str(self.spec_repr))
            fh.write("\ndata_path: " + str(self.data_path))
            fh.write("\ncon_win_size: " + str(self.con_win_size))
            fh.write("\nid_file: " + str(self.id_file) + "\n")
            self.model.summary(print_fn=lambda x: fh.write(x + '\n'))

    def softmax_by_string(self, t):
        """Apply a softmax over the class axis separately for every string."""
        string_sm = [K.expand_dims(K.softmax(t[:,i,:]), axis=1)
                     for i in range(self.num_strings)]
        return K.concatenate(string_sm, axis=1)

    def catcross_by_string(self, target, output):
        """Sum the categorical cross-entropy over all strings."""
        loss = 0
        for i in range(self.num_strings):
            loss += K.categorical_crossentropy(target[:,i,:], output[:,i,:])
        return loss

    def avg_acc(self, y_true, y_pred):
        """Fraction of string entries whose arg-max class matches the target."""
        return K.mean(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)))

    def build_model(self):
        """Create and compile the network, storing it in ``self.model``."""
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3),
                             activation='relu',
                             input_shape=self.input_shape))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes * self.num_strings)) # no activation
        model.add(Reshape((self.num_strings, self.num_classes)))
        model.add(Activation(self.softmax_by_string))

        model.compile(loss=self.catcross_by_string,
                      optimizer=tensorflow.keras.optimizers.SGD(),
                      metrics=[self.avg_acc])

        self.model = model

    def train(self):
        """Fit the model on the training generator for ``epochs`` epochs."""
        # Model.fit accepts Sequence generators directly; fit_generator is deprecated.
        self.model.fit(self.training_generator,
                    validation_data=None,
                    epochs=self.epochs,
                    verbose=1,
                    use_multiprocessing=True,
                    workers=9)

    def save_weights(self):
        """Save the model weights as ``weights.h5`` inside ``split_folder``."""
        os.makedirs(self.split_folder, exist_ok=True)
        self.model.save_weights(self.split_folder + "weights.h5")

    def load_weights(self, weights_path=None):
        """Load model weights from ``weights_path`` (default: the bundled checkpoint path)."""
        if weights_path is None:
            weights_path = self._DEFAULT_WEIGHTS
        self.model.load_weights(weights_path)

    def data(self):
        """Fetch the first validation batch and keep it as ``X_test`` / ``y_gt``."""
        self.input_data = self.validation_generator[0]
        self.X_test, self.y_gt = self.input_data
        return self.input_data

    def test(self):
        """Predict on ``X_test`` and return ``(y_gt, y_pred)``."""
        self.y_pred = self.model.predict(self.X_test)
        return self.y_gt, self.y_pred

    def _context_windows(self, spec):
        """Turn a (frames, bins) array into model inputs shaped (frames, bins, con_win_size, 1)."""
        halfwin = self.con_win_size // 2
        padded = np.pad(spec, [(halfwin, halfwin), (0, 0)], mode="constant")
        # one window per original frame, starting at that frame's position in the padded array
        windows = np.stack([padded[i:i + self.con_win_size] for i in range(len(spec))])
        return np.expand_dims(np.swapaxes(windows, 1, 2), -1)

    def show(self, anno_path=None):
        """Run the model over one recording, batch by batch, and score every batch.

        Args:
            anno_path: ``.npz`` file with ``"repr"`` and ``"labels"`` arrays;
                defaults to the bundled example recording.

        Returns:
            ``(labels, reprs, results)``: the labels truncated to a whole
            number of batches, the one-hot arg-max predictions for the same
            frames, and a dict of per-batch metric lists.
        """
        if anno_path is None:
            anno_path = self._DEFAULT_SHOW_FILE
        with np.load(anno_path) as archive:
            frame_labels = archive["labels"]
            spec = archive["repr"]

        windows = self._context_windows(spec)
        size = self.batch_size
        n_batches = len(windows) // size

        # frames that do not fill a complete batch are dropped
        labels = frame_labels[:len(frame_labels) // size * size]
        reprs = np.zeros((n_batches * size, self.num_strings, self.num_classes))
        results = {key: [] for key in ("pp", "pr", "pf", "tp", "tr", "tf")}

        for batch in range(n_batches):
            start, stop = batch * size, (batch + 1) * size
            outputs = self.model.predict(windows[start:stop])
            gt = labels[start:stop]
            results["pp"].append(pitch_precision(outputs, gt))
            results["pr"].append(pitch_recall(outputs, gt))
            results["pf"].append(pitch_f_measure(outputs, gt))
            results["tp"].append(tab_precision(outputs, gt))
            results["tr"].append(tab_recall(outputs, gt))
            results["tf"].append(tab_f_measure(outputs, gt))
            # one-hot encode the most likely class of every string
            predicted = np.argmax(outputs, 2)
            for j, strings in enumerate(predicted):
                for k, fret in enumerate(strings):
                    reprs[start + j][k][fret] = 1
        return labels, reprs, results

    def predictions(self, anno_path=None):
        """Return the raw model output for every frame of one recording.

        Args:
            anno_path: ``.npz`` file with a ``"repr"`` array; defaults to the
                bundled example recording.
        """
        if anno_path is None:
            anno_path = self._DEFAULT_PREDICT_FILE
        with np.load(anno_path) as archive:
            spec = archive["repr"]
        return self.model.predict(self._context_windows(spec))

    def predictions_song(self):
        """Unfinished placeholder: opens the example recording and returns nothing."""
        # NOTE(review): no prediction is made here yet; the archive is only opened and closed.
        with np.load(self._DEFAULT_SHOW_FILE):
            pass

    def save_predictions(self):
        """Save ``y_pred`` and ``y_gt`` as ``predictions.npz`` inside ``split_folder``."""
        os.makedirs(self.split_folder, exist_ok=True)
        np.savez(self.split_folder + "predictions.npz", y_pred=self.y_pred, y_gt=self.y_gt)

    def evaluate(self):
        """Append every metric for the current ``y_pred`` / ``y_gt`` and return the metrics dict."""
        self.metrics["pp"].append(pitch_precision(self.y_pred, self.y_gt))
        self.metrics["pr"].append(pitch_recall(self.y_pred, self.y_gt))
        self.metrics["pf"].append(pitch_f_measure(self.y_pred, self.y_gt))
        self.metrics["tp"].append(tab_precision(self.y_pred, self.y_gt))
        self.metrics["tr"].append(tab_recall(self.y_pred, self.y_gt))
        self.metrics["tf"].append(tab_f_measure(self.y_pred, self.y_gt))
        self.metrics["tdr"].append(tab_disamb(self.y_pred, self.y_gt))
        return self.metrics

    def save_results_csv(self):
        """Save every metric list plus its mean and standard deviation.

        Despite the name, the result is written with ``np.save`` to
        ``results.npy`` in the current working directory, not to a CSV file.
        """
        output = {}
        for key in self.metrics.keys():
            if key != "data":
                vals = self.metrics[key]
                mean = np.mean(vals)
                std = np.std(vals)
                output[key] = vals + [mean, std]
        output["data"] =  self.metrics["data"]
        np.save('results.npy', output)
        # df = pd.DataFrame.from_dict(output)
        # df.to_csv(self.save_folder + "results.csv") 
        
##################################
########### EXPERIMENT ###########
##################################
# The experiment driver below is wrapped in a string literal, so none of it
# runs. As the last expression of the cell, the string itself is echoed as
# the cell output.
'''
tabcnn = TabCNN()

spec_repr = "c"

print("logging model...")
tabcnn.build_model()
tabcnn.log_model()

for fold in range(1):#6
    print("\nfold " + str(fold))
    tabcnn.partition_data(fold)
    print("building model...")
    tabcnn.build_model()
    # print("training...")
    # tabcnn.train()
    # tabcnn.save_weights()
    tabcnn.load_weights()
    print("testing...")
    tabcnn.test()
    tabcnn.save_predictions()
    print("evaluation...")
    tabcnn.evaluate()
print("saving results...")
tabcnn.save_results_csv()
'''

'\ntabcnn = TabCNN()\n\nspec_repr = "c"\n\nprint("logging model...")\ntabcnn.build_model()\ntabcnn.log_model()\n\nfor fold in range(1):#6\n    print("\nfold " + str(fold))\n    tabcnn.partition_data(fold)\n    print("building model...")\n    tabcnn.build_model()\n    # print("training...")\n    # tabcnn.train()\n    # tabcnn.save_weights()\n    tabcnn.load_weights()\n    print("testing...")\n    tabcnn.test()\n    tabcnn.save_predictions()\n    print("evaluation...")\n    tabcnn.evaluate()\nprint("saving results...")\ntabcnn.save_results_csv()\n'

In [23]:
# Build the network for fold 0 and restore previously trained weights;
# training, saving and evaluation are disabled in this cell.
tabcnn = TabCNN()

# NOTE(review): this variable is not passed to TabCNN(); the constructor default is used.
spec_repr = "c"

# NOTE(review): log_model() is commented out, so nothing is logged despite the message.
print("logging model...")
tabcnn.build_model()
#tabcnn.log_model()
# guitarist 0 becomes the validation split
tabcnn.partition_data(0)
print("building model...")
# NOTE(review): second build_model() call; it replaces the model built above.
tabcnn.build_model()
# print("training...")
# tabcnn.train()
# tabcnn.save_weights()
tabcnn.load_weights()
#tabcnn.test()
# tabcnn.save_predictions()
# print("evaluation...")
# tabcnn.evaluate()
# print("saving results...")
# tabcnn.save_results_csv()

logging model...
building model...


In [24]:
# Fetch the first validation batch (this also stores X_test / y_gt on tabcnn) and check its shape.
input_data = tabcnn.data()
X_test, y_gt = input_data
X_test.shape

(128, 192, 9, 1)

In [25]:
# Predict on the stored validation batch and compute all metrics for it.
y_gt, y_pre = tabcnn.test()
print(y_pre.shape)
# NOTE(review): this value is neither printed nor the cell's last expression, so it is discarded.
tab_recall(y_pre, y_gt)
met = tabcnn.evaluate()
print(met)

(128, 6, 21)
{'pp': [0.8385964912280702], 'pr': [0.7220543806646526], 'pf': [0.775974025974026], 'tp': [0.8385964912280702], 'tr': [0.7220543806646526], 'tf': [0.775974025974026], 'tdr': [1.0], 'data': ['g0', 'g1', 'g2', 'g3', 'g4', 'g5', 'mean', 'std dev']}


In [None]:
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
import IPython.display

In [None]:

# Animate prediction vs. ground truth on a fretboard, one image per frame.
fig = plt.figure()

ims = []

labels, reprs, results = tabcnn.show()
# average of every per-batch metric
for i in results:
  print(str(i) + ":" + str(np.average(results[i])))

for rp, lb in zip(reprs, labels):
  im = Image.new('RGB', (500, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # fretboard grid: 6 horizontal string lines and 20 vertical fret lines (drawn once)
  for j in range(6):
    draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
  for j in range(20):
    draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  for string, (j_pr, j_lb) in enumerate(zip(rp, lb)):
    for fret, (k_pr, k_lb) in enumerate(zip(j_pr, j_lb)):
      if fret == 0:
        # class 0 is the "closed" class and is never drawn
        continue
      if k_lb == 1:
        # magenta: prediction and label agree; blue: label only
        colour = (255, 0, 255) if k_pr == 1 else (0, 0, 255)
      elif k_pr == 1:
        # red: predicted but not in the label
        colour = (255, 0, 0)
      else:
        continue
      draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=colour)

  ims.append([plt.imshow(np.array(im))])

ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# NOTE(review): tab2bin2 is not defined in any cell visible in this notebook — this fails on a fresh kernel unless it is defined elsewhere.
# NOTE(review): predictions() is called twice, so the model runs over the whole recording twice.
# NOTE(review): model outputs printed earlier are shaped (frames, 6, 21); [0][10] would then index past the 6 strings — confirm the intended indexing.
print(tab2bin2(tabcnn.predictions()[0][10]))
print(tabcnn.predictions()[1][10])

In [None]:
# Fetch the first validation batch again and show the input shape.
# NOTE(review): the recorded output below has a different first dimension than the earlier (128, 192, 9, 1);
# presumably it comes from a run with another batch size — TODO confirm or re-run.
a, b = tabcnn.data()
a.shape

(78760, 192, 9, 1)

In [None]:
!pip install pydub
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
import IPython.display

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Animate the model's predictions for one recording on a fretboard, one image per frame.
fig = plt.figure()

ims = []
out = tabcnn.predictions()
print(out.shape)
tabs = []
# NOTE(review): tab2bin2 is not defined in any cell visible in this notebook — confirm where it comes from.
for k in out:
  tab = tab2bin2(k)
  tabs.append(tab)
for i in tabs:
    im = Image.new('RGB', (500, 350), (255, 255, 255))
    draw = ImageDraw.Draw(im)

    # fretboard grid: 6 horizontal string lines and 20 vertical fret lines
    for j in range(6):
      draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

    # red dot for every entry equal to 1; index 0 on the inner axis is skipped
    for string, j in enumerate(i):
      for fret, k in enumerate(j):
        if not fret == 0:
            if k == 1:
              draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))
    # from here on `im` is the matplotlib artist, not the PIL image
    im = plt.imshow(np.array(im))
      
    ims.append([im])
  
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
# close the static figure so only the animation widget is displayed
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Animate the ground-truth labels of one recording on a fretboard, one image per frame.
anno_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/c/00_Rock1-90-C#_comp.npz"
# Copy the labels out inside the context manager so the archive is closed again.
with np.load(anno_path) as load:
  data = load["labels"]
fig = plt.figure()

ims = []
print(data.shape)
for i in data:
  im = Image.new('RGB', (500, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # fretboard grid: 6 horizontal string lines and 20 vertical fret lines
  for j in range(6):
    draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
  for j in range(20):
    draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # red dot for every active label entry; index 0 on the inner axis is skipped
  for string, j in enumerate(i):
    for fret, k in enumerate(j):
      if fret != 0 and k == 1:
        draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))

  ims.append([plt.imshow(np.array(im))])

ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
# close the static figure so only the animation widget is displayed
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Embed an audio player for the microphone recording named like the example file animated above.
audio_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/GuitarSet/audio/audio_mic/00_Rock1-90-C#_comp_mic.wav"
IPython.display.Audio(audio_path)

Output hidden; open in https://colab.research.google.com to view.