In [None]:
# Colab only: mount Google Drive at /content/drive. The dataset and checkpoint
# paths used by the later cells all live below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!nvidia-smi

Mon Jul  4 18:52:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    45W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import numpy as np
from torch.utils.data import Dataset
import os

class MyDataGenerator(Dataset):
    """Serve whole batches of spectrogram context windows with their labels.

    Every element of ``list_IDs`` is a string ``"<track>_<frame index>"``: the
    track part names the ``.npz`` file to open, and the text between the first
    two ``-`` characters of the ID is parsed as an integer tempo.  Indexing the
    generator returns one complete batch, not a single sample.

    Args:
        list_IDs: frame identifiers as described above.
        data_path: directory holding one sub-directory per spectral
            representation (a trailing slash is optional).
        batch_size: number of frames per batch.
        shuffle: shuffle the frame order whenever ``on_epoch_end`` runs.
        label_dim: shape of the label stored for one frame.
        spec_repr: one of ``"c"``, ``"m"``, ``"cm"`` or ``"s"``.
        con_win_size: number of frames in each context window.

    Raises:
        ValueError: if ``spec_repr`` is not a supported representation.
    """

    # Number of frequency bins of every supported spectral representation.
    _FEATURE_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}

    def __init__(self, list_IDs, data_path="/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr", batch_size=128, shuffle=True, label_dim = (6,21), spec_repr="c", con_win_size=9):
        self.list_IDs = list_IDs
        self.data_path = data_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.con_win_size = con_win_size
        self.halfwin = con_win_size // 2

        # Fail at construction time instead of with an AttributeError on the
        # first batch when the representation is unknown.
        if self.spec_repr not in self._FEATURE_BINS:
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (self.spec_repr, sorted(self._FEATURE_BINS))
            )
        self.X_dim = (self.batch_size, self._FEATURE_BINS[self.spec_repr], self.con_win_size)
        self.y_dim = (self.batch_size, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch."""
        return int(np.floor(float(len(self.list_IDs)) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y, tempo)`` for batch number ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.  Without
                this check an out-of-range index selects no IDs and the
                uninitialised buffers would be returned as if they were data;
                it also lets ``for batch in generator`` terminate.
        """
        if index < 0 or index >= len(self):
            raise IndexError("batch index %d out of range" % index)

        # positions (into list_IDs) of the frames that make up this batch
        indexes = self.indexes[index*self.batch_size : (index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y, tempo = self.__data_generation(list_IDs_temp)
        return X, y, tempo

    def on_epoch_end(self):
        """Rebuild the frame order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the frames named in ``list_IDs_temp``.

        Returns:
            X: array of shape ``X_dim`` (batch, frequency bins, window frames).
            y: array of shape ``y_dim``.
            tempo: list with one integer tempo per frame.
        """
        X = np.empty(self.X_dim)
        y = np.empty(self.y_dim)
        tempo = []

        # os.path.join copes with data_path given with or without a trailing
        # slash; plain concatenation produced ".../spec_reprc/" for the default.
        data_dir = os.path.join(self.data_path, self.spec_repr)

        for i, ID in enumerate(list_IDs_temp):
            # "<track>_<frame>" -> file name and frame index
            track_name, _, frame_text = ID.rpartition("_")
            frame_idx = int(frame_text)
            tempo.append(int(ID.split("-")[1]))

            # Close the archive as soon as the sample is copied out, so a long
            # epoch does not accumulate open file handles.
            with np.load(os.path.join(data_dir, track_name + ".npz")) as loaded:
                # zero-pad in time so that edge frames still get a full window
                full_x = np.pad(loaded["repr"], [(self.halfwin, self.halfwin), (0, 0)], mode='constant')
                sample_x = full_x[frame_idx : frame_idx + self.con_win_size]
                X[i,] = np.swapaxes(sample_x, 0, 1)
                y[i,] = loaded["labels"][frame_idx]

        return X, y, tempo

In [None]:
class MyDataGenerator2(Dataset):
    """Serve batches of whole tracks padded to a fixed number of frames.

    Every element of ``list_IDs`` is a track name; ``<ID>.npz`` is loaded from
    the sub-directory of ``data_path`` named after ``spec_repr``.  Each track
    is zero-padded (4 frames in front, the remainder behind) to 2000 frames.

    Args:
        list_IDs: track identifiers (file names without ``.npz``).
        data_path: directory holding one sub-directory per spectral
            representation (a trailing slash is optional).
        batch_size: number of tracks per batch.
        label_dim: shape of the label stored for one frame.
        spec_repr: one of ``"c"``, ``"m"``, ``"cm"`` or ``"s"``.
        window_size: stored on the instance; not used by the batch loader.

    Raises:
        ValueError: if ``spec_repr`` is not a supported representation.
    """

    # Number of frequency bins of every supported spectral representation.
    _FEATURE_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}
    # Fixed length (in frames) every track is padded to.
    _MAX_FRAMES = 2000
    # Zero frames inserted in front of every track.
    _FRONT_PAD = 4

    def __init__(self, list_IDs, data_path="/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr", batch_size=128, label_dim = (6,21), spec_repr="c", window_size = 50):
        self.list_IDs = list_IDs
        self.data_path = data_path
        self.batch_size = batch_size
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.window_size = window_size

        if self.spec_repr not in self._FEATURE_BINS:
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (self.spec_repr, sorted(self._FEATURE_BINS))
            )
        self.X_dim = (self.batch_size, self._FEATURE_BINS[self.spec_repr], self._MAX_FRAMES, 1)
        self.y_dim = (self.batch_size, self._MAX_FRAMES, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch."""
        return int(np.floor(float(len(self.list_IDs)) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y, files)`` for batch number ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.
        """
        if index < 0 or index >= len(self):
            raise IndexError("batch index %d out of range" % index)

        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y, ID = self.__data_generation(list_IDs_temp)
        return X, y, ID

    def on_epoch_end(self):
        """Reset the track order (this generator never shuffles)."""
        self.indexes = np.arange(len(self.list_IDs))

    def __data_generation(self, list_IDs_temp):
        """Load and pad the tracks named in ``list_IDs_temp``.

        The previous implementation rebound ``X`` to a list and then indexed it
        with ``X[i,]``, which raises ``TypeError`` for every batch.  Only the
        fixed-length padding path, which matches ``X_dim``/``y_dim``, is kept.

        Returns:
            X: array of shape ``X_dim`` (batch, frequency bins, 2000, 1).
            y: array of shape ``y_dim``; frames after the end of the track have
               class 0 set for every string.
            files: list of the ``.npz`` file names that were read.

        Raises:
            ValueError: if a track does not fit into the fixed frame budget.
        """
        X = np.empty(self.X_dim)
        # zeros (not empty) so that the padded label rows are valid one-hot rows
        y = np.zeros(self.y_dim)
        files = []

        data_dir = os.path.join(self.data_path, self.spec_repr)

        for i, ID in enumerate(list_IDs_temp):
            filename = ID + ".npz"
            files.append(filename)

            # read both arrays, then release the file handle immediately
            with np.load(os.path.join(data_dir, filename)) as loaded:
                spec = loaded["repr"]
                labels = loaded["labels"]

            n_frames = len(spec)
            back_pad = self._MAX_FRAMES - self._FRONT_PAD - n_frames
            if back_pad < 0:
                raise ValueError(
                    "%s has %d frames; at most %d fit"
                    % (filename, n_frames, self._MAX_FRAMES - self._FRONT_PAD)
                )

            full_x = np.pad(spec, [(self._FRONT_PAD, back_pad), (0, 0)], mode='constant')
            X[i,] = np.expand_dims(np.swapaxes(full_x, 0, 1), -1)

            # real labels first, then mark every padded frame as class 0
            y[i, :n_frames] = labels
            y[i, len(labels):, :, 0] = 1

        return X, y, files

In [None]:
class MyDataGenerator3(Dataset):
    """Batch generator of context windows; same contract as ``MyDataGenerator``.

    IDs have the form ``"<track>_<frame index>"``.  The track part selects the
    ``.npz`` file, and the text between the first two ``-`` characters of the
    ID is parsed as an integer tempo.  One index yields one whole batch.

    Args:
        list_IDs: frame identifiers.
        data_path: directory holding one sub-directory per spectral
            representation (a trailing slash is optional).
        batch_size: number of frames per batch.
        shuffle: shuffle the frame order whenever ``on_epoch_end`` runs.
        label_dim: shape of the label stored for one frame.
        spec_repr: one of ``"c"``, ``"m"``, ``"cm"`` or ``"s"``.
        con_win_size: number of frames in each context window.

    Raises:
        ValueError: if ``spec_repr`` is not a supported representation.
    """

    # frequency bins per spectral representation
    _N_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}

    def __init__(self, list_IDs, data_path="/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr", batch_size=128, shuffle=True, label_dim = (6,21), spec_repr="c", con_win_size=9):
        self.list_IDs = list_IDs
        self.data_path = data_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.con_win_size = con_win_size
        self.halfwin = con_win_size // 2

        try:
            n_bins = self._N_BINS[spec_repr]
        except KeyError:
            # previously X_dim was silently left undefined in this case
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (spec_repr, sorted(self._N_BINS))
            ) from None

        self.X_dim = (self.batch_size, n_bins, self.con_win_size)
        self.y_dim = (self.batch_size, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch."""
        return int(np.floor(float(len(self.list_IDs)) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y, tempo)`` for batch number ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``; an
                out-of-range index used to return uninitialised arrays.
        """
        if not 0 <= index < len(self):
            raise IndexError("batch index %d out of range" % index)

        start = index * self.batch_size
        batch_positions = self.indexes[start : start + self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in batch_positions]

        X, y, tempo = self.__data_generation(list_IDs_temp)
        return X, y, tempo

    def on_epoch_end(self):
        """Rebuild the frame order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def _load_sample(self, data_dir, ID):
        """Return ``(window, label, tempo)`` for one frame ID."""
        track_name, _, frame_text = ID.rpartition("_")
        frame_idx = int(frame_text)
        tempo = int(ID.split("-")[1])

        # the context manager closes the archive once the arrays are copied out
        with np.load(os.path.join(data_dir, track_name + ".npz")) as loaded:
            padded = np.pad(loaded["repr"], [(self.halfwin, self.halfwin), (0, 0)], mode='constant')
            window = np.swapaxes(padded[frame_idx : frame_idx + self.con_win_size], 0, 1)
            label = loaded["labels"][frame_idx]
        return window, label, tempo

    def __data_generation(self, list_IDs_temp):
        """Assemble the batch arrays for the frames in ``list_IDs_temp``.

        Returns:
            X: array of shape ``X_dim`` (batch, frequency bins, window frames).
            y: array of shape ``y_dim``.
            tempo: list with one integer tempo per frame.
        """
        X = np.empty(self.X_dim)
        y = np.empty(self.y_dim)
        tempo = []

        # joined rather than concatenated, so a missing trailing slash on
        # data_path no longer yields a ".../spec_reprc/" directory
        data_dir = os.path.join(self.data_path, self.spec_repr)

        for i, ID in enumerate(list_IDs_temp):
            X[i,], y[i,], frame_tempo = self._load_sample(data_dir, ID)
            tempo.append(frame_tempo)

        return X, y, tempo

In [None]:
class MyDataGenerator4(Dataset):
    """Batch generator of context windows with a trailing channel axis.

    IDs have the form ``"<track>_<frame index>"``.  The track part selects the
    ``.npz`` file, and the text between the first two ``-`` characters of the
    ID is parsed as an integer tempo.  One index yields one whole batch whose
    ``X`` has shape (batch, frequency bins, window frames, 1).

    Args:
        list_IDs: frame identifiers.
        data_path: directory holding one sub-directory per spectral
            representation (a trailing slash is optional).
        batch_size: number of frames per batch.
        shuffle: shuffle the frame order whenever ``on_epoch_end`` runs.
        label_dim: shape of the label stored for one frame.
        spec_repr: one of ``"c"``, ``"m"``, ``"cm"`` or ``"s"``.
        con_win_size: number of frames in each context window.

    Raises:
        ValueError: if ``spec_repr`` is not a supported representation.
    """

    # Number of frequency bins of every supported spectral representation.
    _FEATURE_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}

    def __init__(self, list_IDs, data_path="/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr", batch_size=128, shuffle=True, label_dim = (6,21), spec_repr="c", con_win_size=9):
        self.list_IDs = list_IDs
        self.data_path = data_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.con_win_size = con_win_size
        self.halfwin = con_win_size // 2

        bins = self._FEATURE_BINS.get(self.spec_repr)
        if bins is None:
            raise ValueError(
                "unknown spec_repr %r; expected one of %s"
                % (self.spec_repr, sorted(self._FEATURE_BINS))
            )
        self.X_dim = (self.batch_size, bins, self.con_win_size, 1)
        self.y_dim = (self.batch_size, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch.

        This used to return ``len(list_IDs) * 2048``, which is not a batch
        count: almost every index below that value selected no IDs at all and
        produced uninitialised arrays.
        """
        return int(np.floor(float(len(self.list_IDs)) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y, tempo)`` for batch number ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.
        """
        if index < 0 or index >= len(self):
            raise IndexError("batch index %d out of range" % index)

        # positions (into list_IDs) of the frames that make up this batch
        indexes = self.indexes[index*self.batch_size : (index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y, tempo = self.__data_generation(list_IDs_temp)
        return X, y, tempo

    def on_epoch_end(self):
        """Rebuild the frame order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the frames named in ``list_IDs_temp``.

        Returns:
            X: array of shape ``X_dim``.
            y: array of shape ``y_dim``.
            tempo: list with one integer tempo per frame.
        """
        X = np.empty(self.X_dim)
        y = np.empty(self.y_dim)
        tempo = []

        # tolerate data_path with or without a trailing slash
        data_dir = os.path.join(self.data_path, self.spec_repr)

        for i, ID in enumerate(list_IDs_temp):
            track_name, _, frame_text = ID.rpartition("_")
            frame_idx = int(frame_text)
            tempo.append(int(ID.split("-")[1]))

            # close the archive right after the sample has been copied
            with np.load(os.path.join(data_dir, track_name + ".npz")) as loaded:
                full_x = np.pad(loaded["repr"], [(self.halfwin, self.halfwin), (0, 0)], mode='constant')
                sample_x = full_x[frame_idx : frame_idx + self.con_win_size]
                # (frames, bins) -> (bins, frames, 1)
                X[i,] = np.expand_dims(np.swapaxes(sample_x, 0, 1), -1)
                y[i,] = loaded["labels"][frame_idx]

        return X, y, tempo

In [None]:
import datetime
import pandas as pd

# ---- Experiment configuration (read as globals by the cells below) ----
# Frames per batch handed to the data generators.
batch_size=128
epochs=8
# Number of frames in each context window handed to the data generators.
con_win_size = 9
# Spectral representation key; also the name of the sub-directory that is read.
spec_repr="c"
data_path="/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr/"
# data_path2="/content/drive/MyDrive/lab/2021/MyTabCNN/data/GuitarSet/annotation"
# data_path3="/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/"
id_file="id.csv"
id3_file="id3.npy"

# The ID list is the first column of a header-less CSV stored next to the data.
csv_file = data_path + id_file
#jams_file = os.listdir(data_path2)
list_IDs = list(pd.read_csv(csv_file, header=None)[0])
# Stays empty while the loop that would fill it is commented out.
list_IDs2 = []
# for i in jams_file:
#   if i[0] != ".":
#     list_IDs2.append(i[:-5])

def partition_data(data_split):
    """Split the global ``list_IDs`` into training and validation generators.

    The integer before the first ``_`` of every ID is compared with
    ``data_split``: matching IDs go to validation, all others to training.

    Args:
        data_split: integer prefix of the IDs to hold out for validation.

    Returns:
        ``(training_generator, validation_generator)``, both
        ``MyDataGenerator`` instances built from the notebook-level settings
        with shuffling disabled.
    """
    partition = {"training": [], "validation": []}
    for ID in list_IDs:
        held_out = int(ID.split("_")[0]) == data_split
        partition["validation" if held_out else "training"].append(ID)

    # both generators share every option except the ID list
    shared_options = dict(
        data_path=data_path,
        batch_size=batch_size,
        shuffle=False,
        spec_repr=spec_repr,
        con_win_size=con_win_size,
    )
    training_generator = MyDataGenerator(partition['training'], **shared_options)
    validation_generator = MyDataGenerator(partition['validation'], **shared_options)
    return training_generator, validation_generator

# def partition_data2(data_split):
#     data_split = data_split
#     partition = {}
#     partition["training"] = []
#     partition["validation"] = []
#     for ID2 in list_IDs2:
#       guitarist = int(ID2.split("_")[0])
#       if guitarist == data_split:
#           partition["validation"].append(ID2)
#       else:
#           partition["training"].append(ID2)
#     training_generator = MyDataGenerator2(partition['training'], 
#                                             data_path=data_path, 
#                                             batch_size=batch_size,
#                                             spec_repr=spec_repr)
    
#     validation_generator = MyDataGenerator2(partition['validation'], 
#                                             data_path=data_path, 
#                                             batch_size=batch_size,
#                                             spec_repr=spec_repr)
#     return training_generator, validation_generator

# def partition_data3(data_split):
#     data_split = data_split
#     partition = {}
#     partition["training"] = []
#     partition["validation"] = []
#     for ID in list_IDs3:
#         guitarist = int(ID.split("_")[0])
#         if guitarist == data_split:
#             partition["validation"].append(ID)
#         else:
#             partition["training"].append(ID)
#     training_generator = MyDataGenerator3(partition['training'], 
#                                             data_path=data_path, 
#                                             batch_size=batch_size, 
#                                             shuffle=False,
#                                             spec_repr=spec_repr, 
#                                             con_win_size=con_win_size)
    
#     validation_generator = MyDataGenerator3(partition['validation'], 
#                                             data_path=data_path, 
#                                             batch_size=batch_size, 
#                                             shuffle=False,
#                                             spec_repr=spec_repr, 
#                                             con_win_size=con_win_size)
#     return training_generator, validation_generator
  
# def partition_data4(data_split):
#     data_split = data_split
#     partition = {}
#     partition["training"] = []
#     partition["validation"] = []
#     for ID in list_IDs:
#         guitarist = int(ID.split("_")[0])
#         if guitarist == data_split:
#             partition["validation"].append(ID)
#         else:
#             partition["training"].append(ID)
#     training_generator = MyDataGenerator4(partition['training'], 
#                                             data_path=data_path,
#                                             batch_size=batch_size, 
#                                             shuffle=False,
#                                             spec_repr=spec_repr, 
#                                             con_win_size=con_win_size)
    
#     validation_generator = MyDataGenerator4(partition['validation'], 
#                                             data_path=data_path,
#                                             batch_size=batch_size, 
#                                             shuffle=False,
#                                             spec_repr=spec_repr, 
#                                             con_win_size=con_win_size)
#     return training_generator, validation_generator

In [None]:
import torch
import torch.nn as nn
import torch.onnx as onnx
import torchvision.models as models
import torch.nn.functional as f
class MyCNN(torch.nn.Module):
    """Three-layer CNN that maps a spectrogram window to 6 x 21 class scores.

    ``fc1`` expects 5952 flattened features, i.e. 64 channels x 93 x 1 after
    the convolutions and the pooling; that corresponds to an input of shape
    (batch, 1, 192, 9).  The output holds raw scores (no softmax is applied).
    """

    def __init__(self):
        super(MyCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3)
        self.conv2 = torch.nn.Conv2d(32, 64, 3)
        self.conv3 = torch.nn.Conv2d(64, 64, 3)

        self.pool = torch.nn.MaxPool2d(2, 2)  # kernel size, stride

        self.drop1 = torch.nn.Dropout2d(0.25)
        # drop2 and softmax are created but not used in forward(); they are
        # kept so that the module's attributes stay unchanged.
        self.drop2 = torch.nn.Dropout(0.5)
        self.flatten = torch.nn.Flatten()

        self.fc1 = torch.nn.Linear(5952,128)
        self.fc2 = torch.nn.Linear(128,126)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, x):
        """Return scores of shape (batch, 6, 21) for ``x``.

        Args:
            x: float tensor of shape (batch, 1, height, width) whose flattened
               conv output has 5952 features.
        """
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = self.pool(x)
        x = self.drop1(x)
        x = self.flatten(x)
        x = f.relu(self.fc1(x))
        x = self.fc2(x)
        # -1 lets the batch dimension follow the input; the former hard-coded
        # 128 made every batch of a different size fail in reshape.
        x_out = torch.reshape(x, (-1, 6, 21))
        return x_out

In [None]:
class MyCNN2(torch.nn.Module):
    """Wider variant of the CNN with a 512-unit hidden layer and dropout.

    ``fc1`` expects 190464 flattened features (64 channels x 2976 positions
    after the convolutions and the pooling).  The output holds raw scores of
    shape (batch, 6, 21).
    """

    def __init__(self):
        super(MyCNN2, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3)
        self.conv2 = torch.nn.Conv2d(32, 64, 3)
        self.conv3 = torch.nn.Conv2d(64, 64, 3)

        self.pool = torch.nn.MaxPool2d(2, 2)  # kernel size, stride

        self.drop1 = torch.nn.Dropout2d(0.25)
        self.drop2 = torch.nn.Dropout(0.5)
        self.flatten = torch.nn.Flatten()

        self.fc1 = torch.nn.Linear(190464,512)
        self.fc2 = torch.nn.Linear(512,126)

    def forward(self, x):
        """Return scores of shape (batch, 6, 21) for ``x``.

        Args:
            x: float tensor of shape (batch, 1, height, width) whose flattened
               conv output matches the input size of ``fc1``.
        """
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = self.pool(x)
        x = self.drop1(x)
        x = self.flatten(x)
        x = f.relu(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)
        # -1 lets the batch dimension follow the input instead of assuming 128
        x_out = torch.reshape(x, (-1, 6, 21))
        return x_out

In [None]:
class MyVGG(torch.nn.Module):
  """VGG-style stack of nine conv/batch-norm/ReLU stages plus two dense layers.

  ``fc1`` expects 47616 flattened features, i.e. 512 channels x 93 x 1 after
  the three unpadded convolutions and the pooling; that corresponds to an
  input of shape (batch, 1, 192, 9).  The output holds raw scores of shape
  (batch, 6, 21).
  """

  def __init__(self):
    super(MyVGG, self).__init__()
    self.conv1 = torch.nn.Conv2d(1, 32, 3)
    self.bn1 = nn.BatchNorm2d(32)
    self.conv2 = torch.nn.Conv2d(32, 32, 3, padding=1)
    self.bn2 = nn.BatchNorm2d(32)
    self.conv3 = torch.nn.Conv2d(32, 64, 3)
    self.bn3 = nn.BatchNorm2d(64)
    self.conv4 = torch.nn.Conv2d(64, 64, 3, padding=1)
    self.bn4 = nn.BatchNorm2d(64)
    self.conv5 = torch.nn.Conv2d(64, 128, 3)
    self.bn5 = nn.BatchNorm2d(128)
    self.conv6 = torch.nn.Conv2d(128, 256, 3, padding=1)
    self.bn6 = nn.BatchNorm2d(256)
    self.conv7 = torch.nn.Conv2d(256, 256, 3, padding=1)
    self.bn7 = nn.BatchNorm2d(256)
    self.conv8 = torch.nn.Conv2d(256, 512, 3, padding=1)
    self.bn8 = nn.BatchNorm2d(512)
    self.conv9 = torch.nn.Conv2d(512, 512, 3, padding=1)
    self.bn9 = nn.BatchNorm2d(512)
    self.relu = nn.ReLU(inplace=True)

    self.pool = torch.nn.MaxPool2d(2, 2)

    self.drop1 = torch.nn.Dropout2d(0.25)
    self.drop2 = torch.nn.Dropout(0.5)
    self.flatten = torch.nn.Flatten()

    self.fc1 = torch.nn.Linear(47616,128)
    self.fc2 = torch.nn.Linear(128,126)

  def forward(self, x):
    """Return scores of shape (batch, 6, 21) for ``x``.

    Args:
      x: float tensor of shape (batch, 1, height, width) whose flattened conv
         output has 47616 features.
    """
    stages = (
        (self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3),
        (self.conv4, self.bn4), (self.conv5, self.bn5), (self.conv6, self.bn6),
        (self.conv7, self.bn7), (self.conv8, self.bn8), (self.conv9, self.bn9),
    )
    out = x
    for conv, bn in stages:
      out = self.relu(bn(conv(out)))

    out = self.pool(out)
    out = self.drop1(out)
    out = self.flatten(out)
    out = self.fc1(out)
    out = self.relu(out)
    out = self.drop2(out)
    out = self.fc2(out)
    # Infer the batch dimension from the input: the model no longer depends on
    # a notebook-level `batch_size` global being defined and equal to the
    # actual batch size.
    out = torch.reshape(out, (-1, 6, 21))

    return out

In [None]:
!pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
# helpers

def pair(t):
    """Return ``t`` itself when it is a tuple, otherwise the 2-tuple ``(t, t)``."""
    if isinstance(t, tuple):
        return t
    return (t, t)

# classes

class PreNorm(nn.Module):
    """Apply layer normalisation over the last ``dim`` features, then ``fn``."""

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Normalise ``x`` and pass it, with any keyword arguments, to ``fn``."""
        normalised = self.norm(x)
        return self.fn(normalised, **kwargs)

class FeedForward(nn.Module):
    """Two-layer MLP: ``dim -> hidden_dim -> dim`` with GELU and dropout."""

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        # order matters: the positions inside the Sequential are part of the
        # parameter names (net.0.*, net.3.*)
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the MLP; the last dimension must equal ``dim``."""
        return self.net(x)

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: size of the last dimension of the input (and of the output).
        heads: number of attention heads.
        dim_head: feature size of every head.
        dropout: dropout probability applied to the attention weights and
            after the output projection.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = heads * dim_head
        # a single head that already has the model width needs no projection
        needs_projection = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        self.dropout = nn.Dropout(dropout)

        # one linear layer produces queries, keys and values side by side
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        if needs_projection:
            self.to_out = nn.Sequential(
                nn.Linear(inner_dim, dim),
                nn.Dropout(dropout)
            )
        else:
            self.to_out = nn.Identity()

    def forward(self, x):
        """Attend over the sequence axis of ``x`` (batch, tokens, dim)."""
        projections = self.to_qkv(x).chunk(3, dim = -1)
        q, k, v = [
            rearrange(part, 'b n (h d) -> b h n d', h = self.heads)
            for part in projections
        ]

        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        weights = self.dropout(self.attend(scores))

        merged = rearrange(torch.matmul(weights, v), 'b h n d -> b n (h d)')
        return self.to_out(merged)

class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm attention + feed-forward residual blocks."""

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()
        blocks = []
        for _ in range(depth):
            attention = PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))
            feed_forward = PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            blocks.append(nn.ModuleList([attention, feed_forward]))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Apply every block in order; each sub-layer adds its input back."""
        for attention, feed_forward in self.layers:
            x = attention(x) + x
            x = feed_forward(x) + x
        return x

class tab_transformer(nn.Module):
    """Transformer encoder that maps a token sequence to 6 x 21 class scores.

    Each token (a vector of ``input_size`` values) is linearly embedded, a
    learned class token is prepended, positional embeddings are added, and the
    sequence is run through ``Transformer``.  The pooled representation is
    projected to 126 values and reshaped to (batch, 6, 21).

    Args:
        input_size: length of every input token vector.
        input_hight: maximum number of tokens per sequence (sizes the
            positional embedding; spelling kept for keyword compatibility).
        dim: embedding width.
        depth: number of transformer blocks.
        heads: attention heads per block.
        mlp_dim: hidden width of the feed-forward sub-layers.
        pool: ``'mean'`` averages all tokens; any other value uses the class
            token.
        channels: accepted but unused.
        dim_head: feature size of every attention head.
        dropout: dropout inside the transformer blocks.
        emb_dropout: dropout applied to the embedded sequence.
    """

    def __init__(self, *, input_size, input_hight, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        self.input_size = input_size
        # attribute name (including its spelling) is part of the parameter
        # names of saved models, so it is left unchanged
        self.input_liniear = nn.Linear(input_size, dim)
        self.pos_embedding = nn.Parameter(torch.randn(1, input_hight + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)
        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
        self.pool = pool
        self.to_latent = nn.Identity()
        self.flatten = nn.Flatten(1, -1)
        # self.mlp_head = nn.Sequential(
        #     nn.LayerNorm(193*dim),
        #     nn.Linear(193*dim, input_size*6*21)
        # )
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, 6*21)
        )


    def forward(self, x):
        """Return scores of shape (batch, 6, 21).

        Args:
            x: tensor of shape (batch, tokens, input_size) with at most
               ``input_hight`` tokens.
        """
        # (the debugging print of the whole embedded tensor that ran on every
        # forward pass has been removed)
        x = self.input_liniear(x)
        b, n, _ = x.shape
        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        x = self.transformer(x)
        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
        x = self.to_latent(x)
        x = self.mlp_head(x)
        x = torch.reshape(x, (-1, 6, 21))
        return x

In [None]:
def tab2pitch(tab):
    """Convert one frame of per-string fret classes to a 44-bin pitch vector.

    Args:
        tab: sequence with one fret class per string; class 0 means the string
            is closed (not played).

    Returns:
        ``np.ndarray`` of length 44 with a 1 at every sounding pitch.
    """
    pitch_vector = np.zeros(44)
    string_pitches = [40, 45, 50, 55, 59, 64]
    for string_num, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            pitch_num = fret_class + string_pitches[string_num] - 41
            pitch_vector[pitch_num] = 1
    return pitch_vector

def pitch_precision(pred, gt):
    """Fraction of predicted pitches that are also in the ground truth.

    Args:
        pred: predicted fret classes, one row of per-string classes per frame.
        gt: ground-truth fret classes with the same layout.

    Returns:
        The precision, or 0.0 when nothing was predicted (instead of the NaN a
        0/0 division would give, which poisons any later average).
    """
    pitch_pred = np.array(list(map(tab2pitch,pred)))
    pitch_gt = np.array(list(map(tab2pitch,gt)))
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt).flatten())
    denominator = np.sum(pitch_pred.flatten())
    if denominator == 0:
        return 0.0
    return (1.0 * numerator) / denominator

def pitch_recall(pred, gt):
    """Fraction of ground-truth pitches that were predicted.

    Args:
        pred: predicted fret classes, one row of per-string classes per frame.
        gt: ground-truth fret classes with the same layout.

    Returns:
        The recall, or 0.0 when the ground truth contains no pitch.
    """
    pitch_pred = np.array(list(map(tab2pitch,pred)))
    pitch_gt = np.array(list(map(tab2pitch,gt)))
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt).flatten())
    denominator = np.sum(pitch_gt.flatten())
    if denominator == 0:
        return 0.0
    return (1.0 * numerator) / denominator

def pitch_f_measure(pred, gt):
    """Harmonic mean of pitch precision and recall (0.0 when both are 0)."""
    p = pitch_precision(pred, gt)
    r = pitch_recall(pred, gt)
    if p + r == 0:
        return 0.0
    # named f_score so the torch.nn.functional alias `f` is not shadowed
    f_score = (2 * p * r) / (p + r)
    return f_score

def tab2bin(tab):
    """Convert one frame of per-string fret classes to a 6 x 20 binary grid.

    Args:
        tab: sequence with one fret class per string; class 0 means the string
            is closed and leaves its row empty.

    Returns:
        ``np.ndarray`` of shape (6, 20) with a 1 at ``[string, class - 1]`` for
        every played string.
    """
    tab_arr = np.zeros((6,20))
    for string_num, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            fret_num = fret_class - 1
            tab_arr[string_num][fret_num] = 1
    return tab_arr

def tab_precision(pred, gt):
    """Fraction of predicted string/fret positions that are correct.

    The "closed" class is dropped by ``tab2bin`` so only positives count.

    Returns:
        The precision, or 0.0 when nothing was predicted (instead of NaN).
    """
    tab_pred = np.array(list(map(tab2bin,pred)))
    tab_gt = np.array(list(map(tab2bin,gt)))
    numerator = np.sum(np.multiply(tab_pred, tab_gt).flatten())
    denominator = np.sum(tab_pred.flatten())
    if denominator == 0:
        return 0.0
    return (1.0 * numerator) / denominator

def tab_recall(pred, gt):
    """Fraction of ground-truth string/fret positions that were predicted.

    The "closed" class is dropped by ``tab2bin`` so only positives count.

    Returns:
        The recall, or 0.0 when the ground truth has no played string.
    """
    tab_pred = np.array(list(map(tab2bin,pred)))
    tab_gt = np.array(list(map(tab2bin,gt)))
    numerator = np.sum(np.multiply(tab_pred, tab_gt).flatten())
    denominator = np.sum(tab_gt.flatten())
    if denominator == 0:
        return 0.0
    return (1.0 * numerator) / denominator

def tab_f_measure(pred, gt):
    """Harmonic mean of tab precision and recall (0.0 when both are 0)."""
    p = tab_precision(pred, gt)
    r = tab_recall(pred, gt)
    if p + r == 0:
        return 0.0
    # named f_score so the torch.nn.functional alias `f` is not shadowed
    f_score = (2 * p * r) / (p + r)
    return f_score

In [None]:
import scipy.stats as stats

# Freshly initialised instances of the three architectures. `net` is replaced
# by the loaded checkpoint a few lines below; `net2` and `vgg` stay untrained.
net = MyCNN()
net2 = MyCNN2()
vgg = MyVGG()
#net = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/mytabcnn-model.path",map_location='cuda:0')
# NOTE(review): torch.load unpickles a whole model object - only load
# checkpoints that come from a trusted source.
net = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/mytabcnn-model.path",map_location="cuda:0")
# net2 = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/mytabcnn2-model.path",map_location="cuda:0")
# netvgg = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/myvgg-model.path",map_location="cuda:0")
net.eval()
# Per-batch metric lists: pp/pr/pf = pitch precision/recall/F-measure,
# tp/tr/tf = tab precision/recall/F-measure. "tdr" is created but never
# filled in this cell.
# acc: `net` with mode smoothing over 16th-note segments.
acc = {}
acc["pp"] = []
acc["pr"] = []
acc["pf"] = []
acc["tp"] = []
acc["tr"] = []
acc["tf"] = []
acc["tdr"] = []
# acc2: `net` again, raw per-frame argmax without smoothing.
acc2 = {}
acc2["pp"] = []
acc2["pr"] = []
acc2["pf"] = []
acc2["tp"] = []
acc2["tr"] = []
acc2["tf"] = []
acc2["tdr"] = []
# accvgg: metrics of `netvgg`.
accvgg = {}
accvgg["pp"] = []
accvgg["pr"] = []
accvgg["pf"] = []
accvgg["tp"] = []
accvgg["tr"] = []
accvgg["tf"] = []
accvgg["tdr"] = []
# IDs whose leading integer is 0 form the validation set.
training_generator, validation_generator = partition_data(0)
# --- Pass 1: smoothed predictions of `net` -> acc ---
# NOTE(review): 615 is a hard-coded batch count; presumably
# len(validation_generator) - confirm.
for i in range(615):
  input_data1, input_data2, tempo = validation_generator[i]
  images = input_data1.astype(np.float32)
  labels = input_data2.astype(np.float32)
  # NOTE(review): this transpose needs a 4-D array, but partition_data builds
  # MyDataGenerator, whose batches are 3-D (batch, bins, window); only
  # MyDataGenerator4 appends the channel axis. Confirm which generator is meant.
  images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
  labels = torch.tensor(labels)
  output = net(images)
  # predicted class index per string: argmax over the last axis
  _, pre = torch.max(output.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()
  pred = np.zeros((128, 6), dtype=np.int16)

  # NOTE(review): `tempo` is the list returned by the generator (one entry per
  # frame), so this division raises TypeError as written. Presumably a single
  # tempo value is intended, giving the number of frames per 16th note - confirm.
  note16 = 2580/4/tempo
  # Replace every frame inside a 16th-note segment by the per-string mode of
  # the segment.
  for j in range(int(batch_size//note16)):
    out = pre[int(j * note16):int((j+1) * note16)]
    # NOTE(review): mode_val[0] assumes stats.mode keeps the reduced axis;
    # verify against the installed SciPy version.
    mode_val, mode_num = stats.mode(out, axis=0)
    for k in range(len(out)):
      for l, fret in enumerate(mode_val[0]):
        pred[int(j * note16)+k][l] = fret

  label = labels.to('cpu').detach().numpy().copy()
  # ground-truth class index per string
  gt = np.argmax(label, 2)
  acc["pp"].append(pitch_precision(pred, gt))
  acc["pr"].append(pitch_recall(pred, gt))
  acc["pf"].append(pitch_f_measure(pred, gt))
  acc["tp"].append(tab_precision(pred, gt))
  acc["tr"].append(tab_recall(pred, gt))
  acc["tf"].append(tab_f_measure(pred, gt))
# --- Pass 2: raw (unsmoothed) predictions of the same `net` -> acc2 ---
for i in range(615):
  input_data1, input_data2, tempo = validation_generator[i]
  images = input_data1.astype(np.float32)
  labels = input_data2.astype(np.float32)
  images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
  labels = torch.tensor(labels)
  output = net(images)
  _, pre = torch.max(output.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()

  label = labels.to('cpu').detach().numpy().copy()
  gt = np.argmax(label, 2)
  acc2["pp"].append(pitch_precision(pre, gt))
  acc2["pr"].append(pitch_recall(pre, gt))
  acc2["pf"].append(pitch_f_measure(pre, gt))
  acc2["tp"].append(tab_precision(pre, gt))
  acc2["tr"].append(tab_recall(pre, gt))
  acc2["tf"].append(tab_f_measure(pre, gt))
# --- Pass 3: raw predictions of `netvgg` -> accvgg ---
# NOTE(review): `netvgg` is not assigned in this cell (its torch.load line is
# commented out and the fresh instance above is named `vgg`), so this loop
# raises NameError unless `netvgg` is left over from another cell.
for i in range(615):
  input_data1, input_data2, tempo = validation_generator[i]
  images = input_data1.astype(np.float32)
  labels = input_data2.astype(np.float32)
  images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
  labels = torch.tensor(labels)
  output = netvgg(images)
  _, pre = torch.max(output.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()

  label = labels.to('cpu').detach().numpy().copy()
  gt = np.argmax(label, 2)
  accvgg["pp"].append(pitch_precision(pre, gt))
  accvgg["pr"].append(pitch_recall(pre, gt))
  accvgg["pf"].append(pitch_f_measure(pre, gt))
  accvgg["tp"].append(tab_precision(pre, gt))
  accvgg["tr"].append(tab_recall(pre, gt))
  accvgg["tf"].append(tab_f_measure(pre, gt))

KeyboardInterrupt: ignored

In [None]:
# Evaluate `netvgg` on the validation set (IDs with leading integer 0).
# NOTE(review): `netvgg` must already exist in the kernel; no cell visible
# here assigns it.
netvgg.eval()
# Rebinds `acc`, keeping only precision/recall for pitch (pp, pr) and tab
# (tp, tr); any earlier content of `acc` is discarded.
acc = {}
acc["pp"] = []
acc["pr"] = []
acc["tp"] = []
acc["tr"] = []
training_generator, validation_generator = partition_data(0)
# NOTE(review): 615 is a hard-coded batch count; presumably
# len(validation_generator) - confirm.
for i in range(615):
  input_data1, input_data2, tempo = validation_generator[i]
  images = input_data1.astype(np.float32)
  labels = input_data2.astype(np.float32)
  # NOTE(review): needs 4-D batches; MyDataGenerator (used by partition_data)
  # produces 3-D ones - confirm which generator is meant.
  images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
  labels = torch.tensor(labels)
  output = netvgg(images)
  # predicted class index per string: argmax over the last axis
  _, pre = torch.max(output.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()

  label = labels.to('cpu').detach().numpy().copy()
  # ground-truth class index per string
  gt = np.argmax(label, 2)
  acc["pp"].append(pitch_precision(pre, gt))
  acc["pr"].append(pitch_recall(pre, gt))
  acc["tp"].append(tab_precision(pre, gt))
  acc["tr"].append(tab_recall(pre, gt))
#np.savez("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved/acc.npz", acc)



In [None]:
from librosa.core.spectrum import perceptual_weighting
import scipy.stats as stats

# Evaluate the saved CNN on validation batches 0..614 after running a
# sliding-window mode filter over each string's predicted class sequence.

# torch.load returns the complete pickled model, so no MyCNN() instance has to be built first.
net = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/c_2022-05-13_16-33-31/path/mytabcnn-model.path",map_location='cuda:0')
net.eval()
# One list per metric; "tdr" is created here but nothing in this cell appends to it.
acc3 = {key: [] for key in ("pp", "pr", "pf", "tp", "tr", "tf", "tdr")}

# The filter window spans 2*windowsize + 1 consecutive frames.
windowsize = 2

training_generator, validation_generator = partition_data(0)
for i in range(615):
  input_data1, input_data2, tempo = validation_generator[i]
  images = input_data1.astype(np.float32)
  labels = input_data2.astype(np.float32)
  images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
  labels = torch.tensor(labels)
  output = net(images)
  _, pre = torch.max(output.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()

  # pre.T is a view, so the filter below rewrites `pre` in place as well.
  pret = pre.T
  for j, string in enumerate(pret):
    # Visit the window start positions in random order; because values are
    # replaced in place, the visiting order influences the result.
    index = np.arange(len(string)-(2*windowsize))
    np.random.shuffle(index)
    for k in index:
      # NOTE(review): the window covers frames k .. k+2*windowsize and its mode
      # is written to frame k (the left edge, not the centre) — confirm intended.
      window = pret[j][k:k+(2*windowsize)+1]
      mode_val, mode_num = stats.mode(window, axis = 0)
      # SciPy >= 1.11 returns a 0-d mode for 1-D input, older releases a
      # length-1 array; np.atleast_1d makes the indexing valid for both.
      pret[j][k] = np.atleast_1d(mode_val)[0]
  pred = pret.T

  label = labels.to('cpu').detach().numpy().copy()
  gt = np.argmax(label, 2)
  acc3["pp"].append(pitch_precision(pred, gt))
  acc3["pr"].append(pitch_recall(pred, gt))
  acc3["pf"].append(pitch_f_measure(pred, gt))
  acc3["tp"].append(tab_precision(pred, gt))
  acc3["tr"].append(tab_recall(pred, gt))
  acc3["tf"].append(tab_f_measure(pred, gt))



In [None]:
# Display the type of `acc3`; raises NameError when the cell that defines `acc3` has not been run in this kernel.
type(acc3)

NameError: ignored

In [None]:
# Average the per-batch metrics of the two runs and print them side by side
# (value from `acc` first, value from `acc2` second).
pp = np.average(acc["pp"])
pr = np.average(acc["pr"])
tp = np.average(acc["tp"])
tr = np.average(acc["tr"])
pp2 = np.average(acc2["pp"])
pr2 = np.average(acc2["pr"])
tp2 = np.average(acc2["tp"])
tr2 = np.average(acc2["tr"])

print("pp:" + str(pp) + " " + str(pp2))
print("pr:" + str(pr) + " " + str(pr2))
print("tp:" + str(tp) + " " + str(tp2))
print("tr:" + str(tr) + " " + str(tr2))
# F-measures are the harmonic mean of the *averaged* precision and recall,
# not the average of per-batch F-measures.
print("pf:" + str((2 * pp * pr)/(pp + pr)) + " " + str((2 * pp2 * pr2)/(pp2 + pr2)))
# The second line is the tab F-measure, so it is labelled "tf" (it was printed as a second "pf").
print("tf:" + str((2 * tp * tr)/(tp + tr)) + " " + str((2 * tp2 * tr2)/(tp2 + tr2)))

pp:0.8165048115130572 0.8329365695013382
pr:0.6724472672836339 0.6281856500520757
tp:0.6520357358297051 0.6720381347889317
tr:0.5673035914100911 0.5338326420650186
pf:0.7375071864900022 0.7162149659516778
pf:0.6067256364169871 0.5950154858197935


In [None]:
# Load the metric dict stored in acc.npz and print the mean tab precision and tab recall.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load files from a trusted source.
loaded = np.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved/acc.npz", allow_pickle=True)
acc = loaded["arr_0"]
# The dict was stored as a 0-d object array; .item() unwraps it.
acc = acc.item()
print(sum(acc["tp"])/len(acc["tp"]), np.average(acc["tr"]))
# for i, out in enumerate(acc["tf"]):
#   if np.isnan(out):
#     print(i)
#     print(out)

# Re-score validation batch 610 with the current `net`.
input_data1, input_data2, tempo = validation_generator[610]
images = input_data1.astype(np.float32)
labels = input_data2.astype(np.float32)
images = torch.tensor(np.transpose(images, (0,3,1,2)), device = "cuda:0")
labels = torch.tensor(labels)
output = net(images)
_, pre = torch.max(output.data, 2)
# Hand the metric helper a host-side NumPy array, as every other evaluation
# cell in this notebook does, instead of a CUDA tensor.
pre = pre.to('cpu').detach().numpy().copy()
label = labels.to('cpu').detach().numpy().copy()
gt = np.argmax(label, 2)
# print(tab2bin(pre[0]))
# print(tab2bin(gt[0]))
# print(tab_precision(pre, gt))
# print(tab_recall(pre, gt))
tab_f_measure(pre, gt)

0.7186167916471766 0.5910489966052926


NameError: ignored

In [None]:
!pip install pydub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
import IPython.display

In [None]:
# Directory holding the precomputed .npz annotation/representation files.
annos_path = "/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr/c/"
# os.listdir returns entries in arbitrary order; sort them so that
# anno_list[k] refers to the same file on every run.
anno_list = sorted(os.listdir(annos_path))

$$
Loss = -\frac{1}{N}\sum_{j=1}^{6}\sum_{i=1}^{N}\log p[z_{ij}\in C_{z_{ij}}] + \beta\left(\sum_{i=1}^{N}\frac{1}{6}\sum_{j=1}^{6}(x_{ij}-\overline{x_i})^2\right)
$$

$$
(\beta = 0.1,lr)
$$

$$
Loss = -\frac{1}{N}\sum_{j=1}^{6}\sum_{i=1}^{N}\log p[z_{ij}\in C_{z_{ij}}] + \beta\left(\sum_{i=1}^{N}\frac{1}{5}\sum_{j=i-2}^{i+2}(\overline{x}_j-\overline{\overline{x}})^2\right)
$$

In [None]:
#%matplotlib nbagg
plt.rcParams['figure.figsize'] = (16.0, 6.0)

import scipy.stats as stats

# Compare the CNN's raw frame-wise predictions with a version smoothed per
# note segment on one annotation file, then animate the smoothed prediction
# (left panel, red) next to the ground-truth labels (right panel, blue).

# torch.load returns the complete pickled model, so no MyCNN() instance has to be built first.
net = torch.load("/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved2/new/mytabcnn-model.path",map_location="cuda:0")
# Inference mode: keeps the model's dropout layers from randomising the predictions.
net.eval()

file_name = anno_list[24]
anno = annos_path + file_name
load = np.load(anno)
print(anno)
# The tempo is the second "-"-separated field of the file name.
tempo = int(file_name.split("-")[1])
print(tempo)
label = load["labels"]
repr = load["repr"]

fig = plt.figure()

ims = []

# Keep only as many label frames as fit into whole batches of 128.
labels = label[:len(label)//128*128]
# Zero-pad 4 frames on each side and cut one 9-frame window per original frame.
repr = np.pad(repr, [(4,4), (0,0)], mode = "constant")
input_data = []
for i in range(len(repr)-8):
  input_data.append(repr[i:i+9])
repr = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
# Split the windows into consecutive batches of 128.
input = []
for i in range(len(repr)//128):
  input.append(repr[i*128:(i+1)*128])
input_data = np.array(input)
# One-hot (frame, string, class) arrays: smoothed predictions and raw predictions.
reprs = np.zeros(((len(repr)//128*128, 6, 21)))
reprs2 = np.zeros(((len(repr)//128*128, 6, 21)))

# `results` holds the metrics of the smoothed predictions, `results2` those of the raw ones.
results = {key: [] for key in ("pp", "pr", "tp", "tr")}
results2 = {key: [] for key in ("pp", "pr", "tp", "tr")}

for i in range(len(repr)//128):
  images = torch.tensor(input_data[i], dtype=torch.float32, device="cuda")
  outputs = net(images)

  _, pre = torch.max(outputs.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()
  # NOTE(review): frames after the last complete segment keep the initial value 0 in `pred`.
  pred = np.zeros((128, 6), dtype=np.int16)

  # Segment length in frames.
  # NOTE(review): 2580 is presumably the approximate number of frames per minute
  # (22050 Hz / hop 512 * 60 ~= 2584), making this a sixteenth note — confirm.
  note16 = 2580/4/tempo
  # len(pre) is the batch length; this replaces a global `batch_size` that this cell never defined.
  for j in range(int(len(pre)//note16)):
    start = int(j * note16)
    out = pre[start:int((j+1) * note16)]
    mode_val, mode_num = stats.mode(out, axis=0)
    # Give every frame of the segment the per-string mode. np.atleast_2d copes
    # with SciPy >= 1.11 (mode has shape (6,)) and older releases (shape (1, 6)).
    pred[start:start+len(out)] = np.atleast_2d(mode_val)[0]

  label = labels[i*128:(i+1)*128]
  gt = np.argmax(label, 2)
  results["pp"].append(pitch_precision(pred,gt))
  results["pr"].append(pitch_recall(pred,gt))
  results["tp"].append(tab_precision(pred,gt))
  results["tr"].append(tab_recall(pred,gt))
  results2["pp"].append(pitch_precision(pre,gt))
  results2["pr"].append(pitch_recall(pre,gt))
  results2["tp"].append(tab_precision(pre,gt))
  results2["tr"].append(tab_recall(pre,gt))

  # One-hot encode both prediction variants for the animation below.
  for j, strings in enumerate(pred):
    for k, fret in enumerate(strings):
      reprs[i*128+j][k][fret] = 1
  for j, strings in enumerate(pre):
    for k, fret in enumerate(strings):
      reprs2[i*128+j][k][fret] = 1
reprs = np.array(reprs)
reprs2 = np.array(reprs2)

print(reprs.shape, labels.shape)
for i in results:
  print(i + str(np.average(results[i])) + " " + str(np.average(results2[i])))

for i, (rp, rp2, lb) in enumerate(zip(reprs, reprs2, labels)):
  im = Image.new('RGB', (1000, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # Two 6-line x 20-column grids: left panel at x offset 0, right panel at x offset 500.
  for x0 in (0, 500):
    for j in range(6):
      draw.line(((x0 + 23, (j+1) * 50), (x0 + 480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line(((x0 + (j+1) * 23, 50), (x0 + (j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # Left panel: smoothed prediction in red. Class index 0 is never drawn.
  for string, j_pr in enumerate(rp):
    for fret, k_pr in enumerate(j_pr):
      if fret != 0 and k_pr == 1:
        draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))

  # Right panel: ground-truth labels in blue. The raw prediction `rp2` is not drawn.
  for string, j_lb in enumerate(lb):
    for fret, k_lb in enumerate(j_lb):
      if fret != 0 and k_lb == 1:
        draw.pieslice(((500-17 + fret*23, 45 + (5-string)*50), (500-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(0, 0, 255))

  im = plt.imshow(np.array(im))

  ims.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#%matplotlib nbagg
plt.rcParams['figure.figsize'] = (16.0, 6.0)

import scipy.stats as stats

# Run two saved models (`net` and `vec1`) on one annotation file, print their
# metrics side by side and animate both predictions against the ground truth
# (`net` in the left panel, `vec1` in the right panel).

net = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/mytabcnn-model.path",map_location="cuda:0")
vec1 = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/saved2/new/tabvec2-model.path",map_location="cuda:0")
# Inference mode for both models so that any dropout layers are disabled while scoring.
net.eval()
vec1.eval()

file_name = anno_list[0]
anno = annos_path + file_name
load = np.load(anno)
print(anno)
# The tempo is the second "-"-separated field of the file name.
tempo = int(file_name.split("-")[1])
print(tempo)
label = load["labels"]
repr = load["repr"]

fig = plt.figure()

ims = []

# Keep only as many label frames as fit into whole batches of 128.
labels = label[:len(label)//128*128]
# Zero-pad 4 frames on each side and cut one 9-frame window per original frame.
repr = np.pad(repr, [(4,4), (0,0)], mode = "constant")
input_data = []
for i in range(len(repr)-8):
  input_data.append(repr[i:i+9])
repr = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
# Split the windows into consecutive batches of 128.
input = []
for i in range(len(repr)//128):
  input.append(repr[i*128:(i+1)*128])
input_data = np.array(input)
# One-hot (frame, string, class) arrays for the two models.
reprs = np.zeros(((len(repr)//128*128, 6, 21)))
reprsvec = np.zeros(((len(repr)//128*128, 6, 21)))

# `results` holds the metrics of `net`, `resultsvec` those of `vec1`.
results = {key: [] for key in ("pp", "pr", "tp", "tr")}
resultsvec = {key: [] for key in ("pp", "pr", "tp", "tr")}

for i in range(len(repr)//128):
  images = torch.tensor(input_data[i], dtype=torch.float32, device="cuda")
  outputs = net(images)
  outputsvec = vec1(images)

  # Predicted class per frame and string, copied to host memory (computed once per batch).
  _, pre = torch.max(outputs.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()
  _, prevec = torch.max(outputsvec.data, 2)
  prevec = prevec.to('cpu').detach().numpy().copy()

  label = labels[i*128:(i+1)*128]
  gt = np.argmax(label, 2)
  results["pp"].append(pitch_precision(pre,gt))
  results["pr"].append(pitch_recall(pre,gt))
  results["tp"].append(tab_precision(pre,gt))
  results["tr"].append(tab_recall(pre,gt))
  resultsvec["pp"].append(pitch_precision(prevec,gt))
  resultsvec["pr"].append(pitch_recall(prevec,gt))
  resultsvec["tp"].append(tab_precision(prevec,gt))
  resultsvec["tr"].append(tab_recall(prevec,gt))

  # One-hot encode both predictions for the animation below.
  for j, strings in enumerate(pre):
    for k, fret in enumerate(strings):
      reprs[i*128+j][k][fret] = 1
  for j, strings in enumerate(prevec):
    for k, fret in enumerate(strings):
      reprsvec[i*128+j][k][fret] = 1
reprs = np.array(reprs)
reprsvec = np.array(reprsvec)

print(reprs.shape, labels.shape)
for i in results:
  print(i + str(np.average(results[i])) + " " + str(np.average(resultsvec[i])))

for i, (rp, rp2, lb) in enumerate(zip(reprs, reprsvec, labels)):
  im = Image.new('RGB', (1000, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # Two 6-line x 20-column grids: left panel at x offset 0, right panel at x offset 500.
  for x0 in (0, 500):
    for j in range(6):
      draw.line(((x0 + 23, (j+1) * 50), (x0 + 480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line(((x0 + (j+1) * 23, 50), (x0 + (j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # Same colour scheme in both panels; class index 0 is never drawn.
  # red = predicted only, blue = labelled only, magenta = prediction matches the label.
  for x0, panel in ((0, rp), (500, rp2)):
    for string, (j_pr, j_lb) in enumerate(zip(panel, lb)):
      for fret, (k_pr, k_lb) in enumerate(zip(j_pr, j_lb)):
        if fret == 0:
          continue
        box = ((x0 - 17 + fret*23, 45 + (5-string)*50), (x0 - 7 + fret*23, 55 + (5-string)*50))
        if k_pr == 1:
          draw.pieslice(box, start=0, end=360, fill=(255, 0, 0))
        if k_lb == 1:
          if k_pr == k_lb:
            draw.pieslice(box, start=0, end=360, fill=(255, 0, 255))
          else:
            draw.pieslice(box, start=0, end=360, fill=(0, 0, 255))

  im = plt.imshow(np.array(im))

  ims.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#%matplotlib nbagg
plt.rcParams['figure.figsize'] = (16.0, 6.0)

import scipy.stats as stats

# Run the saved CNN (`net`) and the saved transformer (`trnsfrmr`) on one
# annotation file, print their metrics side by side and animate both
# predictions against the ground truth (CNN left, transformer right).

# torch.load returns the complete pickled models, so the MyCNN() and
# tab_transformer(input_size=9, input_hight=192, dim=1024, depth=6, heads=16,
# mlp_dim=2048, dropout=0.1, emb_dropout=0.1) instances that used to be built
# here were discarded immediately and are no longer constructed.
net = torch.load("/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved2/new/mytabcnn-model.path",map_location="cuda:0")
trnsfrmr = torch.load("/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved2/new/mytransformer-model.path",map_location="cuda:0")
# Inference mode for both models so that dropout is disabled while scoring.
net.eval()
trnsfrmr.eval()

file_name = anno_list[0]
anno = annos_path + file_name
load = np.load(anno)
print(anno)
# The tempo is the second "-"-separated field of the file name.
tempo = int(file_name.split("-")[1])
print(tempo)
label = load["labels"]
repr = load["repr"]

fig = plt.figure()

ims = []

# Keep only as many label frames as fit into whole batches of 128.
labels = label[:len(label)//128*128]
# Zero-pad 4 frames on each side and cut one 9-frame window per original frame.
repr = np.pad(repr, [(4,4), (0,0)], mode = "constant")
input_data = []
for i in range(len(repr)-8):
  input_data.append(repr[i:i+9])
repr = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
# Split the windows into consecutive batches of 128.
input = []
for i in range(len(repr)//128):
  input.append(repr[i*128:(i+1)*128])
input_data = np.array(input)
# One-hot (frame, string, class) arrays for the two models.
reprs = np.zeros(((len(repr)//128*128, 6, 21)))
reprsvec = np.zeros(((len(repr)//128*128, 6, 21)))

# `results` holds the metrics of the CNN, `resultsvec` those of the transformer.
results = {key: [] for key in ("pp", "pr", "tp", "tr")}
resultsvec = {key: [] for key in ("pp", "pr", "tp", "tr")}

for i in range(len(repr)//128):
  images = torch.tensor(input_data[i], dtype=torch.float32, device="cuda")
  outputs = net(images)
  outputsvec = trnsfrmr(images)

  # Predicted class per frame and string, copied to host memory (computed once per batch).
  _, pre = torch.max(outputs.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()
  _, prevec = torch.max(outputsvec.data, 2)
  prevec = prevec.to('cpu').detach().numpy().copy()

  label = labels[i*128:(i+1)*128]
  gt = np.argmax(label, 2)
  results["pp"].append(pitch_precision(pre,gt))
  results["pr"].append(pitch_recall(pre,gt))
  results["tp"].append(tab_precision(pre,gt))
  results["tr"].append(tab_recall(pre,gt))
  resultsvec["pp"].append(pitch_precision(prevec,gt))
  resultsvec["pr"].append(pitch_recall(prevec,gt))
  resultsvec["tp"].append(tab_precision(prevec,gt))
  resultsvec["tr"].append(tab_recall(prevec,gt))

  # One-hot encode both predictions for the animation below.
  for j, strings in enumerate(pre):
    for k, fret in enumerate(strings):
      reprs[i*128+j][k][fret] = 1
  for j, strings in enumerate(prevec):
    for k, fret in enumerate(strings):
      reprsvec[i*128+j][k][fret] = 1
reprs = np.array(reprs)
reprsvec = np.array(reprsvec)

print(reprs.shape, labels.shape)
for i in results:
  print(i + str(np.average(results[i])) + " " + str(np.average(resultsvec[i])))

for i, (rp, rp2, lb) in enumerate(zip(reprs, reprsvec, labels)):
  im = Image.new('RGB', (1000, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # Two 6-line x 20-column grids: left panel at x offset 0, right panel at x offset 500.
  for x0 in (0, 500):
    for j in range(6):
      draw.line(((x0 + 23, (j+1) * 50), (x0 + 480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line(((x0 + (j+1) * 23, 50), (x0 + (j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # Same colour scheme in both panels; class index 0 is never drawn.
  # red = predicted only, blue = labelled only, magenta = prediction matches the label.
  for x0, panel in ((0, rp), (500, rp2)):
    for string, (j_pr, j_lb) in enumerate(zip(panel, lb)):
      for fret, (k_pr, k_lb) in enumerate(zip(j_pr, j_lb)):
        if fret == 0:
          continue
        box = ((x0 - 17 + fret*23, 45 + (5-string)*50), (x0 - 7 + fret*23, 55 + (5-string)*50))
        if k_pr == 1:
          draw.pieslice(box, start=0, end=360, fill=(255, 0, 0))
        if k_lb == 1:
          if k_pr == k_lb:
            draw.pieslice(box, start=0, end=360, fill=(255, 0, 255))
          else:
            draw.pieslice(box, start=0, end=360, fill=(0, 0, 255))

  im = plt.imshow(np.array(im))

  ims.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr/c/00_BN1-129-Eb_comp.npz
129
torch.Size([128, 1, 192, 1024])


ValueError: ignored

<Figure size 1152x432 with 0 Axes>

In [None]:
# Print the per-batch (pitch precision, tab precision) pairs stored in `results`.
for i in zip(results["pp"],results["tp"]):
  print(i)

(0.9808306709265175, 0.9808306709265175)
(0.9572192513368984, 0.9572192513368984)
(0.9682539682539683, 0.9682539682539683)
(0.9925925925925926, 0.9925925925925926)
(0.9813432835820896, 0.9813432835820896)
(0.8535353535353535, 0.8535353535353535)
(0.9741935483870968, 0.9741935483870968)


In [None]:
#%matplotlib nbagg

# Smooth the current `net`'s predictions per note segment on one annotation
# file, score them and animate prediction vs. ground truth on one fretboard.
file_name = anno_list[0]
anno = annos_path + file_name
load = np.load(anno)
print(anno)
# The tempo is the second "-"-separated field of the file name.
tempo = int(file_name.split("-")[1])
print(tempo)
label = load["labels"]
repr = load["repr"]
print(label.shape, repr.shape)

fig = plt.figure()

ims = []

# Keep only as many label frames as fit into whole batches of 128.
labels = label[:len(label)//128*128]

# Zero-pad 4 frames on each side and cut one 9-frame window per original frame.
repr = np.pad(repr, [(4,4), (0,0)], mode = "constant")
input_data = []
for i in range(len(repr)-8):
  input_data.append(repr[i:i+9])
repr = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
# Split the windows into consecutive batches of 128.
input = []
for i in range(len(repr)//128):
  input.append(repr[i*128:(i+1)*128])
input_data = np.array(input)
# One-hot (frame, string, class) array of the smoothed predictions.
reprs = np.zeros(((len(repr)//128*128, 6, 21)))

results = {key: [] for key in ("pp", "pr", "pf", "tp", "tr", "tf")}

# `net` comes from an earlier cell; make sure it is in inference mode.
net.eval()
for i in range(len(repr)//128):
  images = torch.tensor(input_data[i], dtype=torch.float32, device="cuda")
  outputs = net(images)

  _, pre = torch.max(outputs.data, 2)
  pre = pre.to('cpu').detach().numpy().copy()

  # Segment length in frames.
  # NOTE(review): 2580 is presumably the approximate number of frames per minute
  # (22050 Hz / hop 512 * 60 ~= 2584), making this a sixteenth note — confirm.
  note16 = 2580/4/tempo
  # The segment counter must not be called `i`: reusing the batch index here
  # made the label slice and the one-hot offsets below point at the wrong frames.
  # len(pre) is the batch length (replaces a global `batch_size` this cell never defined).
  for seg in range(int(len(pre)//note16)):
    start = int(seg * note16)
    out = pre[start:int((seg+1) * note16)]
    mode_val, mode_num = stats.mode(out, axis=0)
    # Overwrite every frame of the segment with the per-string mode; frames after
    # the last complete segment keep their raw prediction. np.atleast_2d copes with
    # SciPy >= 1.11 (mode has shape (6,)) and older releases (shape (1, 6)).
    pre[start:start+len(out)] = np.atleast_2d(mode_val)[0]

  label = labels[i*128:(i+1)*128]
  gt = np.argmax(label, 2)
  results["pp"].append(pitch_precision(pre,gt))
  results["pr"].append(pitch_recall(pre,gt))
  results["pf"].append(pitch_f_measure(pre,gt))
  results["tp"].append(tab_precision(pre,gt))
  results["tr"].append(tab_recall(pre,gt))
  results["tf"].append(tab_f_measure(pre,gt))

  # One-hot encode the smoothed predictions for the animation below.
  for j, strings in enumerate(pre):
    for k, fret in enumerate(strings):
      reprs[i*128+j][k][fret] = 1
reprs = np.array(reprs)

print(reprs.shape, labels.shape)
for i in results:
  print(i + str(np.average(results[i])))

for rp, lb in zip(reprs, labels):
  im = Image.new('RGB', (500, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # 6-line x 20-column grid (drawn once; it was previously drawn twice with identical coordinates).
  for j in range(6):
    draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
  for j in range(20):
    draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # Class index 0 is never drawn.
  # red = predicted only, blue = labelled only, magenta = prediction matches the label.
  for string, (j_pr, j_lb) in enumerate(zip(rp, lb)):
    for fret, (k_pr, k_lb) in enumerate(zip(j_pr, j_lb)):
      if fret == 0:
        continue
      box = ((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50))
      if k_pr == 1:
        draw.pieslice(box, start=0, end=360, fill=(255, 0, 0))
      if k_lb == 1:
        if k_pr == k_lb:
          draw.pieslice(box, start=0, end=360, fill=(255, 0, 255))
        else:
          draw.pieslice(box, start=0, end=360, fill=(0, 0, 255))

  im = plt.imshow(np.array(im))

  ims.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

/content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr/c/00_BN1-129-Eb_comp.npz
129
(962, 6, 21) (962, 192)


ValueError: ignored

<Figure size 432x288 with 0 Axes>

In [None]:
#%matplotlib nbagg

# Score the current `net` on a single annotation file and animate prediction
# vs. ground truth on one fretboard.
anno_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/spec_repr/c/00_Jazz3-150-C_comp.npz"
load = np.load(anno_path)
label = load["labels"]
repr = load["repr"]
print(label.shape, repr.shape)

fig = plt.figure()

ims = []

# Keep only as many label frames as fit into whole batches of 128.
labels = label[:len(label)//128*128]

# Zero-pad 50 frames on each side and cut 100-frame windows.
# NOTE(review): this yields len(original)+1 windows, and other cells in this
# notebook feed 9-frame windows — confirm that `net` accepts 100-frame inputs.
repr = np.pad(repr, [(50,50), (0,0)], mode = "constant")
input_data = []
for i in range(len(repr)-99):
  input_data.append(repr[i:i+100])
repr = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
# Split the windows into consecutive batches of 128.
input = []
for i in range(len(repr)//128):
  input.append(repr[i*128:(i+1)*128])
input_data = np.array(input)
# One-hot (frame, string, class) array of the predictions.
reprs = np.zeros(((len(repr)//128*128, 6, 21)))

results = {key: [] for key in ("pp", "pr", "pf", "tp", "tr", "tf")}

# `net` comes from an earlier cell; make sure it is in inference mode.
net.eval()
for i in range(len(repr)//128):
  images = torch.tensor(input_data[i], dtype=torch.float32, device="cuda")
  outputs = net(images)

  _, pre = torch.max(outputs.data, 2)
  # Hand the metric helpers a host-side NumPy array, as the other evaluation
  # cells in this notebook do, instead of a CUDA tensor.
  pre = pre.to('cpu').detach().numpy().copy()
  label = labels[i*128:(i+1)*128]
  gt = np.argmax(label, 2)
  results["pp"].append(pitch_precision(pre,gt))
  results["pr"].append(pitch_recall(pre,gt))
  results["pf"].append(pitch_f_measure(pre,gt))
  results["tp"].append(tab_precision(pre,gt))
  results["tr"].append(tab_recall(pre,gt))
  results["tf"].append(tab_f_measure(pre,gt))

  # One-hot encode the predictions for the animation below.
  for j, strings in enumerate(pre):
    for k, fret in enumerate(strings):
      reprs[i*128+j][k][fret] = 1
reprs = np.array(reprs)

print(reprs.shape, labels.shape)
for i in results:
  print(i + str(np.average(results[i])))

for rp, lb in zip(reprs, labels):
  im = Image.new('RGB', (500, 350), (255, 255, 255))
  draw = ImageDraw.Draw(im)

  # 6-line x 20-column grid (drawn once; it was previously drawn twice with identical coordinates).
  for j in range(6):
    draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
  for j in range(20):
    draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

  # Class index 0 is never drawn.
  # red = predicted only, blue = labelled only, magenta = prediction matches the label.
  for string, (j_pr, j_lb) in enumerate(zip(rp, lb)):
    for fret, (k_pr, k_lb) in enumerate(zip(j_pr, j_lb)):
      if fret == 0:
        continue
      box = ((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50))
      if k_pr == 1:
        draw.pieslice(box, start=0, end=360, fill=(255, 0, 0))
      if k_lb == 1:
        if k_pr == k_lb:
          draw.pieslice(box, start=0, end=360, fill=(255, 0, 255))
        else:
          draw.pieslice(box, start=0, end=360, fill=(0, 0, 255))

  im = plt.imshow(np.array(im))

  ims.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

(1103, 6, 21) (1103, 192)


RuntimeError: ignored

<Figure size 432x288 with 0 Axes>

In [None]:
# Embed an audio player for the microphone recording of 00_Jazz3-150-C_comp.
audio_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/GuitarSet/audio/audio_mic/00_Jazz3-150-C_comp_mic.wav"
IPython.display.Audio(audio_path)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
!pip install ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6084 sha256=1c171cef76bda50246777bb6df86cd038ee5099bc346a7c0d517e304f55258ec
  Stored in directory: /root/.cache/pip/wheels/64/80/6e/caa3e16deb0267c3cbfd36862058a724144e19fdb9eb03af0f
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4


In [None]:
# Load the pickled model onto the first GPU. torch.load returns the model
# object itself, so constructing a fresh MyCNN() beforehand is unnecessary.
net: MyCNN = torch.load("/content/drive/MyDrive/lab/2021/MyTabCNN/model/mytabcnn-model.path",map_location='cuda:0')
# Inference mode; as the last expression this also displays the layer summary.
net.eval()

MyCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout2d(p=0.25, inplace=False)
  (drop2): Dropout(p=0.5, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=5952, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=126, bias=True)
)

In [None]:
#%matplotlib nbagg
# Animate the current `net`'s predictions for the file held in `load`
# (left over from an earlier cell), one fretboard image per frame.
data = load["repr"]
print(data.shape)
fig = plt.figure()
ims2 = []
# Zero-pad 4 frames on each side and cut one 9-frame window per original frame.
data = np.pad(data, [(4,4), (0,0)], mode = "constant")
input_data = []
for i in range(len(data)-8):
  input_data.append(data[i:i+9])
data = np.transpose(np.expand_dims(np.swapaxes(input_data, 0, 1), -1), (1,3,2,0))
input = []
print(data.shape)
# Split the windows into consecutive batches of 128.
for i in range(len(data)//128):
  input.append(data[i*128:(i+1)*128])
input_data = np.array(input)

# Distinct names per loop level: the batch loop, the one-hot fill and the
# drawing loop previously all reused `i`, and `data` was overwritten inside
# the loop whose bound had been derived from it.
for batch in input_data:
  images = torch.tensor(batch, dtype=torch.float32, device="cuda")
  outputs = net(images)
  _, predicted = torch.max(outputs.data, 2)
  # Copy the class indices to host memory once instead of indexing the GPU tensor element by element.
  predicted = predicted.to('cpu').detach().numpy().copy()
  onehot = np.zeros(((128, 6, 21)))
  for frame_idx, strings in enumerate(predicted):
    for string, fret in enumerate(strings):
      onehot[frame_idx][string][fret] = 1

  for frame in onehot:
    im = Image.new('RGB', (500, 350), (255, 255, 255))
    draw = ImageDraw.Draw(im)

    # 6-line x 20-column grid.
    for j in range(6):
      draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

    # Red dot for each predicted class; class index 0 is never drawn.
    for string, row in enumerate(frame):
      for fret, flag in enumerate(row):
        if fret != 0 and flag == 1:
          draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))
    im = plt.imshow(np.array(im))

    ims2.append([im])

# One animation frame per spectrogram frame, 23 ms apart.
ani = animation.ArtistAnimation(fig, ims2, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
#%matplotlib nbagg
# Animate the current `net`'s predictions for the first 9 validation batches.
# NOTE(review): other cells unpack two values from partition_data(0); confirm
# which version of partition_data this cell expects.
training_generator, validation_generator, partitionv = partition_data(0)
fig = plt.figure()
ims2 = []
# Run the batches on whatever device the model lives on, so the input and the
# weights never end up on different devices.
device = next(net.parameters()).device
for h in range(9):
  input_data = validation_generator[h]
  images = torch.tensor(np.transpose(input_data[0], (0,3,1,2)), dtype=torch.float32, device=device)
  print(images.dtype)
  outputs = net(images)
  _, predicted = torch.max(outputs.data, 2)
  # Copy the class indices to host memory once instead of indexing the tensor element by element.
  predicted = predicted.to('cpu').detach().numpy().copy()
  data = np.zeros(((128, 6, 21)))
  for i, strings in enumerate(predicted):
    for j, fret in enumerate(strings):
      data[i][j][fret] = 1

  for frame in data:
    im = Image.new('RGB', (500, 350), (255, 255, 255))
    draw = ImageDraw.Draw(im)

    # 6-line x 20-column grid.
    for j in range(6):
      draw.line(((23, (j+1) * 50), (480, (j+1) * 50)), fill=(0, 0, 0), width=2)
    for j in range(20):
      draw.line((((j+1) * 23, 50), ((j+1) * 23, 300)), fill=(0, 0, 0), width=2)

    # Red dot for each predicted class; class index 0 is never drawn.
    for string, row in enumerate(frame):
      for fret, flag in enumerate(row):
        if fret != 0 and flag == 1:
          draw.pieslice(((-17 + fret*23, 45 + (5-string)*50), (-7 + fret*23, 55 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))
    im = plt.imshow(np.array(im))

    ims2.append([im])

# `interval=` had no value (a SyntaxError); 23 ms matches the other animation cells in this notebook.
ani = animation.ArtistAnimation(fig, ims2, interval=23)
rc('animation', html='jshtml')
plt.close()
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
from scipy.io import wavfile
import librosa
from librosa import display

class AudioDataGenerator:
  """Load one WAV file and convert it into a spectral representation.

  `mode` selects the representation computed by `preprocess_audio`:
  "c" (constant-Q magnitude), "m" (mel spectrogram), "cm" (constant-Q and mel
  concatenated along axis 0) or "s" (STFT magnitude).
  """

  def __init__(self, path, mode="c"):
    """Store the audio file path and the preprocessing settings."""
    self.path = path

    # representation and its labels storage
    self.output = {}

    self.preproc_mode = mode
    self.downsample = True
    self.normalize = True
    self.sr_downs = 22050

    # CQT parameters
    self.cqt_n_bins = 192
    self.cqt_bins_per_octave = 24

    # STFT parameters
    self.n_fft = 2048
    self.hop_length = 512

  def load_audiodata(self):
    """Read `self.path`, preprocess it and return the output dict.

    Returns:
      `self.output`, with "repr" set to the preprocessed audio with axes 0 and
      1 swapped, and "labels" set to an all-zero (128, 6, 21) array.
    """
    self.sr_original, data = wavfile.read(self.path)
    self.sr_curr = self.sr_original

    # preprocess audio, store in output dict
    self.output["repr"] = np.swapaxes(self.preprocess_audio(data),0,1)

    # The labels are an all-zero placeholder of fixed shape.
    self.output["labels"] = np.zeros(((128, 6 ,21)))
    print(np.shape(self.output["repr"]))
    print(np.shape(self.output["labels"]))
    return self.output

  def preprocess_audio(self, data):
    """Convert raw samples into the representation selected by `self.preproc_mode`.

    The samples are cast to float, optionally normalised and resampled to
    `self.sr_downs`, then transformed. Mode "c" additionally plots the
    constant-Q spectrogram. An unknown mode prints a message and returns the
    (normalised/resampled) samples untransformed.

    Args:
      data: array of audio samples.

    Returns:
      The computed representation as an array.
    """
    data = data.astype(float)
    if self.normalize:
      data = librosa.util.normalize(data)
    if self.downsample:
      # Keyword arguments: librosa >= 0.10 no longer accepts the sample rates positionally.
      data = librosa.resample(data, orig_sr=self.sr_original, target_sr=self.sr_downs)
      self.sr_curr = self.sr_downs
    if self.preproc_mode == "c":
      data = np.abs(librosa.cqt(data,
          hop_length=self.hop_length,
          sr=self.sr_curr,
          n_bins=self.cqt_n_bins,
          bins_per_octave=self.cqt_bins_per_octave))
      # Show the constant-Q spectrogram in dB relative to its maximum.
      cqt_amplitude = data
      fig = plt.figure()
      ax = fig.add_subplot()
      librosa.display.specshow(librosa.amplitude_to_db(cqt_amplitude, ref=np.max), sr=self.sr_curr, x_axis='time', y_axis='cqt_hz')
      plt.colorbar(format='%+2.0f dB')
      ax.set_title('constant-Q power spectrum')
      plt.tight_layout()
      plt.show()

    elif self.preproc_mode == "m":
      data = librosa.feature.melspectrogram(y=data, sr=self.sr_curr, n_fft=self.n_fft, hop_length=self.hop_length)
    elif self.preproc_mode == "cm":
      cqt = np.abs(librosa.cqt(data,
          hop_length=self.hop_length,
          sr=self.sr_curr,
          n_bins=self.cqt_n_bins,
          bins_per_octave=self.cqt_bins_per_octave))
      mel = librosa.feature.melspectrogram(y=data, sr=self.sr_curr, n_fft=self.n_fft, hop_length=self.hop_length)
      data = np.concatenate((cqt,mel),axis = 0)
    elif self.preproc_mode == "s":
      data = np.abs(librosa.stft(data, n_fft=self.n_fft, hop_length=self.hop_length))
    else:
      print ("invalid representation mode.")
    print(np.shape(data))

    return data

In [None]:
class DataGenerator2():
    """Serve fixed-size batches of context windows from an in-memory representation.

    ``data`` is a mapping with two entries:
      * ``data["repr"]``   - 2-D array; axis 0 is the frame axis that windows
        are cut along, axis 1 holds the frequency bins;
      * ``data["labels"]`` - indexable; only element 0 is read (see
        ``__data_generation``).

    Indexing the generator with a batch number returns ``(X, y)`` where ``X``
    has shape ``(batch_size, n_bins, con_win_size, 1)`` and ``y`` has shape
    ``(batch_size,) + label_dim``.

    Args:
        data: mapping described above.
        batch_size: number of windows per batch.
        shuffle: if true, frame order is reshuffled by ``on_epoch_end``.
        label_dim: shape of a single label.
        spec_repr: "c", "m", "cm" or "s"; fixes the expected number of bins.
        con_win_size: number of frames in each context window.

    Raises:
        ValueError: if ``spec_repr`` is not a supported representation.
    """

    # Number of frequency bins expected for each supported representation.
    _N_BINS = {"c": 192, "m": 128, "cm": 320, "s": 1025}

    def __init__(self, data, batch_size=128, shuffle=False, label_dim = (6,21), spec_repr="c", con_win_size=9):
        # Reject unknown representations up front; otherwise X_dim would stay
        # undefined and the first batch request would fail with AttributeError.
        if spec_repr not in self._N_BINS:
            raise ValueError(
                "unsupported spec_repr %r (expected one of %s)"
                % (spec_repr, ", ".join(self._N_BINS)))

        self.list_IDs = np.arange(len(data["repr"]))
        self.data = data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.label_dim = label_dim
        self.spec_repr = spec_repr
        self.con_win_size = con_win_size
        self.halfwin = con_win_size // 2

        self.X_dim = (self.batch_size, self._N_BINS[spec_repr], self.con_win_size, 1)
        self.y_dim = (self.batch_size, self.label_dim[0], self.label_dim[1])

        self.on_epoch_end()

    def __len__(self):
        """Number of complete batches per epoch (a trailing partial batch is not counted)."""
        return len(self.data["repr"]) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` pair for batch number ``index``.

        No bounds check is made: an index at or past ``len(self)`` yields a
        batch whose unused rows are all zeros.
        """
        # positions of this batch inside the (possibly shuffled) frame order
        indexes = self.indexes[index*self.batch_size : (index+1)*self.batch_size]

        # frame IDs stored at those positions
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the frame order, shuffling it when ``self.shuffle`` is set."""
        self.indexes = np.arange(len(self.data["repr"]))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Build one batch: a context window and a label for every frame ID given.

        X : (batch_size, n_bins, con_win_size, 1)
        y : (batch_size,) + label_dim
        """
        # Zero-initialised so that a short batch never exposes uninitialised
        # memory in the rows that are not filled below.
        X = np.zeros(self.X_dim)
        y = np.zeros(self.y_dim)
        if len(list_IDs_temp) == 0:
            return X, y

        loaded = self.data
        # Pad the frame axis once per batch (not once per sample) so that a
        # full window can be cut around the first and last frames as well.
        full_x = np.pad(loaded["repr"], [(self.halfwin,self.halfwin), (0,0)], mode='constant')

        for i, frame_idx in enumerate(list_IDs_temp):
            # In padded coordinates the window centred on frame_idx starts at
            # frame_idx itself.
            sample_x = full_x[frame_idx : frame_idx + self.con_win_size]
            # (window, bins) -> (bins, window, 1)
            X[i,] = np.expand_dims(np.swapaxes(sample_x, 0, 1), -1)

            # NOTE(review): every sample receives labels[0], not the label of
            # its own frame - presumably because the labels supplied here are
            # placeholders; confirm before using y for anything.
            y[i,] = loaded["labels"][0]

        return X, y

In [None]:
# Recording to run through the network; the cell that saves the animation
# reuses `audio_path` to name its output file.
audio_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/my_test_audio/WIN_20220331_03_10_44_Pro.wav"
# mode="c" presumably selects the constant-Q representation - confirm against
# AudioDataGenerator.__init__.
gen = AudioDataGenerator(path = audio_path, mode="c")
# `validation` is what the DataGenerator2 cell below receives as its `data`.
# NOTE(review): the recorded output of this cell is a ParameterError, so one
# of these calls failed on the last run - re-check before relying on it.
validation = gen.load_audiodata()
# Last expression of the cell: renders an audio player for the recording.
IPython.display.Audio(audio_path)

ParameterError: ignored

In [None]:
# Render the network's predictions for the loaded recording as an animated
# fretboard diagram: one animation frame per predicted frame.
fig = plt.figure()
ax = fig.add_subplot()
my_validation = DataGenerator2(validation)
ims3 = []  # ArtistAnimation expects one list of artists per animation frame


def draw_tab_frame(frame_classes):
  """Draw one fretboard picture and return it as a PIL image.

  frame_classes holds one predicted class index per string. A red dot is
  drawn for indices 1..9 only; index 0 and indices above 9 leave the string
  empty, exactly as the original nested loops did.
  """
  board = Image.new('RGB', (1000, 350), (255, 255, 255))
  draw = ImageDraw.Draw(board)

  # Six horizontal string lines; the end point lies beyond the image width,
  # so each line simply runs to the right edge.
  for j in range(6):
    draw.line(((92, (j+1) * 50), (1800, (j+1) * 50)), fill=(0, 0, 0), width=2)
  # Ten vertical lines, 92 px apart.
  for j in range(10):
    draw.line((((j+1) * 92, 50), ((j+1) * 92, 300)), fill=(0, 0, 0), width=2)

  for string, fret in enumerate(frame_classes):
    if 1 <= fret <= 9:
      # 40 px dot centred at x = fret*92 - 48; string 5 sits on the top line
      # and string 0 on the bottom line.
      draw.pieslice(((-68 + fret*92, 30 + (5-string)*50), (-28 + fret*92, 70 + (5-string)*50)), start=0, end=360, fill=(255, 0, 0))
  return board


# Look at no more than the first 9 batches, as before, but never index past
# the batches the generator really holds: DataGenerator2.__getitem__ does not
# bounds-check and would return rows that correspond to no real frame.
n_batches = min(9, len(my_validation))

for h in range(n_batches):
  input_data = my_validation[h]
  # (batch, bins, window, 1) -> (batch, 1, bins, window)
  images = torch.Tensor(np.transpose(input_data[0], (0,3,1,2)))
  # Inference only, so skip autograd bookkeeping.
  # NOTE(review): if `net` contains dropout/batch-norm, make sure it has been
  # switched to eval mode before this cell runs.
  with torch.no_grad():
    outputs = net(images)
  # Index of the highest score along dim 2, for every frame and string.
  _, predicted = torch.max(outputs, 2)

  # Iterating the predictions directly removes the hard-coded 128-row one-hot
  # buffer, so the cell no longer depends on the generator's batch size.
  for frame_classes in predicted.tolist():
    board = draw_tab_frame(frame_classes)
    ims3.append([ax.imshow(np.array(board))])

ani = animation.ArtistAnimation(fig, ims3, interval=20)
rc('animation', html='jshtml')
# Close the figure so only the animation widget is displayed, not a static plot.
plt.close(fig)
ani

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the animation beside the test recordings, named after the audio file.
video_name = os.path.splitext(os.path.basename(audio_path))[0] + ".mp4"
video_path = "/content/drive/MyDrive/lab/2021/MyTabCNN/data/my_test_audio/" + video_name
ani.save(video_path, writer="ffmpeg")