In [None]:
# Mount Google Drive at /content/drive so later cells can reach files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Show the GPU attached to this runtime (driver / CUDA version, memory usage).
!nvidia-smi

Fri Jun 24 17:11:36 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    39W / 300W |   1441MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Copy the spec_repr directory from Drive into /content, where data_path points.
!cp -r /content/drive/MyDrive/gozu/2021/MyTabCNN/data/spec_repr /content

In [None]:
# coding:utf-8

import pandas as pd
import sys
import numpy as np
import os
os.chdir("/content/drive/MyDrive/gozu/2021/MyTabCNN/model")
from MyDataGenerator import MyDataGenerator
import torch
import torch.nn as nn
import torch.nn.functional as f
import datetime

# Run configuration.
batch_size = 128
epoch = 1  # 8
con_win_size = 9
spec_repr = "c"
data_path = "/content/spec_repr/"
id_file = "id.csv"
save_path = "saved/"

# Each run writes into its own "<spec_repr>_<timestamp>/" folder below save_path.
save_folder = f"{save_path}{spec_repr}_{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}/"
if not os.path.exists(save_folder):
    os.makedirs(save_folder)
log_file = f"{save_folder}log.txt"

# One entry per epoch; every metric key starts with an empty list that
# evaluate() appends one value to per batch.
metrics = {}
for i in range(epoch):
    metrics[i] = {name: [] for name in ("pp", "pr", "pf", "tp", "tr", "tf", "tdr")}
print(metrics)

# The first dimension of input_shape depends on the chosen representation;
# an unknown spec_repr leaves input_shape undefined.
if spec_repr in ("c", "m", "cm", "s"):
    input_shape = ({"c": 192, "m": 128, "cm": 320, "s": 1025}[spec_repr], con_win_size, 1)

# these probably won't ever change
num_classes = 21
num_strings = 6

# The first column of the (header-less) id file lists the sample IDs.
csv_file = f"{data_path}{id_file}"
list_IDs = list(pd.read_csv(csv_file, header=None)[0])

def partition_data(data_split):
    """Split ``list_IDs`` by guitarist and build the two batch generators.

    An ID whose integer prefix (the text before the first ``_``) equals
    ``data_split`` goes to the validation set; every other ID goes to the
    training set. As a side effect the folder ``save_folder/<data_split>/`` is
    created if it does not exist yet.

    Args:
        data_split: guitarist number to hold out for validation.

    Returns:
        ``(training_generator, validation_generator)`` - two ``MyDataGenerator``
        instances; only the training generator is created with ``shuffle=True``.
    """
    partition = {"training": [], "validation": []}
    for ID in list_IDs:
        guitarist = int(ID.split("_")[0])
        subset = "validation" if guitarist == data_split else "training"
        partition[subset].append(ID)

    # Both generators share every setting except the ID list and shuffling.
    shared_kwargs = dict(data_path=data_path,
                         batch_size=batch_size,
                         spec_repr=spec_repr,
                         con_win_size=con_win_size)
    training_generator = MyDataGenerator(partition["training"], shuffle=True, **shared_kwargs)
    validation_generator = MyDataGenerator(partition["validation"], shuffle=False, **shared_kwargs)

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    split_folder = save_folder + str(data_split) + "/"
    os.makedirs(split_folder, exist_ok=True)
    return training_generator, validation_generator

def tab2pitch(tab):
    """Turn one tablature frame into a 44-element multi-hot pitch vector.

    ``tab`` holds one fret class per string. Class 0 marks a closed string and
    is skipped; any other class sets slot ``fret_class + open_pitch - 41`` to 1,
    where ``open_pitch`` is taken from the per-string table below.
    """
    open_pitches = [40, 45, 50, 55, 59, 64]
    multi_hot = np.zeros(44)
    for idx, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            slot = fret_class + open_pitches[idx] - 41
            multi_hot[slot] = 1
    return multi_hot

def pitch_precision(pred, gt):
    """Pitch-level precision of predicted tablature against the ground truth.

    Every frame of ``pred`` and ``gt`` is converted with ``tab2pitch``; the
    result is (pitches present in both) / (pitches present in ``pred``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The precision, or 0.0 when ``pred`` contains no pitch at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    denominator = np.sum(pitch_pred)
    if denominator == 0:
        return 0.0
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt))
    return (1.0 * numerator) / denominator

def pitch_recall(pred, gt):
    """Pitch-level recall of predicted tablature against the ground truth.

    Every frame of ``pred`` and ``gt`` is converted with ``tab2pitch``; the
    result is (pitches present in both) / (pitches present in ``gt``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The recall, or 0.0 when ``gt`` contains no pitch at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    denominator = np.sum(pitch_gt)
    if denominator == 0:
        return 0.0
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt))
    return (1.0 * numerator) / denominator

def pitch_f_measure(pred, gt):
    """Harmonic mean of ``pitch_precision`` and ``pitch_recall``.

    Returns 0.0 when precision and recall are both zero instead of dividing by
    zero. The result is returned directly rather than through a local named
    ``f``, which would shadow the module-level ``torch.nn.functional`` alias.
    """
    p = pitch_precision(pred, gt)
    r = pitch_recall(pred, gt)
    if p + r == 0:
        return 0.0
    return (2 * p * r) / (p + r)

def tab2bin(tab):
    """Turn one tablature frame into a 6 x 20 binary string/fret matrix.

    ``tab`` holds one fret class per string. Class 0 marks a closed string and
    leaves its row empty; class ``k > 0`` sets column ``k - 1`` of that row to 1.
    """
    fret_matrix = np.zeros((6, 20))
    for row, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            fret_matrix[row][fret_class - 1] = 1
    return fret_matrix

def tab_precision(pred, gt):
    """Tablature-level precision (exact string/fret matches).

    Every frame is converted with ``tab2bin``, which drops the "closed" class so
    only positives are counted; the result is (string/fret cells set in both) /
    (cells set in ``pred``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The precision, or 0.0 when ``pred`` has no fretted string at all (the
        ratio would otherwise be 0/0 = nan and poison any later average).
    """
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    denominator = np.sum(tab_pred)
    if denominator == 0:
        return 0.0
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    numerator = np.sum(np.multiply(tab_pred, tab_gt))
    return (1.0 * numerator) / denominator

def tab_recall(pred, gt):
    """Tablature-level recall (exact string/fret matches).

    Every frame is converted with ``tab2bin``, which drops the "closed" class so
    only positives are counted; the result is (string/fret cells set in both) /
    (cells set in ``gt``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The recall, or 0.0 when ``gt`` has no fretted string at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    denominator = np.sum(tab_gt)
    if denominator == 0:
        return 0.0
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    numerator = np.sum(np.multiply(tab_pred, tab_gt))
    return (1.0 * numerator) / denominator

def tab_f_measure(pred, gt):
    """Harmonic mean of ``tab_precision`` and ``tab_recall``.

    Returns 0.0 when precision and recall are both zero instead of dividing by
    zero. The result is returned directly rather than through a local named
    ``f``, which would shadow the module-level ``torch.nn.functional`` alias.
    """
    p = tab_precision(pred, gt)
    r = tab_recall(pred, gt)
    if p + r == 0:
        return 0.0
    return (2 * p * r) / (p + r)

def tab_disamb(pred, gt):
    """Ratio of tablature precision to pitch precision.

    Returns 0.0 when the pitch precision is zero instead of dividing by zero.
    """
    tp = tab_precision(pred, gt)
    pp = pitch_precision(pred, gt)
    if pp == 0:
        return 0.0
    return tp / pp

def evaluate(y_pred, y_gt, e):
    """Score one batch and append every metric to ``metrics[e]``.

    Args:
        y_pred: predicted fret classes for the batch.
        y_gt: ground-truth fret classes for the batch.
        e: epoch index selecting the entry of the global ``metrics`` dict.
    """
    epoch_metrics = metrics[e]
    scorers = (
        ("pp", pitch_precision),
        ("pr", pitch_recall),
        ("pf", pitch_f_measure),
        ("tp", tab_precision),
        ("tr", tab_recall),
        ("tf", tab_f_measure),
        ("tdr", tab_disamb),
    )
    for key, scorer in scorers:
        epoch_metrics[key].append(scorer(y_pred, y_gt))

def save_results_csv():
    """Write the global ``metrics`` dict to ``tabcnn-out.npy`` inside ``save_folder``.

    Despite the name, the file is written with ``np.save`` (NumPy format), not CSV.
    """
    out_path = save_folder + "tabcnn-out.npy"
    np.save(out_path, metrics)

class MyCNN(torch.nn.Module):
    """Three-layer CNN producing one score vector per guitar string.

    The network maps a single-channel input to ``NUM_STRINGS * NUM_CLASSES`` raw
    scores and reshapes them to ``(batch, NUM_STRINGS, NUM_CLASSES)``. No
    softmax is applied; the caller feeds the scores to ``CrossEntropyLoss``.

    ``fc1`` expects 5952 flattened features, which is what the conv/pool stack
    yields for a 192 x 9 input (64 channels * 93 * 1).
    """

    # Class-level constants so instances restored with torch.load see them too.
    NUM_STRINGS = 6
    NUM_CLASSES = 21

    def __init__(self):
        super(MyCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3)
        self.conv2 = torch.nn.Conv2d(32, 64, 3)
        self.conv3 = torch.nn.Conv2d(64, 64, 3)

        self.pool = torch.nn.MaxPool2d(2, 2)  # kernel size, stride

        self.drop1 = torch.nn.Dropout2d(0.25)
        self.drop2 = torch.nn.Dropout(0.5)
        self.flatten = torch.nn.Flatten()

        self.fc1 = torch.nn.Linear(5952, 128)
        self.fc2 = torch.nn.Linear(128, self.NUM_STRINGS * self.NUM_CLASSES)

    def forward(self, x):
        """Return raw scores of shape ``(batch, NUM_STRINGS, NUM_CLASSES)``.

        Args:
            x: input tensor laid out as ``(batch, 1, height, width)``.
        """
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = self.pool(x)
        x = self.drop1(x)
        x = self.flatten(x)
        x = f.relu(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)
        # -1 lets the batch dimension follow the input instead of assuming 128,
        # so partial batches and other batch sizes work as well.
        x_out = torch.reshape(x, (-1, self.NUM_STRINGS, self.NUM_CLASSES))
        return x_out

if __name__ == '__main__':
    # Train MyCNN with guitarist 0 held out, then score the trained network on
    # the training batches and on the held-out validation batches. Scoring runs
    # under torch.no_grad() (no autograd graph is needed) and predictions are
    # moved to CPU/numpy once per batch, because the metric helpers read them
    # element by element and a CUDA tensor would force a device sync per read.

    print("beginning")

    net = MyCNN()
    device = torch.device('cuda:0')
    print('using device:', device)
    net.to(device)
    print(net)

    criterion = torch.nn.CrossEntropyLoss()  # loss computation
    optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)

    for e in range(epoch):
        net.train()
        training_generator, validation_generator = partition_data(0)
        # NOTE(review): 3076 and 615 look like the batch counts of the training
        # and validation generators - confirm against MyDataGenerator.
        for i in range(3076):
            input_data = training_generator[i]
            images = input_data[0].astype(np.float32)
            labels = input_data[1].astype(np.float32)
            # Reorder the axes to (0, 3, 1, 2) before feeding the conv layers.
            images = torch.tensor(np.transpose(images, (0,3,1,2)), device=device)
            labels = torch.tensor(labels, device=device)

            optimizer.zero_grad()
            output = net(images)
            # Total loss = sum of one cross-entropy term per string.
            loss = 0.0
            for j in range(6):
                loss += criterion(output[:,j,:], labels[:,j,:])
            loss.backward()
            optimizer.step()

            # Dump the offending batch and stop training once the loss is nan.
            if np.isnan(loss.item()):
                print(output,images,labels,loss)
                break

            if i % 100 == 0:
                print(f"training log: {e} epoch, {i}/3076 loss={loss.item()}")
                print(datetime.datetime.now(), flush=True)

        # --- score on the training batches ---
        net.eval()  # switch dropout / batchnorm to inference behaviour
        training_generator, validation_generator = partition_data(0)
        with torch.no_grad():
            for i in range(3076):
                input_data = training_generator[i]
                images = input_data[0].astype(np.float32)
                labels = input_data[1].astype(np.float32)
                imagesNew = torch.tensor(np.transpose(images, (0,3,1,2)), device=device)
                outputs = net(imagesNew)
                _, predicted = torch.max(outputs, 2)
                predicted = predicted.cpu().numpy()
                gt = np.argmax(labels, 2)
                evaluate(predicted, gt, e)
                if i % 100 == 0:
                    print(f"evaluating log: {e} epoch, {i}/3076 acc_tf={tab_f_measure(predicted, gt)}", flush=True)
                    print(datetime.datetime.now(), flush=True)

        # --- score on the held-out validation batches ---
        net.eval()  # switch dropout / batchnorm to inference behaviour
        training_generator, validation_generator = partition_data(0)
        with torch.no_grad():
            for i in range(615):
                input_data = validation_generator[i]
                images = input_data[0].astype(np.float32)
                labels = input_data[1].astype(np.float32)
                images = torch.tensor(np.transpose(images, (0,3,1,2)), device=device)
                outputs = net(images)
                _, predicted = torch.max(outputs, 2)
                predicted = predicted.cpu().numpy()
                gt = np.argmax(labels, 2)
                evaluate(predicted, gt, e)

                if i % 100 == 0:
                    print(f"evaluating log: {e} epoch, {i}/615 acc_tf={tab_f_measure(predicted, gt)}", flush=True)
                    print(datetime.datetime.now(), flush=True)
    torch.save(net, save_folder + 'mytabcnn-model.path')
    save_results_csv()


{0: {'pp': [], 'pr': [], 'pf': [], 'tp': [], 'tr': [], 'tf': [], 'tdr': []}}
beginning
using device: cuda:0
MyCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout2d(p=0.25, inplace=False)
  (drop2): Dropout(p=0.5, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=5952, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=126, bias=True)
)
training log: 0 epoch, 0/3076 loss=18.21452522277832
2022-06-24 17:17:55.950061
training log: 0 epoch, 100/3076 loss=5.500596523284912
2022-06-24 17:19:30.976624
training log: 0 epoch, 200/3076 loss=3.9397127628326416
2022-06-24 17:21:04.762466
training log: 0 epoch, 300/3076 loss=2.9226622581481934
2022-06-24 17:22:38.599449
training log: 0 epoch, 400/3076 

In [None]:
# Load the metrics dict saved by the training run. np.save pickled the dict,
# hence allow_pickle=True (only do this for files you wrote yourself).
tabcnn = np.load(
    "/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved/c_2022-06-24_17-17-52/tabcnn-out.npy",
    allow_pickle=True,
).item()
# Entries 0..3075 come from the training batches; the next 615 are validation.
print(np.average(tabcnn[0]["tr"][3076:3076+615]))

0.7295207825191594


In [None]:
# net = MyCNN()
# net = torch.load("/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved/c_2022-06-24_17-17-52/mytabcnn-model.path", map_location="cuda:0")

In [33]:
# Score the current `net` on the validation batches of split 0.
# Relies on `net` and the helper functions already being defined in the kernel.
net.eval()
acc = {name: [] for name in ("pp", "pr", "pf", "tp", "tr", "tf", "tdr")}
training_generator, validation_generator = partition_data(0)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for i in range(615):
        if i % 100 == 0:
            print(i)
        input_data1, input_data2 = validation_generator[i]
        images = input_data1.astype(np.float32)
        labels = input_data2.astype(np.float32)
        images = torch.tensor(np.transpose(images, (0, 3, 1, 2)), device="cuda:0")
        output = net(images)

        _, pre = torch.max(output, 2)
        # The metric helpers index element by element; hand them a CPU array
        # rather than a CUDA tensor to avoid one device sync per element.
        pre = pre.cpu().numpy()
        gt = np.argmax(labels, 2)
        acc["pp"].append(pitch_precision(pre, gt))
        acc["pr"].append(pitch_recall(pre, gt))
        acc["pf"].append(pitch_f_measure(pre, gt))
        acc["tp"].append(tab_precision(pre, gt))
        acc["tr"].append(tab_recall(pre, gt))
        acc["tf"].append(tab_f_measure(pre, gt))
        # "tdr" was initialised above but never filled; record it like evaluate() does.
        acc["tdr"].append(tab_disamb(pre, gt))
#np.savez("/content/drive/MyDrive/gozu/2021/MyTabCNN/model/saved/acc2.npz", acc)

0




In [34]:
# Mean of the per-batch tab recall values stored in acc["tr"].
np.average(acc['tr'])

0.48553612367256327

In [27]:
# coding:utf-8

import pandas as pd
import sys
import numpy as np
import os
os.chdir("/content/drive/MyDrive/gozu/2021/MyTabCNN/model")
from MyDataGenerator import MyDataGenerator
import torch
import torch.nn as nn
import torch.nn.functional as f
import datetime

# Run configuration.
batch_size = 128
epoch = 1  # 8
con_win_size = 9
spec_repr = "c"
data_path = "/content/spec_repr/"
id_file = "id.csv"
save_path = "saved/"

# Each run writes into its own "<spec_repr>_<timestamp>/" folder below save_path.
save_folder = f"{save_path}{spec_repr}_{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}/"
if not os.path.exists(save_folder):
    os.makedirs(save_folder)
log_file = f"{save_folder}log.txt"

# One entry per epoch; every metric key starts with an empty list that
# evaluate() appends one value to per batch.
metrics2 = {}
for i in range(epoch):
    metrics2[i] = {name: [] for name in ("pp", "pr", "pf", "tp", "tr", "tf", "tdr")}
print(metrics2)

# The first dimension of input_shape depends on the chosen representation;
# an unknown spec_repr leaves input_shape undefined.
if spec_repr in ("c", "m", "cm", "s"):
    input_shape = ({"c": 192, "m": 128, "cm": 320, "s": 1025}[spec_repr], con_win_size, 1)

# these probably won't ever change
num_classes = 21
num_strings = 6

# The first column of the (header-less) id file lists the sample IDs.
csv_file = f"{data_path}{id_file}"
list_IDs = list(pd.read_csv(csv_file, header=None)[0])

def partition_data(data_split):
    """Split ``list_IDs`` by guitarist and build the two batch generators.

    An ID whose integer prefix (the text before the first ``_``) equals
    ``data_split`` goes to the validation set; every other ID goes to the
    training set. As a side effect the folder ``save_folder/<data_split>/`` is
    created if it does not exist yet.

    Args:
        data_split: guitarist number to hold out for validation.

    Returns:
        ``(training_generator, validation_generator)`` - two ``MyDataGenerator``
        instances; only the training generator is created with ``shuffle=True``.
    """
    partition = {"training": [], "validation": []}
    for ID in list_IDs:
        guitarist = int(ID.split("_")[0])
        subset = "validation" if guitarist == data_split else "training"
        partition[subset].append(ID)

    # Both generators share every setting except the ID list and shuffling.
    shared_kwargs = dict(data_path=data_path,
                         batch_size=batch_size,
                         spec_repr=spec_repr,
                         con_win_size=con_win_size)
    training_generator = MyDataGenerator(partition["training"], shuffle=True, **shared_kwargs)
    validation_generator = MyDataGenerator(partition["validation"], shuffle=False, **shared_kwargs)

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    split_folder = save_folder + str(data_split) + "/"
    os.makedirs(split_folder, exist_ok=True)
    return training_generator, validation_generator

def tab2pitch(tab):
    """Turn one tablature frame into a 44-element multi-hot pitch vector.

    ``tab`` holds one fret class per string. Class 0 marks a closed string and
    is skipped; any other class sets slot ``fret_class + open_pitch - 41`` to 1,
    where ``open_pitch`` is taken from the per-string table below.
    """
    open_pitches = [40, 45, 50, 55, 59, 64]
    multi_hot = np.zeros(44)
    for idx, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            slot = fret_class + open_pitches[idx] - 41
            multi_hot[slot] = 1
    return multi_hot

def pitch_precision(pred, gt):
    """Pitch-level precision of predicted tablature against the ground truth.

    Every frame of ``pred`` and ``gt`` is converted with ``tab2pitch``; the
    result is (pitches present in both) / (pitches present in ``pred``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The precision, or 0.0 when ``pred`` contains no pitch at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    denominator = np.sum(pitch_pred)
    if denominator == 0:
        return 0.0
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt))
    return (1.0 * numerator) / denominator

def pitch_recall(pred, gt):
    """Pitch-level recall of predicted tablature against the ground truth.

    Every frame of ``pred`` and ``gt`` is converted with ``tab2pitch``; the
    result is (pitches present in both) / (pitches present in ``gt``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The recall, or 0.0 when ``gt`` contains no pitch at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    pitch_gt = np.array([tab2pitch(frame) for frame in gt])
    denominator = np.sum(pitch_gt)
    if denominator == 0:
        return 0.0
    pitch_pred = np.array([tab2pitch(frame) for frame in pred])
    numerator = np.sum(np.multiply(pitch_pred, pitch_gt))
    return (1.0 * numerator) / denominator

def pitch_f_measure(pred, gt):
    """Harmonic mean of ``pitch_precision`` and ``pitch_recall``.

    Returns 0.0 when precision and recall are both zero instead of dividing by
    zero. The result is returned directly rather than through a local named
    ``f``, which would shadow the module-level ``torch.nn.functional`` alias.
    """
    p = pitch_precision(pred, gt)
    r = pitch_recall(pred, gt)
    if p + r == 0:
        return 0.0
    return (2 * p * r) / (p + r)

def tab2bin(tab):
    """Turn one tablature frame into a 6 x 20 binary string/fret matrix.

    ``tab`` holds one fret class per string. Class 0 marks a closed string and
    leaves its row empty; class ``k > 0`` sets column ``k - 1`` of that row to 1.
    """
    fret_matrix = np.zeros((6, 20))
    for row, fret_class in enumerate(tab):
        # 0 means that the string is closed
        if fret_class > 0:
            fret_matrix[row][fret_class - 1] = 1
    return fret_matrix

def tab_precision(pred, gt):
    """Tablature-level precision (exact string/fret matches).

    Every frame is converted with ``tab2bin``, which drops the "closed" class so
    only positives are counted; the result is (string/fret cells set in both) /
    (cells set in ``pred``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The precision, or 0.0 when ``pred`` has no fretted string at all (the
        ratio would otherwise be 0/0 = nan and poison any later average).
    """
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    denominator = np.sum(tab_pred)
    if denominator == 0:
        return 0.0
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    numerator = np.sum(np.multiply(tab_pred, tab_gt))
    return (1.0 * numerator) / denominator

def tab_recall(pred, gt):
    """Tablature-level recall (exact string/fret matches).

    Every frame is converted with ``tab2bin``, which drops the "closed" class so
    only positives are counted; the result is (string/fret cells set in both) /
    (cells set in ``gt``).

    Args:
        pred: sequence of predicted frames (one fret class per string each).
        gt: sequence of ground-truth frames, same layout as ``pred``.

    Returns:
        The recall, or 0.0 when ``gt`` has no fretted string at all (the ratio
        would otherwise be 0/0 = nan and poison any later average).
    """
    tab_gt = np.array([tab2bin(frame) for frame in gt])
    denominator = np.sum(tab_gt)
    if denominator == 0:
        return 0.0
    tab_pred = np.array([tab2bin(frame) for frame in pred])
    numerator = np.sum(np.multiply(tab_pred, tab_gt))
    return (1.0 * numerator) / denominator

def tab_f_measure(pred, gt):
    """Harmonic mean of ``tab_precision`` and ``tab_recall``.

    Returns 0.0 when precision and recall are both zero instead of dividing by
    zero. The result is returned directly rather than through a local named
    ``f``, which would shadow the module-level ``torch.nn.functional`` alias.
    """
    p = tab_precision(pred, gt)
    r = tab_recall(pred, gt)
    if p + r == 0:
        return 0.0
    return (2 * p * r) / (p + r)

def tab_disamb(pred, gt):
    """Ratio of tablature precision to pitch precision.

    Returns 0.0 when the pitch precision is zero instead of dividing by zero.
    """
    tp = tab_precision(pred, gt)
    pp = pitch_precision(pred, gt)
    if pp == 0:
        return 0.0
    return tp / pp

def evaluate(y_pred, y_gt, e):
    """Score one batch and append every metric to ``metrics2[e]``.

    Args:
        y_pred: predicted fret classes for the batch.
        y_gt: ground-truth fret classes for the batch.
        e: epoch index selecting the entry of the global ``metrics2`` dict.
    """
    epoch_metrics = metrics2[e]
    scorers = (
        ("pp", pitch_precision),
        ("pr", pitch_recall),
        ("pf", pitch_f_measure),
        ("tp", tab_precision),
        ("tr", tab_recall),
        ("tf", tab_f_measure),
        ("tdr", tab_disamb),
    )
    for key, scorer in scorers:
        epoch_metrics[key].append(scorer(y_pred, y_gt))

def save_results_csv():
    """Write the ``metrics2`` dict to ``tabcnn-out.npy`` inside ``save_folder``.

    This cell collects its results in ``metrics2`` (see ``evaluate``), so that is
    the dict saved here; saving ``metrics`` would write stale data from another
    cell, or raise NameError on a fresh kernel. Despite the name, the file is
    written with ``np.save`` (NumPy format), not CSV.
    """
    np.save(save_folder + "tabcnn-out.npy", metrics2)

class MyCNN(torch.nn.Module):
    """Three-layer CNN producing one score vector per guitar string.

    The network maps a single-channel input to ``NUM_STRINGS * NUM_CLASSES`` raw
    scores and reshapes them to ``(batch, NUM_STRINGS, NUM_CLASSES)``. No
    softmax is applied inside the network.

    ``fc1`` expects 5952 flattened features, which is what the conv/pool stack
    yields for a 192 x 9 input (64 channels * 93 * 1).
    """

    # Class-level constants so instances restored with torch.load see them too.
    NUM_STRINGS = 6
    NUM_CLASSES = 21

    def __init__(self):
        super(MyCNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, 3)
        self.conv2 = torch.nn.Conv2d(32, 64, 3)
        self.conv3 = torch.nn.Conv2d(64, 64, 3)

        self.pool = torch.nn.MaxPool2d(2, 2)  # kernel size, stride

        self.drop1 = torch.nn.Dropout2d(0.25)
        self.drop2 = torch.nn.Dropout(0.5)
        self.flatten = torch.nn.Flatten()

        self.fc1 = torch.nn.Linear(5952, 128)
        self.fc2 = torch.nn.Linear(128, self.NUM_STRINGS * self.NUM_CLASSES)

    def forward(self, x):
        """Return raw scores of shape ``(batch, NUM_STRINGS, NUM_CLASSES)``.

        Args:
            x: input tensor laid out as ``(batch, 1, height, width)``.
        """
        x = f.relu(self.conv1(x))
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = self.pool(x)
        x = self.drop1(x)
        x = self.flatten(x)
        x = f.relu(self.fc1(x))
        x = self.drop2(x)
        x = self.fc2(x)
        # -1 lets the batch dimension follow the input instead of assuming 128,
        # so partial batches and other batch sizes work as well.
        x_out = torch.reshape(x, (-1, self.NUM_STRINGS, self.NUM_CLASSES))
        return x_out

if __name__ == '__main__':
    # Re-score an already trained network on 615 batches of the training
    # generator. Scoring runs under torch.no_grad() and predictions are moved to
    # CPU/numpy once per batch, because the metric helpers read them element by
    # element and a CUDA tensor would force a device sync per read.

    print("beginning")

    # NOTE: relies on `net` already existing in the kernel (trained or loaded
    # by an earlier cell); this cell does not create it.
    net2 = net
    device = torch.device('cuda:0')
    print('using device:', device)
    net2.to(device)
    print(net2)

    for e in range(epoch):
        net2.eval()  # switch dropout / batchnorm to inference behaviour
        training_generator, validation_generator = partition_data(0)
        # NOTE(review): this reads from training_generator (shuffle=True), not
        # validation_generator - confirm that training data is intended here.
        with torch.no_grad():
            for i in range(615):
                input_data = training_generator[i]
                images = input_data[0].astype(np.float32)
                labels = input_data[1].astype(np.float32)
                images = torch.tensor(np.transpose(images, (0,3,1,2)), device=device)
                outputs = net2(images)
                _, predicted = torch.max(outputs, 2)
                predicted = predicted.cpu().numpy()
                gt = np.argmax(labels, 2)
                evaluate(predicted, gt, e)

                if i % 100 == 0:
                    print(f"evaluating log: {e} epoch, {i}/615 acc_tf={tab_f_measure(predicted, gt)}", flush=True)
                    print(datetime.datetime.now(), flush=True)

{0: {'pp': [], 'pr': [], 'pf': [], 'tp': [], 'tr': [], 'tf': [], 'tdr': []}}
beginning
using device: cuda:0
MyCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout2d(p=0.25, inplace=False)
  (drop2): Dropout(p=0.5, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=5952, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=126, bias=True)
)
evaluating log: 0 epoch, 0/615 acc_tf=0.7578347578347578
2022-06-24 20:29:35.725117
evaluating log: 0 epoch, 100/615 acc_tf=0.8052631578947369
2022-06-24 20:31:42.457357
evaluating log: 0 epoch, 200/615 acc_tf=0.7466666666666667
2022-06-24 20:33:50.538109


KeyboardInterrupt: ignored