In [None]:
import os
import sys

import logging
from logging import handlers

os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# PATH TO PROJECT ROOT
sys.path.append("/path/to/VocalDetection")

from definitions import LOG_DIR, WEIGHT_DIR, DATA_DIR
from utils import dataset
from models.SCNN18 import SCNN18

import numpy as np
import tensorflow as tf


In [None]:
LOG = logging.getLogger('root')


def initLog(debug=False):
    """Configure console + rotating-file logging and set the level of ``LOG``.

    The root logger's existing handlers are removed and closed before it is
    configured. ``logging.basicConfig`` does nothing when the root logger
    already has handlers, which is the case when this cell is re-run and may
    be the case when an imported library installed its own handler first.
    Without the reset, the format and the ``output.log`` handler below would
    not be applied in those situations, and every re-run would leave another
    open file handler behind.

    Args:
        debug: When true, ``LOG`` is set to DEBUG; otherwise to INFO.
    """
    root_logger = logging.getLogger()
    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # NOTE(review): CPython's RotatingFileHandler appears to switch to append
    # mode whenever maxBytes > 0, so "w" may not truncate the file - confirm.
    file_handler = handlers.RotatingFileHandler(
        'output.log',
        mode="w",
        maxBytes=1024 * 1024 * 100,
        backupCount=3,
        encoding="utf-8",
    )
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d %H:%M',
        handlers=[logging.StreamHandler(), file_handler],
    )
    LOG.setLevel(logging.DEBUG if debug else logging.INFO)


initLog()

In [None]:
# --- Model construction -----------------------------------------------------
# Both values are handed to SCNN18(...) when the models are built.
# The spelling "imput_shape" is kept because other cells refer to this name.
imput_shape = (32000, 1)
classes = 2

# --- Data pipeline ----------------------------------------------------------
# Batch size applied to every evaluation dataset.
batch_size = 150

# --- Compile settings -------------------------------------------------------
LOSS = 'categorical_crossentropy'
METRICS = ['accuracy']

lr = 1.0
optimizer = tf.optimizers.Adadelta(learning_rate=lr)

In [None]:
# Dataset files to evaluate on, in reporting order.
test_ds_paths = [
    'SCNN-Jamendo-test.h5',
    'SCNN-Jamendo-train.h5',
    'SCNN-FMA-C-1-fixed-test.h5',
    'SCNN-FMA-C-2-fixed-test.h5',
    'SCNN-KTV-test.h5',
    'SCNN-MIR-1k-train.h5',
    'SCNN-Instrumental-non-vocal.h5',
    'SCNN-A-Cappella-vocal.h5',
    'SCNN-Taiwanese-stream-test.h5',
    'SCNN-Taiwanese-CD-test.h5',
    'SCNN-Chinese-CD-test.h5',
    'SCNN-Classical-test.h5',
    'SCNN-test-hard.h5',
    'SCNN-RWC.h5'
]

# Dataset file name -> loaded dataset, batched with `batch_size`.
test_dss = {}
for test_ds_path in test_ds_paths:
    test_dss.update({test_ds_path: dataset.load(test_ds_path).batch(batch_size)})

In [None]:
# Weight files to evaluate, one per training set.
jamendo_weight_path = os.path.join(WEIGHT_DIR, "Jamendo/20210123-12_SCNN18_SCNN-Jamendo-train_h5-0.h5")
FMA_C_1_weight_path = os.path.join(WEIGHT_DIR, "FMA-C-1/2021-02-13_11_SCNN18_SCNN-FMA-C-1-fixed-train_h5_2GPU-0.h5")
FMA_C_2_weight_path = os.path.join(WEIGHT_DIR, "FMA-C-2/2021-02-14_09_SCNN18_SCNN-FMA-C-2-fixed-train_h5_2GPU-0.h5")

# Dataset file name -> one entry per evaluated weight file.
#   results:    mean, over all predictions, of the largest value in each
#               prediction row (presumably class scores - confirm with SCNN18)
#   accuracies: accuracy reported by model.evaluate
results = {}
accuracies = {}

for weight_path in (jamendo_weight_path, FMA_C_1_weight_path, FMA_C_2_weight_path):
    # Build a fresh network for every weight file, then load the weights.
    model = SCNN18(imput_shape, classes).model()
    model.compile(loss=LOSS, optimizer=optimizer, metrics=METRICS)
    model.load_weights(weight_path)
    LOG.info(weight_path)

    for test_ds_path in test_ds_paths:
        batched_ds = test_dss[test_ds_path]
        loss, acc = model.evaluate(batched_ds, verbose=0)
        result = model.predict(batched_ds, verbose=0)
        mean_top_value = np.mean([max(row) for row in result])

        accuracies.setdefault(test_ds_path, []).append(acc)
        results.setdefault(test_ds_path, []).append(mean_top_value)

        LOG.info(f'{loss} {acc}')
        LOG.info(f'{test_ds_path}: {mean_top_value}')
    del model

    # Running summary: averages over the weight files evaluated so far.
    for test_ds_path in test_ds_paths:
        LOG.info(f'{test_ds_path:30} {np.mean(results[test_ds_path]):.4f} {np.mean(accuracies[test_ds_path]):.4f}')

In [None]:
# Final table: one row per dataset with the two per-dataset lists averaged
# over all evaluated weight files (`results` first, then `accuracies`).
for ds_name in test_ds_paths:
    mean_result = np.mean(results[ds_name])
    mean_accuracy = np.mean(accuracies[ds_name])
    print(f'{ds_name:30} {mean_result:.4f} {mean_accuracy:.4f}')

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut
import numpy as np

In [None]:
# Display names of the evaluation datasets. The arrays below hold one value per
# entry, in this order (the per-row comments on the Jamendo array and the
# trailing "RWC" comments are consistent with that; the unlabeled rows are
# assumed to follow the same order - TODO confirm against the recorded runs).
labels = [
    "Jamendo-test",
    "FMA-C-1-fixed-test",
    "FMA-C-2-fixed-test",
    "KTV-test",
    "MIR-1k-train",
    "Instrumental-non-vocal",
    "A-Cappella-vocal",
    "Taiwanese-stream-test",
    "Taiwanese-CD-test",
    "Chinese-CD-test",
    "Classical-test",
    "test-hard",
    "RWC"
]

jamendo_acc = np.array([
    0.9375, # Jamendo-test
    0.8239, # FMA-C-1-fixed-test
    0.8573, # FMA-C-2-fixed-test
    0.9402, # KTV-test
    0.8702, # MIR-1k-train
    0.7972, # Instrumental-non-vocal
    0.9502, # A-Cappella-vocal
    0.8262, # Taiwanese-stream-test
    0.8879, # Taiwanese-CD-test
    0.9100, # Chinese-CD-test
    0.8147, # Classical-test
    0.6614, # test-hard
    0.9222, # RWC
])
# Backward-compatible alias: this array was originally published under the
# misspelled name `jamando_acc`; keep it so existing references still resolve.
jamando_acc = jamendo_acc

jamendo_reversed_acc = np.array([
    0.9905697703,
    0.9816458821,
    0.9842948914,
    0.9915107489,
    0.9807985425,
    0.9855387807,
    0.9930589795,
    0.9824882746,
    0.9871518016,
    0.9903222919,
    0.9832262993,
    0.9726365209,
    0.9894, # RWC
])

FMA_C_1_acc = np.array([
    0.9273434759,
    0.8962872297,
    0.906954272,
    0.956143101,
    0.9345802837,
    0.8868577871,
    0.9284110591,
    0.8934416647,
    0.8916611728,
    0.8719023705,
    0.9433103714,
    0.7243884648,
    0.9222, # RWC - NOTE(review): same value as the Jamendo RWC entry; confirm it is not a copy-paste
])

FMA_C_1_reversed_acc = np.array([
    0.993,
    0.9906,
    0.9907,
    0.9955,
    0.9884,
    0.9915,
    0.9943,
    0.9902,
    0.9931,
    0.994,
    0.9908,
    0.9809,
    0.9913, # RWC
])

FMA_C_2_acc = np.array([
    0.9327,
    0.8908,
    0.9090,
    0.9349,
    0.8685,
    0.8897,
    0.9572,
    0.8534,
    0.9203,
    0.9133,
    0.9108,
    0.7196,
    0.9220, # RWC
])

FMA_C_2_reversed_acc = np.array([
    0.9913,
    0.9890,
    0.9913,
    0.9952,
    0.9865,
    0.9916,
    0.9956,
    0.9915,
    0.9937,
    0.9927,
    0.9934,
    0.9806,
    0.9917, # RWC
])

# Later cells pair array positions with `labels` by index, so a missing or
# extra hand-entered value would silently mislabel every following row.
for _series_name, _series in (
    ("jamendo_acc", jamendo_acc),
    ("jamendo_reversed_acc", jamendo_reversed_acc),
    ("FMA_C_1_acc", FMA_C_1_acc),
    ("FMA_C_1_reversed_acc", FMA_C_1_reversed_acc),
    ("FMA_C_2_acc", FMA_C_2_acc),
    ("FMA_C_2_reversed_acc", FMA_C_2_reversed_acc),
):
    assert _series.shape == (len(labels),), f"{_series_name} must have one value per label"

# Leave-one-out splitter used by the regression cell below: each split holds
# out a single sample for testing and trains on all the others.
loo = LeaveOneOut()

In [None]:
# Pearson correlation matrix between the two FMA-C-2 series; the off-diagonal
# entry is their correlation. Left as the last expression so the cell shows it.
np.corrcoef(FMA_C_2_acc, FMA_C_2_reversed_acc)

In [None]:
# Leave-one-out check of how well FMA_C_2_reversed_acc linearly predicts
# FMA_C_2_acc: for every dataset, fit on the other datasets, predict the
# held-out one, and record the absolute prediction error.
diff = []
for train_index, test_index in loo.split(range(len(FMA_C_2_acc))):
    # Named `regressor`, not `lr`: `lr` is the optimizer learning rate set in
    # the configuration cell, and rebinding it here would make a re-run of
    # that cell hand a LinearRegression object to the optimizer.
    regressor = LinearRegression()
    regressor.fit(
        FMA_C_2_reversed_acc[train_index].reshape(-1, 1),
        FMA_C_2_acc[train_index].reshape(-1, 1),
    )
    # The target was fitted as a column vector, so predict() returns a 2-D
    # array; [0][0] extracts the single held-out prediction.
    predicted = regressor.predict(FMA_C_2_reversed_acc[test_index].reshape(-1, 1))[0][0]
    diff.append(abs(FMA_C_2_acc[test_index][0] - predicted))
    print(predicted)
# The splitter holds out samples in index order, so position i in `diff`
# corresponds to labels[i].
print("diff:\n", "\n".join([f'{labels[i]}: {str(x)}' for i, x in enumerate(diff)]))
print("avg: ", np.mean(diff))