In [86]:
import numpy as np
import random
import sklearn
from sklearn import metrics, ensemble
from matplotlib import pyplot as plt

np.set_printoptions(suppress=True)
np.random.seed(100)
random.seed(100)

In [28]:
def prepare(dset):
    """Return a reduced copy of ``dset`` with two features dropped and two swapped.

    Indices 1 and 2 are removed along axis 2 with ``np.delete``, which returns a
    new array, so ``dset`` itself is never modified. In the resulting array the
    entries at positions 1 and 2 of the last axis are then exchanged.

    Args:
        dset: array with at least three axes and at least five entries on axis 2.

    Returns:
        A new array whose axis 2 is two entries shorter than that of ``dset``.
    """
    trimmed = np.delete(dset, (1, 2), axis=2)

    # Advanced indexing on the right-hand side produces a copy before anything
    # is written, so both columns can be exchanged in a single assignment.
    trimmed[..., [1, 2]] = trimmed[..., [2, 1]]

    return trimmed

In [96]:
def messup(lines, n, low=10, high=120, column=2):
    """Overwrite one entry in ``n`` randomly chosen rows of every line, in place.

    For each element of ``lines``, ``n`` distinct row positions are drawn with
    ``random.sample`` and the value stored at ``column`` in each of those rows
    is replaced by ``random.randint(low, high)`` (both bounds inclusive).
    Draws come from the global ``random`` module state, so seeding it makes the
    corruption reproducible.

    Args:
        lines: iterable of mutable, indexable sequences of rows (for example a
            3-D NumPy array or a list of lists of lists).
        n: number of distinct rows to alter in each line.
        low: smallest replacement value (default 10).
        high: largest replacement value (default 120).
        column: index within a row that gets overwritten (default 2).

    Returns:
        None. ``lines`` is modified in place.

    Raises:
        ValueError: raised by ``random.sample`` when ``n`` is negative or
            larger than the number of rows in a line.
    """
    for line in lines:
        # All positions for a line are sampled before any value is drawn.
        for idx in random.sample(range(len(line)), n):
            line[idx][column] = random.randint(low, high)

In [72]:
# Load the dataset array from a .npy file.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# fails on any other machine unless the path is adjusted.
dset = np.load(r'C:\Users\mrshu\reps\music-style-performer\data\dset1.npy')

In [73]:
# Build the reduced feature array, then shuffle it in place along its first
# axis. The resulting order depends on NumPy's global RNG state, which the
# first cell seeds; running cells out of order changes the shuffle.
processed = prepare(dset)
np.random.shuffle(processed)

In [74]:
# Sanity check: dimensions of the prepared, shuffled array.
print(processed.shape)

(6548538, 10, 3)


In [75]:
# Hold out the last 100000 shuffled entries for evaluation; everything before
# them is training data. Both names are slices of `processed`, not copies.
train, test = processed[:-100000], processed[-100000:]

print(f'train: {train.shape}, test: {test.shape}')

train: (6448538, 10, 3), test: (100000, 10, 3)


In [97]:
# Cut the test set into ten consecutive chunks of 10000 entries and corrupt
# chunk i (0-based) at i + 1 randomly chosen positions per entry. Each chunk
# is copied before being altered so `test` itself stays untouched.
test_messed = []
for i in range(10):
    chunk = test[i * 10000: (i + 1) * 10000].copy()
    messup(chunk, i + 1)
    test_messed.append(chunk)

In [115]:
# Keep only the feature at index 2 of the last axis (the one `messup`
# overwrites), giving one 2-D row per entry for the model.
# Indexing first and copying afterwards duplicates just that single feature;
# copying the full array before indexing would duplicate every feature and
# keep that larger copy alive behind a strided view.
train_flatten = train[..., 2].copy()
test_flatten = test[..., 2].copy()

test_messed_flatten = [chunk[..., 2].copy() for chunk in test_messed]

# Model

In [116]:
# Isolation forest with its hyper-parameters spelled out explicitly.
# No random_state is passed, so tree construction presumably draws from
# NumPy's global RNG (seeded in the first cell) - verify if exact
# reproducibility matters.
model = ensemble.IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination='auto',
    max_features=1.0,
)

In [117]:
# Fit on the unmodified training rows only; no labels are passed.
model.fit(train_flatten)

IsolationForest()

In [118]:
# Classify the clean test rows and every corrupted chunk with the fitted
# forest. `messed_predict` keeps the chunk order of `test_messed_flatten`.
truth_predict = model.predict(test_flatten)
messed_predict = [model.predict(batch) for batch in test_messed_flatten]

In [119]:
# Tally the predicted labels on the clean test set. IsolationForest.predict
# returns -1 for outliers and 1 for inliers, so the -1 count is the
# false-alarm count here.
unique, counts = np.unique(truth_predict, return_counts=True)
print({label: count for label, count in zip(unique, counts)})

{-1: 13521, 1: 86479}


In [120]:
# Same label tally for each corrupted chunk, in order of increasing number of
# altered positions per row (1 through 10).
for chunk_predict in messed_predict:
    unique, counts = np.unique(chunk_predict, return_counts=True)
    print({label: count for label, count in zip(unique, counts)})

{-1: 2613, 1: 7387}
{-1: 4560, 1: 5440}
{-1: 6563, 1: 3437}
{-1: 8131, 1: 1869}
{-1: 9017, 1: 983}
{-1: 9477, 1: 523}
{-1: 9780, 1: 220}
{-1: 9891, 1: 109}
{-1: 9952, 1: 48}
{-1: 9964, 1: 36}


In [122]:
# Raw anomaly scores for the clean test rows. Per the scikit-learn docs, lower
# means more abnormal and negative scores correspond to predicted outliers.
model.decision_function(test_flatten)

array([-0.03337875,  0.08272684,  0.0656414 , ...,  0.11258603,
        0.08341398,  0.07636844])