In [1]:
import random
from pathlib import Path

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.datasets import load_svmlight_files
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTreeClassifier

In [2]:
# Root directory under which read_libsvm() looks for the svmlight/LIBSVM files.
# NOTE(review): absolute, machine-specific Windows path -- anyone else running
# this notebook has to edit it; consider a relative or configurable location.
DATA_PATH = Path('D://dataset')

def read_libsvm(dname, train_size=0.2, random_state=None):
    """Load the svmlight/LIBSVM dataset ``dname`` from under ``DATA_PATH``.

    Two on-disk layouts are supported:

    * ``DATA_PATH/<dname>.txt`` -- a single file, which is randomly split
      into a training and a test part with ``train_test_split``.
    * ``DATA_PATH/<dname>/<dname>.txt`` plus ``DATA_PATH/<dname>/<dname>.t``
      -- separate training and test files.

    Parameters
    ----------
    dname : str
        Dataset name (file stem / sub-directory name).
    train_size : float or int, default 0.2
        Forwarded to ``train_test_split`` for the single-file layout.
        NOTE(review): the default keeps only 20% of the rows for training --
        confirm this is intended rather than ``test_size=0.2``.
    random_state : int, RandomState or None, default None
        Forwarded to ``train_test_split``; pass an int for a reproducible
        split. Ignored for the two-file layout.

    Returns
    -------
    tuple
        ``(xs, ys, xs_t, ys_t)``: dense training features, training labels,
        dense test features, test labels.
    """
    single_file = DATA_PATH / f'{dname}.txt'
    if single_file.exists():
        xs, ys = load_svmlight_file(str(single_file))
        xs, xs_t, ys, ys_t = train_test_split(
            xs.toarray(), ys, train_size=train_size, random_state=random_state)
        return xs, ys, xs_t, ys_t

    data_dir = DATA_PATH / dname
    # Load both files in one call: parsing them separately lets each file
    # infer its own feature count, so the train and test matrices could end
    # up with different widths when one file never mentions the last features.
    xs, ys, xs_t, ys_t = load_svmlight_files(
        [str(data_dir / f'{dname}.txt'), str(data_dir / f'{dname}.t')])
    return xs.toarray(), ys, xs_t.toarray(), ys_t

In [3]:
class Phaser:
    """Callable that shifts a scalar by ``p`` and wraps it once into a range.

    The range is ``[min(arr) - p/2, max(arr) + p/2]``. A shifted value that
    leaves the range on one side is moved by the range width so it re-enters
    from the other side; only a single wrap is applied.
    """

    def __init__(self, arr, p):
        """Derive the wrap-around bounds from ``arr`` and remember the shift ``p``."""
        half_shift = p * 0.5
        # Pad the observed value range by half the shift on each side.
        self.mi = min(arr) - half_shift
        self.ma = max(arr) + half_shift
        self.p = p

    def __call__(self, x):
        """Return ``x + p``, wrapped around once if it leaves ``[mi, ma]``."""
        shifted = x + self.p
        if shifted > self.ma:
            # Overshoot above the upper bound: re-enter from the lower end.
            return shifted + self.mi - self.ma
        if shifted < self.mi:
            # Overshoot below the lower bound: re-enter from the upper end.
            return shifted + self.ma - self.mi
        return shifted

def trans(ds, i, r=None):
    """Phase-shift feature column ``i`` of a dataset, in place.

    A ``Phaser`` is built from the values of column ``i`` in ``ds[0]`` with a
    shift of ``r * (max - min)`` of that column, and then applied to every
    row of ``ds[0]`` and, when present, of ``ds[2]``.

    Parameters
    ----------
    ds : sequence
        Dataset whose element 0 holds the training rows and whose element 2
        (optional) holds the test rows. Rows must support ``row[i]`` reads
        and writes; they are modified in place.
    i : int
        Index of the feature column to transform.
    r : float, optional
        Shift expressed as a fraction of the column's value range. When
        omitted it is drawn as ``random.random() * 0.6 + 0.2``, i.e. from
        [0.2, 0.8), using the global ``random`` state.

    Returns
    -------
    None
    """
    column = [row[i] for row in ds[0]]
    if r is None:
        r = random.random() * 0.6 + 0.2
    pa = Phaser(column, (max(column) - min(column)) * r)

    # test_md_ds() accepts 2-element datasets (features, labels) with no test
    # part, so only touch ds[2] when it actually exists.
    splits = [ds[0]]
    if len(ds) > 2:
        splits.append(ds[2])

    # Train and test rows go through the same Phaser so they stay aligned.
    for rows in splits:
        for row in rows:
            row[i] = pa(row[i])

def test_md_ds(md, ds):
    clf = md()
    clf.fit(ds[0], ds[1])
    if len(ds) == 2:
        print(clf.score(ds[0], ds[1]))
    else:
        print(clf.score(ds[2], ds[3]))

In [4]:
# Model class handed to test_md_ds(); keep exactly one line uncommented.
# (The DecisionTreeClassifier assignment used to be live but was overwritten
# by the very next line, so it never had any effect.)
# TargetModel = DecisionTreeClassifier
TargetModel = RandomForestClassifier
# TargetModel = GradientBoostingClassifier
# TargetModel = HoeffdingTreeClassifier

In [8]:
# Seed both RNGs so that Restart & Run All reproduces the printed scores:
# `random` supplies the shift fraction drawn inside trans(), and NumPy's
# global RNG is what scikit-learn falls back to when no random_state is given.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Baseline: score of the model on the untouched data.
ds = read_libsvm('cod-rna')
test_md_ds(TargetModel, ds)

# Phase-shift every feature column in place.
nf = len(ds[0][0])
for i in range(nf):
    trans(ds, i)

# Score a freshly fitted model on the shifted data for comparison.
test_md_ds(TargetModel, ds)

0.9434156378600823
0.938376585201982
