In [1]:
import _config
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from utils.data_utils import get_libsvm_data, get_fit_dataloaders
from torch.autograd import Variable
from collections import Counter
from modules.np_modules import construct_np_model, train_np_model, eval_np_model
from utils.modules_utils import cal_fx, cal_km
import numpy as np
import torch
import time
import pytorch_lightning as pl
from modules.conv_modules import Basic_Block, CNN_Res18
from modules.data_modules import GeneralDataModule
from modules.lightning_modules import CompressionNet

## v1

In [2]:
# Dataset registry and label metadata, taken from the project config module.
datasets = _config.datasets
n_classes = _config.n_classes
labels = _config.labels

# Settings for the baseline model built by construct_np_model.
n_epoch = 1
model_type = 'svc'
kernel = 'rbf'
traverse_list = ['a9a']  # names of the datasets to iterate over

# Compression settings.
map_size = 128        # passed to get_fit_dataloaders; also scales n_generate
blocks = [2,2,2,2]    # block configuration handed to CNN_Res18
rate_list = [0.5]     # each rate r keeps X_fit.shape[0] * (1 - r) vectors

lr = 0.1

# 'val_mae' is an error metric, so lower is better: early stopping has to
# track its minimum. With mode='max' a growing error counts as improvement.
early_stop_callback = EarlyStopping(
    monitor='val_mae',
    min_delta=0.00,
    patience=3,
    verbose=False,
    mode='min'
)


### Run over multiple datasets and compression rates

In [3]:
# End-to-end experiment: for every dataset, train the baseline SVC, pack its
# support vectors into maps, train a CompressionNet per map, and compare the
# compressed model's predictions with the baseline on the test set.
for name in traverse_list:
    n_features = datasets[name]['n_features']
    is_multi = datasets[name]['is_multi']
    has_test = datasets[name]['has_test']

    # -------------------
    # Load Data
    # -------------------

    print('...Load data')
    X_train, y_train, X_test, y_test = get_libsvm_data(name, n_features, is_multi, has_test)

    # -------------------
    # Train np model
    # -------------------

    print('\n... Train np model')
    np_model = construct_np_model(model_type=model_type, kernel=kernel)
    np_model = train_np_model(np_model, X_train, y_train)
    eval_np_model(np_model, X_train, y_train, X_test, y_test)

    # Collect the fitted parameters and recompute fx by hand.
    X_fit = X_train[np_model.support_]
    y_fit = y_train[np_model.support_]
    params = np_model.get_params()
    if 'gamma' in params:
        # Resolve scikit-learn's symbolic gamma settings to numbers so the
        # hand-written kernel code never receives a string.
        if params['gamma'] == 'scale':
            params['gamma'] = 1/(n_features * X_train.var())
        elif params['gamma'] == 'auto':
            params['gamma'] = 1/n_features
    coef = np_model.dual_coef_
    intercept = np_model.intercept_

    # Sanity check: the hand-computed fx should match decision_function.
    km = cal_km(params, X_fit, X_test[:10], type='realize')
    fx = cal_fx(km, coef, intercept)
    print('\nfx-realize:')
    print(fx)
    print('\ndecision_function:')
    print(np_model.decision_function(X_test[:10]))
    print('\ny_pred')
    print(np_model.predict(X_test[:10]))



    # -------------------
    # Pre-processing
    # -------------------

    print('\n...Pre-processing')
    # Sort X_fit / y_fit in ascending order of their dual coefficient.
    sorted_idx = np.argsort(coef[0])
    sorted_coef = coef[0][sorted_idx].reshape(-1, 1)
    sorted_X_fit = X_fit[sorted_idx]
    sorted_y_fit = y_fit[sorted_idx]

    # Discard low-weight instances and build one dataloader per label.
    fit_dataloaders, n_fit = get_fit_dataloaders(sorted_X_fit, sorted_coef, sorted_y_fit, _config.labels, map_size)

    # Show the original class distribution.
    print('\ndata distribution compare:')
    print('train data - num:{}, distribution:{}'.format(X_train.shape[0], sorted(Counter(y_train).items())))
    print('fit data - num:{}, distribution:{}'.format(X_fit.shape[0], sorted(Counter(y_fit).items())))


    # -------------------
    # Compressing && Inference
    # -------------------

    for rate in rate_list:
        start = time.time()

        # Number of vectors to generate per map. `rate` is the requested
        # compression rate; it is kept intact so the final report prints it.
        # map_rate is the share of each map that is generated -- assumes n_fit
        # is the number of vectors actually packed into maps; TODO confirm.
        n_compressed = X_fit.shape[0] * (1-rate)
        map_rate = n_compressed / n_fit
        n_generate = int(map_rate*map_size)

        # Per-map results, concatenated once after the loops.
        X_parts = []
        y_parts = []
        coef_parts = []

        # For every label
        for label in labels:
            fit_dataloader = fit_dataloaders[label]
            # For every batch (map)
            for data in fit_dataloader:
                X_map, coef_map = data
                coef_map = coef_map.view(1,-1)

                # Build the network
                conv_module = CNN_Res18(Basic_Block, blocks, n_generate)
                compression_net = CompressionNet(conv_module, X_map, label, lr, n_generate, params)

                # Build the data module
                gdm = GeneralDataModule(n_features,n_classes,labels,batch_size=64)
                gdm.prepare_data(X_train, X_test, params, X_map[0][0].numpy(), coef_map.numpy(), label)

                # Train for the number of epochs set in the config cell.
                # NOTE(review): the same early_stop_callback instance is shared
                # by every Trainer created here -- confirm its state does not
                # carry over between fits in the installed Lightning version.
                trainer = pl.Trainer(callbacks=[early_stop_callback], max_epochs=n_epoch)
                trainer.fit(compression_net, gdm)

                # Test
                trainer.test(datamodule=gdm)

                # Keep the compressed vectors and their signed coefficients.
                X_compressed_partial =compression_net.forward().cpu().detach().numpy()
                y_compressed_partial = np.full((X_compressed_partial.shape[0],),label)
                alpha_i_compressed_partial = compression_net.get_alpha_i().cpu().detach().numpy()
                label_i = 1 if label > 0 else -1
                coef_compressed_partial = alpha_i_compressed_partial * label_i

                X_parts.append(X_compressed_partial)
                y_parts.append(y_compressed_partial)
                coef_parts.append(coef_compressed_partial)

                # NOTE(review): smoke-test shortcut -- together with the break
                # below, only the first map of the first label is compressed,
                # so the agreement reported at the end is not representative.
                break
            break

        # NOTE(review): coefficients are stacked on axis 0 as before; confirm
        # this is the layout cal_fx expects once more than one map is used.
        X_compressed = np.concatenate(X_parts, axis=0) if X_parts else None
        y_compressed = np.concatenate(y_parts, axis=0) if y_parts else None
        coef_compressed = np.concatenate(coef_parts, axis=0) if coef_parts else None

        end = time.time()
        print('\nTraining time used:{:.2f}'.format(end-start))
        print('-------------------')

        # Use coef_compressed and X_compressed to compute fx.
        start = time.time()
        print('\n eval compressed ins')
        # Reference predictions for the whole test set in a single call
        # instead of one predict() call per row.
        y_pred_all = np_model.predict(X_test)
        n_true = 0
        for i in range(X_test.shape[0]):
            x_i = X_test[i].reshape(1,-1)
            km_compressed = cal_km(params, X_compressed, x_i, type='interface')
            # NOTE(review): the SVC intercept is not added here (intercept=0)
            # -- confirm this is intended.
            fx_compressed = cal_fx(km_compressed, coef_compressed, intercept=0)
            pred_compressed = 1 if fx_compressed > 0 else 0
            y_pred = y_pred_all[i:i+1]
            if pred_compressed == y_pred[0]:
                n_true = n_true + 1

            if i < 10:
                print('\nThe {}-th ins:'.format(i))
                print('fx_compressed:{}'.format(fx_compressed))
                print('fx_np_model:{}'.format(np_model.decision_function(x_i)))
                print('pred_comressed:{}'.format(pred_compressed))
                print('pred_np_model:{}'.format(y_pred))

        end = time.time()
        # This phase is evaluation, not training. "Acc" is the agreement with
        # the baseline SVC's predictions, not accuracy against y_test.
        print('\nEvaluation time used:{:.2f}'.format(end-start))
        print('On compression rate {}, Acc:{:.2f}'.format(rate, n_true/X_test.shape[0]))
        print('-------------------------')



        





...Load data
Loading Finish: Time used 0.32
---Dataset Info---
dataset name:a9a
n_features:123
is_multi:False
Trainset:[(0, 24720), (1, 7841)]
Testset:[(0, 12435), (1, 3846)]

... Train np model
Training Finish: Time used 2.29
dataset   	acc       	precision 	recall    	f1-score  
trainset  	0.86      	0.82      	0.78      	0.80      
testset   	0.85      	0.80      	0.76      	0.78      

fx-realize:
[-1.65739295 -1.10803564 -0.27469856  0.64455147 -2.14699336 -1.51682963
 -1.84686507  1.06113959 -1.8236303  -1.76943493]

decision_function:
[-1.65739295 -1.10803564 -0.27469856  0.64455147 -2.14699336 -1.51682963
 -1.84686507  1.06113959 -1.8236303  -1.76943493]

y_pred
[0 0 0 1 0 0 0 1 0 0]

...Pre-processing
----label 0----
before X_temp_shape: (6118, 123)
before coef_temp_shape: (6118, 1)
after X_temp_shape: (6016, 123)
after coef_shape: (11750, 1)
n_parts: 47.0
47 map for each 128 item,total 6016
batch X_temp_shape: (47, 1, 128, 123)
batch coef_temp_shape: (47, 128, 1)
----label 1-

HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…


Initial coef:
Parameter containing:
tensor([[0.8011, 0.1955, 0.4238, 0.4703, 0.8537, 0.1960, 0.1115, 0.6037, 0.6715,
         0.0967, 0.4637, 0.6877, 0.1777, 0.7446, 0.8466, 0.1319, 0.3728, 0.5427,
         0.2927, 0.0385, 0.0735, 0.8824, 0.2576, 0.1506, 0.2248, 0.2610, 0.4486,
         0.1687, 0.9684, 0.3092, 0.2124, 0.7941, 0.1552, 0.7067, 0.8772, 0.7818,
         0.1390, 0.2239, 0.4006, 0.6768, 0.8969, 0.5103, 0.8686, 0.6256, 0.4053,
         0.5924, 0.7168, 0.0559, 0.2073, 0.1755, 0.1079, 0.5327, 0.0603, 0.4983,
         0.7293, 0.6380, 0.3833, 0.1050, 0.8296, 0.5074, 0.4770, 0.5528, 0.8719,
         0.3243]], requires_grad=True)

Initial alpha_i:
tensor([[0.8201, 0.1689, 0.4143, 0.4643, 0.8766, 0.1694, 0.0786, 0.6078, 0.6807,
         0.0626, 0.4573, 0.6981, 0.1497, 0.7594, 0.8690, 0.1004, 0.3595, 0.5422,
         0.2734, 0.0000, 0.0376, 0.9075, 0.2356, 0.1205, 0.2003, 0.2393, 0.4410,
         0.1400, 1.0000, 0.2911, 0.1870, 0.8125, 0.1255, 0.7186, 0.9019, 0.7993,
         0.1081

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…



After train coef:
Parameter containing:
tensor([[3.8007e+00, 3.9415e+00, 3.8692e+00, 3.5774e+00, 4.0650e+00, 3.6349e+00,
         3.8539e+00, 3.5034e+00, 3.8113e+00, 3.9343e+00, 4.0723e+00, 3.9303e+00,
         3.8368e+00, 3.8517e+00, 3.9371e+00, 3.8758e+00, 3.8756e+00, 3.5968e+00,
         3.8454e+00, 1.3174e-03, 3.8310e+00, 3.5387e+00, 3.8535e+00, 3.6668e+00,
         3.6205e+00, 3.7672e+00, 3.5820e+00, 3.5666e+00, 3.6564e+00, 3.8730e+00,
         3.5803e+00, 3.7363e+00, 3.9408e+00, 3.9322e+00, 3.8389e+00, 3.8242e+00,
         3.6309e+00, 3.5702e+00, 3.6029e+00, 4.0757e+00, 3.7160e+00, 4.0748e+00,
         4.0689e+00, 3.6583e+00, 4.0024e+00, 3.6910e+00, 3.8725e+00, 3.7897e+00,
         3.7737e+00, 3.6344e+00, 3.8350e+00, 3.9674e+00, 3.7322e+00, 3.8887e+00,
         3.8193e+00, 3.8533e+00, 4.0740e+00, 4.0537e+00, 4.0655e+00, 3.7842e+00,
         3.8802e+00, 3.4346e+00, 3.9988e+00, 3.6209e+00]], requires_grad=True)

After train alpha_i:
tensor([[0.9325, 0.9671, 0.9493, 0.8777, 0.9974

HBox(children=(HTML(value='Testing'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=…


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': tensor(13.6322),
 'test_mae': tensor(14.1322),
 'test_mse': tensor(244.0771),
 'test_var': tensor(0.2090),
 'val_loss': tensor(12.0541),
 'val_mae': tensor(12.5532),
 'val_mse': tensor(196.8891),
 'val_var': tensor(0.2024)}
--------------------------------------------------------------------------------

 eval compressed ins

The 0-th ins:
fx_compressed:[-25.24531806]
fx_np_model:[-1.65739295]
pred_comressed:0
pred_np_model:[0]

The 1-th ins:
fx_compressed:[-28.89513162]
fx_np_model:[-1.10803564]
pred_comressed:0
pred_np_model:[0]

The 2-th ins:
fx_compressed:[-25.47295713]
fx_np_model:[-0.27469856]
pred_comressed:0
pred_np_model:[0]

The 3-th ins:
fx_compressed:[-24.9814386]
fx_np_model:[0.64455147]
pred_comressed:0
pred_np_model:[1]

The 4-th ins:
fx_compressed:[-30.62108772]
fx_np_model:[-2.14699336]
pred_comressed:0
pred_np_model:[0]

The 5-th ins:
fx_compressed

### splice test

In [3]:
# Step-by-step version of the run above for one dataset: load the data, train
# the baseline SVC, verify the hand-written fx, and build the per-label maps.
name = 'a9a'

n_features = datasets[name]['n_features']
is_multi = datasets[name]['is_multi']
has_test = datasets[name]['has_test']

# -------------------
# Load Data
# -------------------

print('...Load data')
X_train, y_train, X_test, y_test = get_libsvm_data(name, n_features, is_multi, has_test)

# -------------------
# Train np model
# -------------------

print('\n... Train np model')
np_model = construct_np_model(model_type=model_type, kernel=kernel)
np_model = train_np_model(np_model, X_train, y_train)
eval_np_model(np_model, X_train, y_train, X_test, y_test)

# Collect the fitted parameters and recompute fx by hand.
X_fit = X_train[np_model.support_]
y_fit = y_train[np_model.support_]
params = np_model.get_params()
if 'gamma' in params:
    # Resolve scikit-learn's symbolic gamma settings to numbers so the
    # hand-written kernel code never receives a string.
    if params['gamma'] == 'scale':
        params['gamma'] = 1/(n_features * X_train.var())
    elif params['gamma'] == 'auto':
        params['gamma'] = 1/n_features
coef = np_model.dual_coef_
intercept = np_model.intercept_

# Sanity check: the hand-computed fx should match decision_function.
km = cal_km(params, X_fit, X_test[:10], type='realize')
fx = cal_fx(km, coef, intercept)
print('\nfx-realize:')
print(fx)
print('\ndecision_function:')
print(np_model.decision_function(X_test[:10]))
print('\ny_pred')
print(np_model.predict(X_test[:10]))



# -------------------
# Pre-processing
# -------------------

print('\n...Pre-processing')
# Sort X_fit / y_fit in ascending order of their dual coefficient.
sorted_idx = np.argsort(coef[0])
sorted_coef = coef[0][sorted_idx].reshape(-1, 1)
sorted_X_fit = X_fit[sorted_idx]
sorted_y_fit = y_fit[sorted_idx]

# Discard low-weight instances and build one dataloader per label.
fit_dataloaders, n_fit = get_fit_dataloaders(sorted_X_fit, sorted_coef, sorted_y_fit, _config.labels, map_size)

# Show the original class distribution.
print('\ndata distribution compare:')
print('train data - num:{}, distribution:{}'.format(X_train.shape[0], sorted(Counter(y_train).items())))
print('fit data - num:{}, distribution:{}'.format(X_fit.shape[0], sorted(Counter(y_fit).items())))


...Load data
Loading Finish: Time used 0.31
---Dataset Info---
dataset name:a9a
n_features:123
is_multi:False
Trainset:[(0, 24720), (1, 7841)]
Testset:[(0, 12435), (1, 3846)]

... Train np model
Training Finish: Time used 2.33
dataset   	acc       	precision 	recall    	f1-score  
trainset  	0.86      	0.82      	0.78      	0.80      
testset   	0.85      	0.80      	0.76      	0.78      

fx-realize:
[-1.65739295 -1.10803564 -0.27469856  0.64455147 -2.14699336 -1.51682963
 -1.84686507  1.06113959 -1.8236303  -1.76943493]

decision_function:
[-1.65739295 -1.10803564 -0.27469856  0.64455147 -2.14699336 -1.51682963
 -1.84686507  1.06113959 -1.8236303  -1.76943493]

y_pred
[0 0 0 1 0 0 0 1 0 0]

...Pre-processing
----label 0----
before X_temp_shape: (6118, 123)
before coef_temp_shape: (6118, 1)
after X_temp_shape: (6016, 123)
after coef_shape: (11750, 1)
n_parts: 47.0
47 map for each 128 item,total 6016
batch X_temp_shape: (47, 1, 128, 123)
batch coef_temp_shape: (47, 128, 1)
----label 1-

In [9]:
# -------------------
# Compressing && Inference
# -------------------
# Train one CompressionNet per map of every label and gather the generated
# vectors, their labels and their signed coefficients.
rate = 0.5
# Number of vectors to generate per map.
# NOTE(review): `rate` is reused for the per-map ratio right below; after this
# cell it no longer holds the requested 0.5.
n_compressed = X_fit.shape[0] * (1-rate)
rate = n_compressed / n_fit
n_generate = int(rate*map_size)

# Compressed representation; stays None when no map was processed.
X_compressed = None
y_compressed = None
coef_compressed = None

# Per-map pieces, joined once after the loops.
X_parts = []
y_parts = []
coef_parts = []

# For every label
for label in labels:
    # For every batch (map) of this label
    for X_map, coef_map in fit_dataloaders[label]:
        coef_map = coef_map.view(1,-1)

        # Build the network
        conv_module = CNN_Res18(Basic_Block, blocks, n_generate)
        compression_net = CompressionNet(conv_module, X_map, label, lr, n_generate, params)

        # Build the data module
        gdm = GeneralDataModule(n_features,n_classes,labels,batch_size=64)
        gdm.prepare_data(X_train, X_test, params, X_map[0][0].numpy(), coef_map.numpy(), label)

        # Train (single-epoch trial run)
        # NOTE(review): every Trainer receives the same early_stop_callback
        # instance -- confirm its state does not leak between fits.
        trainer = pl.Trainer(callbacks=[early_stop_callback], max_epochs=1)
        trainer.fit(compression_net, gdm)

        # Test
        trainer.test(datamodule=gdm)

        # Keep the compressed vectors
        X_compressed_partial = compression_net.forward().cpu().detach().numpy()
        y_compressed_partial = np.full(X_compressed_partial.shape[0], label)
        alpha_i_compressed_partial = compression_net.get_alpha_i().cpu().detach().numpy()
        # Positive labels keep the sign of alpha_i, all others flip it.
        if label > 0:
            label_i = 1
        else:
            label_i = -1
        coef_compressed_partial = alpha_i_compressed_partial * label_i

        X_parts.append(X_compressed_partial)
        coef_parts.append(coef_compressed_partial)
        y_parts.append(y_compressed_partial)

# NOTE(review): coefficients are stacked on axis 0, exactly as before; confirm
# this is the layout cal_fx expects when several maps are combined.
is_none = not X_parts
if not is_none:
    X_compressed = np.concatenate(X_parts, axis=0)
    coef_compressed = np.concatenate(coef_parts, axis=0)
    y_compressed = np.concatenate(y_parts, axis=0)
        




GPU available: False, used: False
TPU available: None, using: 0 TPU cores

  | Name | Type      | Params
-----------------------------------
0 | conv | CNN_Res18 | 175 K 
-----------------------------------
175 K     Trainable params
0         Non-trainable params
175 K     Total params


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…


Initial coef:
Parameter containing:
tensor([[0.0357, 0.3764, 0.1085, 0.3252, 0.6649, 0.4718, 0.6153, 0.8656, 0.0341,
         0.8161, 0.5587, 0.3541, 0.8191, 0.5575, 0.4546, 0.1656, 0.6378, 0.0809,
         0.3949, 0.4231, 0.7205, 0.6089, 0.6168, 0.3927, 0.4087, 0.3637, 0.7704,
         0.8593, 0.9399, 0.7215, 0.9139, 0.1826, 0.7845, 0.9045, 0.9869, 0.9209,
         0.3893, 0.5479, 0.9396, 0.1038, 0.8510, 0.1102, 0.3595, 0.6568, 0.2958,
         0.2678, 0.6338, 0.4295, 0.7308, 0.8843, 0.7305, 0.0700, 0.2721, 0.2150,
         0.5847, 0.3790, 0.7648, 0.3556, 0.0233, 0.0426, 0.4567, 0.2930, 0.1520,
         0.2856]], requires_grad=True)

Initial alpha_i:
tensor([[0.0095, 0.0134, 0.0102, 0.0127, 0.0178, 0.0147, 0.0170, 0.0218, 0.0095,
         0.0207, 0.0160, 0.0131, 0.0208, 0.0160, 0.0144, 0.0108, 0.0173, 0.0099,
         0.0136, 0.0140, 0.0188, 0.0168, 0.0170, 0.0136, 0.0138, 0.0132, 0.0198,
         0.0216, 0.0235, 0.0189, 0.0229, 0.0110, 0.0201, 0.0226, 0.0246, 0.0230,
         0.0135

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…



After train coef:
Parameter containing:
tensor([[ 3.8794,  3.8427,  3.9198,  3.9491,  3.9733,  4.1019,  3.9741,  3.9114,
          3.9653,  3.6270,  3.9468,  3.7841,  4.0184,  3.9608,  3.9734,  3.9040,
          3.9329,  3.8928,  3.8687,  3.9329,  3.8442,  4.0855,  3.6466,  3.9066,
          3.9551,  3.9417,  3.9479,  3.8579,  3.9304,  3.9651,  4.0267,  3.9368,
          3.7445,  3.9787,  3.8788,  3.9598,  3.9317,  3.9348,  3.8551,  3.7338,
          3.9246,  3.7596,  3.9551,  3.9733,  4.0089,  3.9470,  3.9615,  3.9791,
          3.9552,  3.9262,  3.9263,  3.9649,  3.9736,  3.9679,  3.9294,  3.8666,
          3.9472,  4.1321, -0.0052,  3.9798,  3.9717,  3.8591,  3.8593,  3.9570]],
       requires_grad=True)

After train alpha_i:
tensor([[0.0151, 0.0146, 0.0158, 0.0162, 0.0166, 0.0189, 0.0166, 0.0156, 0.0165,
         0.0118, 0.0162, 0.0138, 0.0174, 0.0164, 0.0166, 0.0155, 0.0160, 0.0153,
         0.0150, 0.0160, 0.0146, 0.0186, 0.0120, 0.0156, 0.0163, 0.0161, 0.0162,
         0.0148,

HBox(children=(HTML(value='Testing'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=…


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': tensor(40.4421),
 'test_mae': tensor(40.9421),
 'test_mse': tensor(1732.1364),
 'test_var': tensor(0.0035),
 'val_loss': tensor(38.6593),
 'val_mae': tensor(39.1593),
 'val_mse': tensor(1582.7970),
 'val_var': tensor(0.0037)}
--------------------------------------------------------------------------------


In [11]:
# Use coef_compressed and X_compressed to compute fx for every test instance
# and count how often the compressed model agrees with the baseline SVC.
print('\n eval compressed ins')
# Reference predictions for the whole test set in a single call instead of one
# predict() call per row.
y_pred_all = np_model.predict(X_test)
n_true = 0
for i in range(X_test.shape[0]):
    x_i = X_test[i].reshape(1,-1)
    km_compressed = cal_km(params, X_compressed, x_i, type='interface')
    # NOTE(review): the SVC intercept is not added here (intercept=0) --
    # confirm this is intended.
    fx_compressed = cal_fx(km_compressed, coef_compressed, intercept=0)
    pred_compressed = 1 if fx_compressed > 0 else 0
    y_pred = y_pred_all[i:i+1]
    if pred_compressed == y_pred[0]:
        n_true = n_true + 1

    # Print a side-by-side comparison for the first ten instances only.
    if i < 10:
        print('\nThe {}-th ins:'.format(i))
        print('fx_compressed:{}'.format(fx_compressed))
        print('fx_np_model:{}'.format(np_model.decision_function(x_i)))
        print('pred_comressed:{}'.format(pred_compressed))
        print('pred_np_model:{}'.format(y_pred))

# "Acc" is the agreement with np_model's predictions, not accuracy on y_test.
print('Acc:{:.2f}'.format(n_true/X_test.shape[0]))


 eval compressed ins

The 0-th ins:
fx_compressed:[-0.42640049]
fx_np_model:[-1.65739295]
pred_comressed:0
pred_np_model:[0]

The 1-th ins:
fx_compressed:[-0.47882802]
fx_np_model:[-1.10803564]
pred_comressed:0
pred_np_model:[0]

The 2-th ins:
fx_compressed:[-0.42769892]
fx_np_model:[-0.27469856]
pred_comressed:0
pred_np_model:[0]

The 3-th ins:
fx_compressed:[-0.41869662]
fx_np_model:[0.64455147]
pred_comressed:0
pred_np_model:[1]

The 4-th ins:
fx_compressed:[-0.51302375]
fx_np_model:[-2.14699336]
pred_comressed:0
pred_np_model:[0]

The 5-th ins:
fx_compressed:[-0.45601776]
fx_np_model:[-1.51682963]
pred_comressed:0
pred_np_model:[0]

The 6-th ins:
fx_compressed:[-0.4659384]
fx_np_model:[-1.84686507]
pred_comressed:0
pred_np_model:[0]

The 7-th ins:
fx_compressed:[-0.43205044]
fx_np_model:[1.06113959]
pred_comressed:0
pred_np_model:[1]

The 8-th ins:
fx_compressed:[-0.45637356]
fx_np_model:[-1.8236303]
pred_comressed:0
pred_np_model:[0]

The 9-th ins:
fx_compressed:[-0.47680964]
fx_

In [1]:
# Report whether PyTorch can see a CUDA device; the bare expression on the
# last line is what the cell displays.
import torch
torch.cuda.is_available()

False

## Check

In [2]:
# check datasets
from utils.check import check_datasets
check_datasets()



  name  	n_features	is_multi	          trainset          	          testset           
  a9a   	  123   	   0    	  [(0, 24720), (1, 7841)]   	  [(0, 12435), (1, 3846)]   
 ijcnn1 	   22   	   0    	  [(0, 45137), (1, 4853)]   	  [(0, 82989), (1, 8712)]   
madelon 	  500   	   0    	   [(0, 1000), (1, 1000)]   	    [(0, 300), (1, 300)]    
mushrooms	  112   	   0    	   [(0, 2738), (1, 2948)]   	   [(0, 1178), (1, 1260)]   
phishing	   68   	   0    	   [(0, 3452), (1, 4286)]   	   [(0, 1446), (1, 1871)]   
 splice 	   60   	   0    	    [(0, 333), (1, 367)]    	    [(0, 150), (1, 150)]    
  w8a   	  300   	   0    	  [(0, 48270), (1, 1479)]   	   [(0, 14497), (1, 454)]   
dna.scale	  180   	   1    	   [(0, 949), (1, 1051)]    	    [(0, 583), (1, 603)]    
 mnist  	  780   	   1    	  [(0, 54051), (1, 5949)]   	   [(0, 8991), (1, 1009)]   
pendigits	   16   	   1    	   [(0, 6775), (1, 719)]    	   [(0, 3162), (1, 336)]    
Sensorless	   48   	   1    	  [(0, 37232), (1, 3724)]   	  

In [1]:
# check np model training and fx function
from utils.check import check_np_traning
check_np_traning()

Loading Finish: Time used 0.31
---Dataset Info---
dataset name:a9a
n_features:123
is_multi:False
Trainset:[(0, 24720), (1, 7841)]
Testset:[(0, 12435), (1, 3846)]
---linear svc---
Training Finish: Time used 4.01
dataset   	acc       	precision 	recall    	f1-score  
trainset  	0.85      	0.80      	0.76      	0.78      
testset   	0.85      	0.80      	0.76      	0.78      
max_alpha_i:1.0
min_alpha_i:0.0007892183376909667
---rbf svc---
Training Finish: Time used 1.90
dataset   	acc       	precision 	recall    	f1-score  
trainset  	0.86      	0.82      	0.78      	0.80      
testset   	0.85      	0.80      	0.76      	0.78      
max_alpha_i:1.0
min_alpha_i:0.0007575813783527016
---poly svc---
Training Finish: Time used 2.41
dataset   	acc       	precision 	recall    	f1-score  
trainset  	0.87      	0.83      	0.79      	0.80      
testset   	0.85      	0.80      	0.76      	0.78      
max_alpha_i:1.0
min_alpha_i:0.0004075180053148361
---sigmoid svc---
Training Finish: Time used 0.54
d

In [1]:
# Run the kernel check on a9a for every supported kernel type.
from utils.check import check_kernel
kernels = ['linear','rbf','poly','sigmoid']

# The loop variable is deliberately not called `kernel`: that name holds the
# notebook-level kernel setting used when the baseline model is constructed,
# and overwriting it here would silently change later training runs.
for kernel_name in kernels:
    print('--------{}---------'.format(kernel_name))
    check_kernel(kernel_name, 'a9a')
    print(' ')


--------linear---------
Training Finish: Time used 4.00
----show kernel metrics---

 sklearn:
[[14.  7.  7. ...  4.  8.  8.]
 [ 7. 14.  5. ... 10.  5.  9.]
 [ 4.  5.  4. ...  3.  6.  6.]
 ...
 [ 5.  8.  6. ...  8.  5.  8.]
 [ 3.  5.  6. ...  6.  6.  5.]
 [ 6.  7. 10. ...  9.  4.  8.]]
(11518, 10)

 realize:
[[14.  7.  7. ...  4.  8.  8.]
 [ 7. 14.  5. ... 10.  5.  9.]
 [ 4.  5.  4. ...  3.  6.  6.]
 ...
 [ 5.  8.  6. ...  8.  5.  8.]
 [ 3.  5.  6. ...  6.  6.  5.]
 [ 6.  7. 10. ...  9.  4.  8.]]
(11518, 10)

---show hand-10 fx ---

 fx_interface:
[-0.32197564 -0.21942202 -2.52409056 -1.27149923  0.459413    1.24033194
 -4.45229551 -0.11865911 -0.15206388  2.18820474]

 fx_realize:
[-0.32197564 -0.21942202 -2.52409056 -1.27149923  0.459413    1.24033194
 -4.45229551 -0.11865911 -0.15206388  2.18820474]

 decsion function:
[-0.32197564 -0.21942202 -2.52409056 -1.27149923  0.459413    1.24033194
 -4.45229551 -0.11865911 -0.15206388  2.18820474]

 y_pred:
[0 0 0 0 1 1 0 0 0 1]

---show com

In [1]:
# conv module check
from utils.check import conv_check
conv_check('rbf', 'a9a')

Training Finish: Time used 2.27
----label 0----
before X_temp_shape: (6118, 123)
before coef_temp_shape: (6118, 1)
after X_temp_shape: (6016, 123)
after coef_shape: (11750, 1)
n_parts: 47.0
47 map for each 128 item,total 6016
batch X_temp_shape: (47, 1, 128, 123)
batch coef_temp_shape: (47, 128, 1)
----label 1----
before X_temp_shape: (5632, 123)
before coef_temp_shape: (5632, 1)
after X_temp_shape: (5632, 123)
after coef_shape: (11750, 1)
n_parts: 44.0
44 map for each 128 item,total 5632
batch X_temp_shape: (44, 1, 128, 123)
batch coef_temp_shape: (44, 128, 1)
----data distribution----
train data - num:32561, distribution:[0, 1]
fit data - num:11750, distribution:[0, 1]
 
torch.Size([116, 123])
90.79% data - num:10556, distribution:[(0, 5452), (1, 5104)]
torch.Size([103, 123])
80.70% data - num:9373, distribution:[(0, 4841), (1, 4532)]
torch.Size([90, 123])
70.61% data - num:8190, distribution:[(0, 4230), (1, 3960)]
torch.Size([77, 123])
60.53% data - num:7007, distribution:[(0, 3619)

In [1]:
# Data module check: run the project's datamodule check with the rbf kernel on
# a9a. The recorded output shows the per-label map shapes and the X / fx sizes
# of a training batch.
from utils.check import datamodule_check
datamodule_check('rbf', 'a9a')

Training Finish: Time used 2.29
----label 0----
before X_temp_shape: (6118, 123)
before coef_temp_shape: (6118, 1)
after X_temp_shape: (6016, 123)
after coef_shape: (11750, 1)
n_parts: 47.0
47 map for each 128 item,total 6016
batch X_temp_shape: (47, 1, 128, 123)
batch coef_temp_shape: (47, 128, 1)
----label 1----
before X_temp_shape: (5632, 123)
before coef_temp_shape: (5632, 1)
after X_temp_shape: (5632, 123)
after coef_shape: (11750, 1)
n_parts: 44.0
44 map for each 128 item,total 5632
batch X_temp_shape: (44, 1, 128, 123)
batch coef_temp_shape: (44, 128, 1)
train data
X size: torch.Size([64, 123])
fx size: torch.Size([64])
tensor([ 4.8923,  4.2217,  5.0499,  6.2265,  5.1495,  5.3396,  8.9146,  8.2607,
        10.1467,  9.1276,  5.6376,  7.1242,  9.2530,  5.2876,  4.5125,  4.6420,
         8.7168,  7.3550,  8.9088,  8.6178,  4.2905,  7.9247,  6.6408,  5.5920,
         7.1739,  8.3154,  9.0454,  9.0461,  4.8636,  6.0341,  4.4665,  5.6854,
         9.3873,  6.1502,  5.1669,  6.2281,  