In [97]:
import pandas as pd
import numpy as np
from model.booster import GBDT_Muti
from model.dataset import DataSet
from model.configs import Configs


In [98]:
# Load the four pre-split yeast arrays (train/test features and labels) from
# their .npy files. The order of the suffix tuple fixes both the load order
# and which variable each array is bound to.
_yeast_dir = 'data/yeast'
x_train, y_train, x_test, y_test = [
    np.load('{}/yeast_{}.npy'.format(_yeast_dir, part))
    for part in ('traindata', 'trainlabel', 'testdata', 'testlabel')
]

In [99]:
# Sanity check: dimensions of the training label array.
np.shape(y_train)

(1554, 4)

In [100]:
# Sanity check: dimensions of the training feature array.
np.shape(x_train)

(1554, 103)

In [101]:
# Name of the dataset for this run; it selects the JSON config file below and
# is written to the experiment log at the end of the notebook.
dataset_name = 'yeast'

# Wrap the raw arrays in the project's DataSet container (train first, then test).
dataset_train, dataset_test = (
    DataSet(x_train, y_train),
    DataSet(x_test, y_test),
)

# Hyper-parameters are read from configs/configs_<dataset_name>.json.
configs = Configs(f'configs/configs_{dataset_name}.json')

In [102]:
# Build the multi-output GBDT from the loaded configuration and train it.
# fit() prints a "valid loss" and a "train loss" line per iteration (see the
# log below). NOTE(review): the second argument is presumably the validation
# set behind the "valid loss" lines, which would mean the test split is also
# steering training/early stopping -- confirm in model.booster.
gbdt = GBDT_Muti(configs)
gbdt.fit(dataset_train,dataset_test)


iter1 : valid loss=2.499250
iter1 : train loss=2.216114
iter2 : valid loss=2.169503
iter2 : train loss=1.941331
iter3 : valid loss=1.903361
iter3 : train loss=1.714587
iter4 : valid loss=1.687022
iter4 : train loss=1.529630
iter5 : valid loss=1.510427
iter5 : train loss=1.377527
iter6 : valid loss=1.363638
iter6 : train loss=1.254239
iter7 : valid loss=1.246302
iter7 : train loss=1.151269
iter8 : valid loss=1.147688
iter8 : train loss=1.066252
iter9 : valid loss=1.067579
iter9 : train loss=0.995795
iter10 : valid loss=1.001115
iter10 : train loss=0.938116
iter11 : valid loss=0.949619
iter11 : train loss=0.890353
iter12 : valid loss=0.906476
iter12 : train loss=0.850315
iter13 : valid loss=0.869650
iter13 : train loss=0.816899
iter14 : valid loss=0.840697
iter14 : train loss=0.787974
iter15 : valid loss=0.816493
iter15 : train loss=0.764273
iter16 : valid loss=0.795710
iter16 : train loss=0.744951
iter17 : valid loss=0.779447
iter17 : train loss=0.728036
iter18 : valid loss=0.767391
ite

In [103]:
# Score the test features twice with the fitted model: first for the
# per-label probabilities, then for the label predictions.
pred_prob, pred_labels = (
    gbdt.predict_set_prob(dataset_test.X),
    gbdt.predict_set_label(dataset_test.X),
)

In [104]:
# Convert the prediction arrays to DataFrames and persist them as CSV.
#
# The output filenames are derived from `dataset_name` instead of the
# previously hard-coded 'he' prefix: with dataset_name = 'yeast' the old paths
# silently overwrote the prediction files belonging to the 'he' dataset.
pred_prob_df = pd.DataFrame(pred_prob)
pred_labels_df = pd.DataFrame(pred_labels)
pred_prob_df.to_csv('./result/{}_pred_prob.csv'.format(dataset_name), index=False)
pred_labels_df.to_csv('./result/{}_pred_label.csv'.format(dataset_name), index=False)

In [105]:
# Display the predicted label table (one column per label; pandas truncates
# the rendered output for long frames).
pred_labels_df

Unnamed: 0,0,1,2,3
0,1,1,0,0
1,1,1,0,0
2,1,1,0,0
3,1,1,0,1
4,1,1,0,0
...,...,...,...,...
662,1,1,0,1
663,1,1,0,0
664,1,1,0,0
665,1,1,0,0


In [106]:
from sklearn.metrics import accuracy_score, f1_score, hamming_loss

In [107]:
# Macro-averaged F1: the per-label F1 scores are averaged with equal weight.
f1_score_macro = f1_score(
    y_true=y_test,
    y_pred=pred_labels_df,
    average='macro',
)
f1_score_macro

0.5980963494263093

In [108]:
# Micro-averaged F1: counts are pooled over all labels before computing F1.
f1_score_micro = f1_score(
    y_true=y_test,
    y_pred=pred_labels_df,
    average='micro',
)
f1_score_micro

0.7540453074433657

In [109]:
# Accuracy of the predicted labels against the test labels.
# NOTE(review): for multilabel indicator input sklearn reports subset accuracy
# (every label of a sample must match) -- assumes y_test is 2-D like y_train.
acc = accuracy_score(y_true=y_test, y_pred=pred_labels_df)
acc

0.3733133433283358

In [110]:
# Hamming loss: fraction of individual label predictions that are wrong.
ham = hamming_loss(y_true=y_test, y_pred=pred_labels_df)
ham

0.2848575712143928

In [111]:
# Load the running experiment log (one row per previous run) and show it.
record = pd.read_csv(filepath_or_buffer='record.csv')
record

Unnamed: 0,dataset,learn_rate,max_depth,stop_iter,f1_score_macro,f1_score_micro,acc,ham
0,he,0.5,3,12,0.36985,0.667254,0.531667,0.1575
1,he,0.1,3,49,0.398767,0.714405,0.598333,0.142917
2,he,0.3,3,23,0.380834,0.702929,0.578333,0.147917
3,he,0.8,4,10,0.418323,0.731984,0.615,0.137917
4,he,0.8,3,10,0.422834,0.728595,0.61,0.14
5,re,0.1,3,95,0.623452,0.847726,0.686667,0.09625
6,he,1.0,3,5,0.441586,0.738654,0.631667,0.139167
7,re,0.5,3,23,0.646683,0.843501,0.676667,0.098333
8,re,0.8,3,10,0.640336,0.842587,0.675,0.100417
9,re,0.8,4,17,0.681009,0.838966,0.658333,0.10125


In [112]:
# Add this run's hyper-parameters and test metrics to the experiment log and
# write the log back to disk.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# the new row is built as a one-row frame and joined with pd.concat instead.
# ignore_index=True gives the new row a fresh index label rather than a
# duplicate 0; the CSV is unaffected because the index is not written.
#
# NOTE: re-running this cell appends the same run again to `record`.
run_summary = pd.DataFrame([{
    "dataset": dataset_name,
    "learn_rate": configs.learn_rate,
    "max_depth": configs.max_depth,
    "stop_iter": gbdt.stop_iter,
    "f1_score_macro": f1_score_macro,
    "f1_score_micro": f1_score_micro,
    "acc": acc,
    "ham": ham,
}])
record = pd.concat([record, run_summary], ignore_index=True)
record.to_csv('record.csv', index=False)