In [1]:
import multiprocessing
from GCForest import gcForest
import pandas as pd
import numpy as np
import pickle 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import Imputer
import warnings
warnings.filterwarnings("ignore")
#用来计算程序运行时间
import datetime
starttime = datetime.datetime.now()

In [2]:
#读取Y
all_df_y = pickle.load(open("./dump_file/all_df_y","rb"))
all_df_y['2cArousal'] = 0
all_df_y['2cArousal'][all_df_y['valence'] >= 5] = 1
all_df_y['2cValence'] = 0
all_df_y['2cValence'][all_df_y['valence'] >= 5] = 1
print(all_df_y.head(5))

#读取1个通道的RSP数据，每个通道包含32×40=1280个信号样本
#每个样本向量大小为8064点（63s*128Hz）
all_df_RSP_x = pickle.load(open("./dump_file/all_df_RSP_x","rb"))
#y = all_df_y[['2cArousal']]
y = all_df_y[['2cValence']]
for seed in [0,100,200,300,400,500,600,700,800,900]:
    print("[seed:{}]****************************************************".format(seed))
    xTrainIdx = pickle.load(open("./dump_file/xTrainIdx_{}".format(seed),"rb"))
    xTestIdx = pickle.load(open("./dump_file/xTestIdx_{}".format(seed),"rb"))
    y_tr = y.loc[xTrainIdx]
    y_te = y.loc[xTestIdx]

    RSPTrainSet = all_df_RSP_x.loc[xTrainIdx]
    RSPTestSet = all_df_RSP_x.loc[xTestIdx]

    myWindowsSize = 256
    myStrideSize = 64
    gcf = gcForest(shape_1X=8064, window=myWindowsSize, stride=myStrideSize,tolerance=0.0,n_cascadeRF=1, 
                   min_samples_mgs=0.1, min_samples_cascade=0.1,n_jobs=19)
    if True:     
        print("RSP running multi-grain scan")
        xTrain,yTrain = RSPTrainSet.values,y_tr.values
        xTest = RSPTestSet.values   
        RSP_mgsTrainVector = gcf.mg_scanning(xTrain,yTrain)
        RSP_mgsTestVector = gcf.mg_scanning(xTest)
        filePath = "./dump_file_V2/RSP_mgsTrainVector_{}_{}_{}".format(myWindowsSize,myStrideSize,seed)
        pickle.dump(RSP_mgsTrainVector,open(filePath,"wb"))
        filePath = "./dump_file_V2/RSP_mgsTestVector_{}_{}_{}".format(myWindowsSize,myStrideSize,seed)
        pickle.dump(RSP_mgsTestVector,open(filePath,"wb"))
    else:
        filePath = "./dump_file_V2/RSP_mgsTrainVector_{}_{}".format(myWindowsSize,myStrideSize)
        RSP_mgsTrainVector = pickle.load(open(filePath,"rb"))
        filePath = "./dump_file_V2/RSP_mgsTestVector_{}_{}".format(myWindowsSize,myStrideSize)
        RSP_mgsTestVector = pickle.load(open(filePath,"rb"))

    X_tr_vector = RSP_mgsTrainVector
    X_te_vector = RSP_mgsTestVector
    print(X_tr_vector.shape)
    print(X_te_vector.shape)

    #有缺失值，填充下
    X_tr_vector_fillna= pd.DataFrame(X_tr_vector).fillna(0).values
    X_te_vector_fillna= pd.DataFrame(X_te_vector).fillna(0).values

    _ = gcf.cascade_forest(X_tr_vector_fillna, y_tr)

    pred_proba = gcf.cascade_forest(X_te_vector_fillna)
    tmp = np.mean(pred_proba, axis=0)
    preds = np.argmax(tmp, axis=1)
    print("ACC",accuracy_score(y_true=y_te, y_pred=preds))
    print("F1",f1_score(y_true=y_te, y_pred=preds))
    print("Recal",recall_score(y_true=y_te, y_pred=preds))
    print("Precision",precision_score(y_true=y_te, y_pred=preds))

       valence  arousal  dominance  liking  2cArousal  2cValence
s01_0     7.71     7.60       6.90    7.83          1          1
s01_1     8.10     7.31       7.28    8.47          1          1
s01_2     8.58     7.54       9.00    7.08          1          1
s01_3     4.94     6.01       6.12    8.06          0          0
s01_4     6.96     3.92       7.19    6.05          1          1
[seed:0]****************************************************
RSP running multi-grain scan
Slicing Sequence...
Training MGS Random Forests...
Slicing Sequence...
(896, 492)
(384, 492)
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.65
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.65
ACC 0.6692708333333334
F1 0.758095238095238
Recal 0.9170506912442397
Precision 0.6461038961038961
[seed:100]****************************************************
RSP running multi-grain scan
Slicing Sequence...
Training MGS Random Forests...
Slicing Sequence...
(896, 492)
(384, 492)
Adding/Train

In [3]:
#preds = np.zeros(shape=(384,))
#preds = np.ones(shape=(384,))
preds = np.ones(shape=(384,))
for i in range(0,384):
    temp = np.random.choice(a=2, size=1, replace=False, p=[0.5,0.5])
    preds[i] = temp
print("ACC",accuracy_score(y_true=y_te, y_pred=preds))
print("F1",f1_score(y_true=y_te, y_pred=preds))
print("Recal",recall_score(y_true=y_te, y_pred=preds))
print("Precision",precision_score(y_true=y_te, y_pred=preds))

ACC 0.4739583333333333
F1 0.4898989898989899
Recal 0.4470046082949309
Precision 0.5418994413407822
