In [1]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import xgboost as xgb
from scipy import stats
import shap
import seaborn as sns

def SHAP(bst, X_train_df):
    """Compute SHAP values for a tree model and draw the summary plot.

    Args:
        bst: Trained tree model handed to ``shap.TreeExplainer``.
        X_train_df: Feature table to explain; its second dimension sets how
            many features the summary plot displays.

    Returns:
        Whatever ``TreeExplainer.shap_values`` returns for ``X_train_df``.
    """
    contributions = shap.TreeExplainer(bst).shap_values(X_train_df)

    # Show every feature instead of SHAP's default top-N cut-off.
    n_features = X_train_df.shape[1]
    shap.summary_plot(contributions, X_train_df, max_display=n_features)
    return contributions

def BFI(bst):
    """Print and plot the booster's built-in feature importance.

    Importance is the ``'weight'`` score reported by ``bst.get_score``.

    Args:
        bst: Trained booster exposing ``get_score``.

    Returns:
        None. The function only prints and plots.
    """
    weight_by_feature = bst.get_score(importance_type='weight')

    # One line per feature, in the order the booster reports them.
    for name, weight in weight_by_feature.items():
        print(f"Feature: {name}, Importance: {weight}")

    # Bar chart of the same information.
    xgb.plot_importance(bst)
    plt.show()

def XGBoost(X, y, feature_names=None):
    """Train a binary XGBoost classifier on ``(X, y)`` and explain it with SHAP.

    The data is split 60/20/20 into train/validation/test (fixed seed). The
    booster is trained with early stopping on the validation split, training
    and test accuracy are printed, and a SHAP summary plot of the training
    split is drawn.

    Args:
        X: Feature matrix, either a ``pandas.DataFrame`` or an array-like.
        y: Binary labels aligned with the rows of ``X``.
        feature_names: Optional column names, applied positionally. When
            omitted, a DataFrame keeps its own column names and any other
            input falls back to the notebook-level ``column_names`` list.

    Returns:
        Tuple ``(shap_values, y_train_pred, X_train_df, explainer)``:
        the SHAP values of the training split, the predicted probabilities
        for the training split, the training features as a DataFrame, and
        the ``shap.TreeExplainer`` that produced the SHAP values.
    """
    if feature_names is None:
        # A DataFrame already names its columns. Reindexing it onto the global
        # list would turn non-matching columns into NaN and desynchronise the
        # SHAP input from the data the model was trained on.
        if isinstance(X, pd.DataFrame):
            feature_names = list(X.columns)
        else:
            feature_names = column_names

    # Hold out 20% for testing ...
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # ... then 25% of the remainder (20% of the total) for validation.
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, random_state=0)

    # Model parameters (CPU, binary classification). A 4-class GPU variant of
    # this pipeline lives in the standalone training cell of the notebook.
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'eta': 0.1,
        'max_depth': 4,
        'seed': 0
    }

    # Give every split the same named columns so the booster, the predictions
    # and the SHAP explanation all agree on the features.
    X_train_df = _to_feature_frame(X_train, feature_names)
    X_val_df = _to_feature_frame(X_val, feature_names)
    X_test_df = _to_feature_frame(X_test, feature_names)

    dtrain = xgb.DMatrix(X_train_df, label=y_train)
    dval = xgb.DMatrix(X_val_df, label=y_val)
    dtest = xgb.DMatrix(X_test_df)

    # The last entry of ``evals`` is the one early stopping monitors.
    evals = [(dtrain, 'train'), (dval, 'eval')]

    bst = xgb.train(params, dtrain, num_boost_round=10000, evals=evals, early_stopping_rounds=100)

    # xgb.train returns the model from the *last* round, which after early
    # stopping is up to ``early_stopping_rounds`` rounds past the best one.
    # Keep only the trees up to the best iteration so that predictions and
    # SHAP values both describe the best model.
    best_iteration = getattr(bst, 'best_iteration', None)
    if best_iteration is not None:
        bst = bst[: best_iteration + 1]

    # Predicted probabilities for the training and test splits.
    y_train_pred = bst.predict(dtrain)
    y_pred = bst.predict(dtest)

    # Probabilities are thresholded at 0.5 via round() before scoring.
    train_accuracy = accuracy_score(y_train, y_train_pred.round())
    print(f"Training Accuracy: {train_accuracy*100:.2f}%")

    test_accuracy = accuracy_score(y_test, y_pred.round())
    print(f"Test Accuracy: {test_accuracy*100:.2f}%")

    # SHAP is computed inline (rather than through SHAP()) because the
    # explainer itself is part of the return value.
    explainer = shap.TreeExplainer(bst)
    shap_values = explainer.shap_values(X_train_df)

    # Summary plot showing every feature.
    shap.summary_plot(shap_values, X_train_df, max_display=X_train_df.shape[1])

    # BFI(bst)  # optional: built-in feature importance
    return shap_values, y_train_pred, X_train_df, explainer


def _to_feature_frame(data, feature_names):
    """Return ``data`` as a DataFrame whose columns are ``feature_names`` (positional)."""
    names = list(feature_names)
    if isinstance(data, pd.DataFrame):
        # Copy so the caller's frame is never renamed in place.
        frame = data.copy()
        frame.columns = names
        return frame
    return pd.DataFrame(data, columns=names)

def normalize_per_state(df, start, end):
    """Standardize the rows ``start:end`` of ``df`` in place.

    A fresh ``StandardScaler`` is fitted on that row range only (all columns)
    and the scaled values are written back into the same positions.

    Args:
        df: DataFrame to modify.
        start: First row position of the range (inclusive).
        end: Row position at which the range stops (exclusive).

    Returns:
        The same ``df`` object, after modification.
    """
    rows = slice(start, end)
    standardized = StandardScaler().fit_transform(df.iloc[rows, :])
    df.iloc[rows, :] = standardized
    return df


In [2]:
raw = 0  # 1 -> load the raw recording (Raw.csv); 0 -> load emotion.csv

# Windowing constants for splitting the recording into one-second chunks
n_samples_per_second = 256  # 256 Hz sampling rate
total_seconds = 10  # total length in seconds

# Load the data
if raw:
    df = pd.read_csv('Raw.csv')
    #df = pd.read_csv('Raw_ICA.csv')
else:
    #df = pd.read_csv('Band.csv')
    df = pd.read_csv('emotion.csv')

# Standardize every feature column (zero mean, unit variance per column).
# The label is excluded by *name* rather than by position, so the result does
# not depend on 'State' being the last column of the CSV.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before any
# train/test split is made further down the notebook.
scaler = StandardScaler()
feature_columns = df.columns.drop('State')
df[feature_columns] = scaler.fit_transform(df[feature_columns])


In [3]:

# Per-sample analysis
# Separate the target from the features: 'State' is the label and every
# other column of df is a feature.
y = df['State']
X = df.drop(columns='State')

# Column names used to label the feature columns
column_names = [
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
]
# To train the binary model defined above:
#XGBoost(X, y)

# Disabled analysis, kept as a string literal rather than as comments. Because
# it is the last expression of the cell, the notebook echoes the string as the
# cell output. The code runs XGBoost(X, y), splits the SHAP values by the
# thresholded training prediction and correlates them with the feature values.
# NOTE(review): df_shap is built with a default index while X_train_df comes
# out of train_test_split, so corrwith may pair mismatched rows - confirm the
# index alignment before re-enabling this code.
'''
# XGBoostを実行して訓練データと予測値を取得
shap_values, y_train_pred, X_train_df, explainer = XGBoost(X, y)

"""
# 予測値をpandas DataFrameに変換
predictions = pd.DataFrame(y_train_pred, columns=['Prediction'])

# 予測値を二値化（0.5を閾値とする）
predictions_binary = predictions['Prediction'].apply(lambda x: 1 if x > 0.5 else 0)

# EEGデータと二値化した予測値を結合
combined_data = pd.concat([X_train_df, predictions_binary], axis=1)

# 相関行列を計算
correlation_matrix = combined_data.corr()

# 'Prediction'とEEGチャネルの相関を表示
print(correlation_matrix['Prediction'])

# 相関行列のヒートマップを描画
sns.heatmap(correlation_matrix, 
            xticklabels=correlation_matrix.columns,
            yticklabels=correlation_matrix.columns,
            cmap='RdBu_r',
            annot=True,
            linewidth=0.5)
"""
# 予測値を二値化（0.5を閾値とする）
y_train_pred_binary = [1 if pred > 0.5 else 0 for pred in y_train_pred]

# DataFrameに変換
df_shap = pd.DataFrame(shap_values, columns=column_names)
df_shap['Prediction'] = y_train_pred_binary

# 予測結果ごとにSHAP値を分割
df_shap_0 = df_shap[df_shap['Prediction'] == 0].drop(columns=['Prediction'])
df_shap_1 = df_shap[df_shap['Prediction'] == 1].drop(columns=['Prediction'])

# 各チャンネルについてSHAP値とEEG電圧との相関を計算
correlations_0 = df_shap_0.corrwith(X_train_df.loc[df_shap_0.index.intersection(X_train_df.index)])
correlations_1 = df_shap_1.corrwith(X_train_df.loc[df_shap_1.index.intersection(X_train_df.index)])

correlations_df = pd.DataFrame({
    'Prediction_0': correlations_0,
    'Prediction_1': correlations_1
})

correlations_df.T
'''

'\n# XGBoostを実行して訓練データと予測値を取得\nshap_values, y_train_pred, X_train_df, explainer = XGBoost(X, y)\n\n"""\n# 予測値をpandas DataFrameに変換\npredictions = pd.DataFrame(y_train_pred, columns=[\'Prediction\'])\n\n# 予測値を二値化（0.5を閾値とする）\npredictions_binary = predictions[\'Prediction\'].apply(lambda x: 1 if x > 0.5 else 0)\n\n# EEGデータと二値化した予測値を結合\ncombined_data = pd.concat([X_train_df, predictions_binary], axis=1)\n\n# 相関行列を計算\ncorrelation_matrix = combined_data.corr()\n\n# \'Prediction\'とEEGチャネルの相関を表示\nprint(correlation_matrix[\'Prediction\'])\n\n# 相関行列のヒートマップを描画\nsns.heatmap(correlation_matrix, \n            xticklabels=correlation_matrix.columns,\n            yticklabels=correlation_matrix.columns,\n            cmap=\'RdBu_r\',\n            annot=True,\n            linewidth=0.5)\n"""\n# 予測値を二値化（0.5を閾値とする）\ny_train_pred_binary = [1 if pred > 0.5 else 0 for pred in y_train_pred]\n\n# DataFrameに変換\ndf_shap = pd.DataFrame(shap_values, columns=column_names)\ndf_shap[\'Prediction\'] = y_train_pred_binar

In [None]:
# Hold out 30% for testing, then 30% of the remainder for validation.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.3, random_state=0)

# Model parameters (GPU, 4-class). The CPU / binary alternative is the
# parameter set used inside XGBoost().
# NOTE(review): 'gpu_hist' and 'predictor' are GPU settings of the XGBoost 1.x
# API - confirm them against the installed XGBoost version.
params = {
    'objective': 'multi:softmax',
    'eval_metric': 'mlogloss',
    'num_class': 4,
    'eta': 0.1,
    'max_depth': 12,
    'seed': 0,
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor'
}

# Training features as a DataFrame for SHAP. A DataFrame is used as-is so the
# SHAP input has exactly the columns the model is trained on; only unnamed
# array input is labelled with column_names.
if isinstance(X_train, pd.DataFrame):
    X_train_df = X_train
else:
    X_train_df = pd.DataFrame(X_train, columns=column_names)

# Convert the training and validation splits to XGBoost's DMatrix format.
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

# The last entry of ``evals`` is the one early stopping monitors.
evals = [(dtrain, 'train'), (dval, 'eval')]

# Train the model.
bst = xgb.train(params, dtrain, num_boost_round=10000, evals=evals, early_stopping_rounds=100)

# xgb.train returns the model from the last round, not the best one. Keep only
# the trees up to the best iteration so predictions and SHAP values describe
# the best model.
best_iteration = getattr(bst, 'best_iteration', None)
if best_iteration is not None:
    bst = bst[: best_iteration + 1]

# Convert the test split to DMatrix format.
dtest = xgb.DMatrix(X_test)

# Predicted class labels for the training and test splits.
y_train_pred = bst.predict(dtrain)
y_pred = bst.predict(dtest)

# Evaluation
train_accuracy = accuracy_score(y_train, y_train_pred.round())
print(f"Training Accuracy: {train_accuracy*100:.2f}%")

test_accuracy = accuracy_score(y_test, y_pred.round())
print(f"Test Accuracy: {test_accuracy*100:.2f}%")

# SHAP values, computed in batches of rows.
explainer = shap.TreeExplainer(bst)
batch_size = 50  # rows per batch
shap_batches = []

for batch_start in range(0, X_train_df.shape[0], batch_size):
    batch = X_train_df.iloc[batch_start:batch_start + batch_size]
    batch_values = explainer.shap_values(batch)
    # For a multi-class model SHAP may return one (rows, features) array per
    # class. Stack those on a trailing axis so that axis 0 is always the row
    # axis; concatenating the raw lists on axis 0 would mix classes with
    # batches and fail on a smaller final batch.
    if isinstance(batch_values, list):
        batch_values = np.stack(batch_values, axis=-1)
    shap_batches.append(batch_values)

# Join the batches along the row axis:
# (rows, features) or (rows, features, classes).
shap_values = np.concatenate(shap_batches, axis=0)

# Summary plot of the SHAP values (disabled)
#shap.summary_plot(shap_values, X_train_df, max_display=X_train_df.shape[1])

[0]	train-mlogloss:1.36316	eval-mlogloss:1.36561
[1]	train-mlogloss:1.34228	eval-mlogloss:1.34716
[2]	train-mlogloss:1.32384	eval-mlogloss:1.33098
[3]	train-mlogloss:1.30699	eval-mlogloss:1.31645
[4]	train-mlogloss:1.29196	eval-mlogloss:1.30353
[5]	train-mlogloss:1.27825	eval-mlogloss:1.29185
[6]	train-mlogloss:1.26565	eval-mlogloss:1.28122
[7]	train-mlogloss:1.25380	eval-mlogloss:1.27128
[8]	train-mlogloss:1.24254	eval-mlogloss:1.26197
[9]	train-mlogloss:1.23204	eval-mlogloss:1.25358
[10]	train-mlogloss:1.22206	eval-mlogloss:1.24565
[11]	train-mlogloss:1.21329	eval-mlogloss:1.23865
[12]	train-mlogloss:1.20457	eval-mlogloss:1.23188
[13]	train-mlogloss:1.19662	eval-mlogloss:1.22563
[14]	train-mlogloss:1.18826	eval-mlogloss:1.21927
[15]	train-mlogloss:1.18081	eval-mlogloss:1.21349
[16]	train-mlogloss:1.17365	eval-mlogloss:1.20813
[17]	train-mlogloss:1.16699	eval-mlogloss:1.20306
[18]	train-mlogloss:1.16064	eval-mlogloss:1.19845
[19]	train-mlogloss:1.15417	eval-mlogloss:1.19371
[20]	train

[163]	train-mlogloss:0.78582	eval-mlogloss:0.99321
[164]	train-mlogloss:0.78461	eval-mlogloss:0.99257
[165]	train-mlogloss:0.78192	eval-mlogloss:0.99160
[166]	train-mlogloss:0.78010	eval-mlogloss:0.99084
[167]	train-mlogloss:0.77839	eval-mlogloss:0.99017
[168]	train-mlogloss:0.77635	eval-mlogloss:0.98936
[169]	train-mlogloss:0.77300	eval-mlogloss:0.98789
[170]	train-mlogloss:0.77182	eval-mlogloss:0.98739
[171]	train-mlogloss:0.77034	eval-mlogloss:0.98671
[172]	train-mlogloss:0.76875	eval-mlogloss:0.98606
[173]	train-mlogloss:0.76722	eval-mlogloss:0.98542
[174]	train-mlogloss:0.76554	eval-mlogloss:0.98476
[175]	train-mlogloss:0.76298	eval-mlogloss:0.98383
[176]	train-mlogloss:0.76143	eval-mlogloss:0.98311
[177]	train-mlogloss:0.75980	eval-mlogloss:0.98247
[178]	train-mlogloss:0.75879	eval-mlogloss:0.98198
[179]	train-mlogloss:0.75720	eval-mlogloss:0.98138
[180]	train-mlogloss:0.75560	eval-mlogloss:0.98073
[181]	train-mlogloss:0.75374	eval-mlogloss:0.97996
[182]	train-mlogloss:0.75132	ev

[324]	train-mlogloss:0.55321	eval-mlogloss:0.91048
[325]	train-mlogloss:0.55173	eval-mlogloss:0.90997
[326]	train-mlogloss:0.55091	eval-mlogloss:0.90970
[327]	train-mlogloss:0.54946	eval-mlogloss:0.90925
[328]	train-mlogloss:0.54814	eval-mlogloss:0.90879
[329]	train-mlogloss:0.54667	eval-mlogloss:0.90833
[330]	train-mlogloss:0.54569	eval-mlogloss:0.90804
[331]	train-mlogloss:0.54488	eval-mlogloss:0.90781
[332]	train-mlogloss:0.54419	eval-mlogloss:0.90759
[333]	train-mlogloss:0.54302	eval-mlogloss:0.90725
[334]	train-mlogloss:0.54207	eval-mlogloss:0.90698
[335]	train-mlogloss:0.54124	eval-mlogloss:0.90671
[336]	train-mlogloss:0.54000	eval-mlogloss:0.90633
[337]	train-mlogloss:0.53872	eval-mlogloss:0.90593
[338]	train-mlogloss:0.53730	eval-mlogloss:0.90551
[339]	train-mlogloss:0.53589	eval-mlogloss:0.90513
[340]	train-mlogloss:0.53396	eval-mlogloss:0.90457
[341]	train-mlogloss:0.53298	eval-mlogloss:0.90429
[342]	train-mlogloss:0.53228	eval-mlogloss:0.90400
[343]	train-mlogloss:0.53112	ev

[485]	train-mlogloss:0.39334	eval-mlogloss:0.86588
[486]	train-mlogloss:0.39226	eval-mlogloss:0.86565
[487]	train-mlogloss:0.39133	eval-mlogloss:0.86537
[488]	train-mlogloss:0.39063	eval-mlogloss:0.86516
[489]	train-mlogloss:0.38955	eval-mlogloss:0.86492
[490]	train-mlogloss:0.38869	eval-mlogloss:0.86470
[491]	train-mlogloss:0.38795	eval-mlogloss:0.86450
[492]	train-mlogloss:0.38720	eval-mlogloss:0.86433
[493]	train-mlogloss:0.38655	eval-mlogloss:0.86416
[494]	train-mlogloss:0.38562	eval-mlogloss:0.86391
[495]	train-mlogloss:0.38489	eval-mlogloss:0.86374
[496]	train-mlogloss:0.38432	eval-mlogloss:0.86359
[497]	train-mlogloss:0.38367	eval-mlogloss:0.86343
[498]	train-mlogloss:0.38335	eval-mlogloss:0.86331
[499]	train-mlogloss:0.38263	eval-mlogloss:0.86312
[500]	train-mlogloss:0.38224	eval-mlogloss:0.86299
[501]	train-mlogloss:0.38187	eval-mlogloss:0.86289
[502]	train-mlogloss:0.38086	eval-mlogloss:0.86260
[503]	train-mlogloss:0.38030	eval-mlogloss:0.86243
[504]	train-mlogloss:0.37945	ev

[646]	train-mlogloss:0.28113	eval-mlogloss:0.83733
[647]	train-mlogloss:0.28046	eval-mlogloss:0.83713
[648]	train-mlogloss:0.27985	eval-mlogloss:0.83703
[649]	train-mlogloss:0.27941	eval-mlogloss:0.83694
[650]	train-mlogloss:0.27876	eval-mlogloss:0.83683
[651]	train-mlogloss:0.27807	eval-mlogloss:0.83667
[652]	train-mlogloss:0.27778	eval-mlogloss:0.83653
[653]	train-mlogloss:0.27700	eval-mlogloss:0.83632
[654]	train-mlogloss:0.27645	eval-mlogloss:0.83615
[655]	train-mlogloss:0.27585	eval-mlogloss:0.83597
[656]	train-mlogloss:0.27540	eval-mlogloss:0.83587
[657]	train-mlogloss:0.27462	eval-mlogloss:0.83570
[658]	train-mlogloss:0.27399	eval-mlogloss:0.83556
[659]	train-mlogloss:0.27343	eval-mlogloss:0.83547
[660]	train-mlogloss:0.27299	eval-mlogloss:0.83532
[661]	train-mlogloss:0.27261	eval-mlogloss:0.83521
[662]	train-mlogloss:0.27193	eval-mlogloss:0.83507
[663]	train-mlogloss:0.27138	eval-mlogloss:0.83491
[664]	train-mlogloss:0.27100	eval-mlogloss:0.83481
[665]	train-mlogloss:0.27027	ev

[807]	train-mlogloss:0.20047	eval-mlogloss:0.81815
[808]	train-mlogloss:0.19977	eval-mlogloss:0.81802
[809]	train-mlogloss:0.19920	eval-mlogloss:0.81790
[810]	train-mlogloss:0.19875	eval-mlogloss:0.81774
[811]	train-mlogloss:0.19802	eval-mlogloss:0.81759
[812]	train-mlogloss:0.19761	eval-mlogloss:0.81752
[813]	train-mlogloss:0.19713	eval-mlogloss:0.81736
[814]	train-mlogloss:0.19660	eval-mlogloss:0.81721
[815]	train-mlogloss:0.19632	eval-mlogloss:0.81711
[816]	train-mlogloss:0.19585	eval-mlogloss:0.81699
[817]	train-mlogloss:0.19537	eval-mlogloss:0.81688
[818]	train-mlogloss:0.19503	eval-mlogloss:0.81677
[819]	train-mlogloss:0.19484	eval-mlogloss:0.81676
[820]	train-mlogloss:0.19428	eval-mlogloss:0.81662
[821]	train-mlogloss:0.19391	eval-mlogloss:0.81654
[822]	train-mlogloss:0.19338	eval-mlogloss:0.81639
[823]	train-mlogloss:0.19290	eval-mlogloss:0.81628
[824]	train-mlogloss:0.19251	eval-mlogloss:0.81621
[825]	train-mlogloss:0.19218	eval-mlogloss:0.81616
[826]	train-mlogloss:0.19189	ev

[968]	train-mlogloss:0.14817	eval-mlogloss:0.80549
[969]	train-mlogloss:0.14792	eval-mlogloss:0.80539
[970]	train-mlogloss:0.14748	eval-mlogloss:0.80533
[971]	train-mlogloss:0.14714	eval-mlogloss:0.80528
[972]	train-mlogloss:0.14677	eval-mlogloss:0.80516
[973]	train-mlogloss:0.14661	eval-mlogloss:0.80515
[974]	train-mlogloss:0.14614	eval-mlogloss:0.80506
[975]	train-mlogloss:0.14585	eval-mlogloss:0.80500
[976]	train-mlogloss:0.14549	eval-mlogloss:0.80491
[977]	train-mlogloss:0.14520	eval-mlogloss:0.80484
[978]	train-mlogloss:0.14497	eval-mlogloss:0.80480
[979]	train-mlogloss:0.14475	eval-mlogloss:0.80474
[980]	train-mlogloss:0.14450	eval-mlogloss:0.80469
[981]	train-mlogloss:0.14421	eval-mlogloss:0.80464
[982]	train-mlogloss:0.14386	eval-mlogloss:0.80458
[983]	train-mlogloss:0.14340	eval-mlogloss:0.80448
[984]	train-mlogloss:0.14310	eval-mlogloss:0.80441
[985]	train-mlogloss:0.14287	eval-mlogloss:0.80433
[986]	train-mlogloss:0.14266	eval-mlogloss:0.80430
[987]	train-mlogloss:0.14239	ev

[1127]	train-mlogloss:0.10808	eval-mlogloss:0.79687
[1128]	train-mlogloss:0.10783	eval-mlogloss:0.79685
[1129]	train-mlogloss:0.10776	eval-mlogloss:0.79683
[1130]	train-mlogloss:0.10757	eval-mlogloss:0.79681
[1131]	train-mlogloss:0.10740	eval-mlogloss:0.79680
[1132]	train-mlogloss:0.10708	eval-mlogloss:0.79668
[1133]	train-mlogloss:0.10681	eval-mlogloss:0.79660
[1134]	train-mlogloss:0.10661	eval-mlogloss:0.79655
[1135]	train-mlogloss:0.10638	eval-mlogloss:0.79647
[1136]	train-mlogloss:0.10623	eval-mlogloss:0.79642
[1137]	train-mlogloss:0.10609	eval-mlogloss:0.79638
[1138]	train-mlogloss:0.10592	eval-mlogloss:0.79634
[1139]	train-mlogloss:0.10582	eval-mlogloss:0.79628
[1140]	train-mlogloss:0.10572	eval-mlogloss:0.79625
[1141]	train-mlogloss:0.10560	eval-mlogloss:0.79625
[1142]	train-mlogloss:0.10541	eval-mlogloss:0.79620
[1143]	train-mlogloss:0.10523	eval-mlogloss:0.79616
[1144]	train-mlogloss:0.10501	eval-mlogloss:0.79608
[1145]	train-mlogloss:0.10480	eval-mlogloss:0.79605
[1146]	train

[1285]	train-mlogloss:0.08055	eval-mlogloss:0.79039
[1286]	train-mlogloss:0.08037	eval-mlogloss:0.79035
[1287]	train-mlogloss:0.08035	eval-mlogloss:0.79033
[1288]	train-mlogloss:0.08030	eval-mlogloss:0.79032
[1289]	train-mlogloss:0.08021	eval-mlogloss:0.79031
[1290]	train-mlogloss:0.08010	eval-mlogloss:0.79031
[1291]	train-mlogloss:0.07993	eval-mlogloss:0.79026
[1292]	train-mlogloss:0.07983	eval-mlogloss:0.79024
[1293]	train-mlogloss:0.07974	eval-mlogloss:0.79020
[1294]	train-mlogloss:0.07972	eval-mlogloss:0.79019
[1295]	train-mlogloss:0.07953	eval-mlogloss:0.79014
[1296]	train-mlogloss:0.07947	eval-mlogloss:0.79013
[1297]	train-mlogloss:0.07943	eval-mlogloss:0.79012
[1298]	train-mlogloss:0.07936	eval-mlogloss:0.79011
[1299]	train-mlogloss:0.07924	eval-mlogloss:0.79009
[1300]	train-mlogloss:0.07917	eval-mlogloss:0.79007
[1301]	train-mlogloss:0.07910	eval-mlogloss:0.79002
[1302]	train-mlogloss:0.07901	eval-mlogloss:0.79002
[1303]	train-mlogloss:0.07894	eval-mlogloss:0.78998
[1304]	train

[1443]	train-mlogloss:0.06320	eval-mlogloss:0.78670
[1444]	train-mlogloss:0.06303	eval-mlogloss:0.78666
[1445]	train-mlogloss:0.06291	eval-mlogloss:0.78666
[1446]	train-mlogloss:0.06281	eval-mlogloss:0.78665
[1447]	train-mlogloss:0.06272	eval-mlogloss:0.78665
[1448]	train-mlogloss:0.06261	eval-mlogloss:0.78661
[1449]	train-mlogloss:0.06250	eval-mlogloss:0.78660
[1450]	train-mlogloss:0.06242	eval-mlogloss:0.78659
[1451]	train-mlogloss:0.06236	eval-mlogloss:0.78658
[1452]	train-mlogloss:0.06222	eval-mlogloss:0.78656
[1453]	train-mlogloss:0.06216	eval-mlogloss:0.78656
[1454]	train-mlogloss:0.06211	eval-mlogloss:0.78656
[1455]	train-mlogloss:0.06200	eval-mlogloss:0.78654
[1456]	train-mlogloss:0.06187	eval-mlogloss:0.78654
[1457]	train-mlogloss:0.06180	eval-mlogloss:0.78653
[1458]	train-mlogloss:0.06176	eval-mlogloss:0.78654
[1459]	train-mlogloss:0.06167	eval-mlogloss:0.78653
[1460]	train-mlogloss:0.06148	eval-mlogloss:0.78652
[1461]	train-mlogloss:0.06140	eval-mlogloss:0.78652
[1462]	train

[1601]	train-mlogloss:0.04935	eval-mlogloss:0.78494
[1602]	train-mlogloss:0.04925	eval-mlogloss:0.78495
[1603]	train-mlogloss:0.04920	eval-mlogloss:0.78496
[1604]	train-mlogloss:0.04913	eval-mlogloss:0.78493
[1605]	train-mlogloss:0.04907	eval-mlogloss:0.78491
[1606]	train-mlogloss:0.04905	eval-mlogloss:0.78491
[1607]	train-mlogloss:0.04900	eval-mlogloss:0.78491
[1608]	train-mlogloss:0.04896	eval-mlogloss:0.78492
[1609]	train-mlogloss:0.04893	eval-mlogloss:0.78489
[1610]	train-mlogloss:0.04883	eval-mlogloss:0.78490
[1611]	train-mlogloss:0.04878	eval-mlogloss:0.78488
[1612]	train-mlogloss:0.04873	eval-mlogloss:0.78486
[1613]	train-mlogloss:0.04868	eval-mlogloss:0.78486
[1614]	train-mlogloss:0.04860	eval-mlogloss:0.78484
[1615]	train-mlogloss:0.04853	eval-mlogloss:0.78484
[1616]	train-mlogloss:0.04844	eval-mlogloss:0.78478
[1617]	train-mlogloss:0.04834	eval-mlogloss:0.78480
[1618]	train-mlogloss:0.04829	eval-mlogloss:0.78479
[1619]	train-mlogloss:0.04825	eval-mlogloss:0.78477
[1620]	train

[1759]	train-mlogloss:0.04050	eval-mlogloss:0.78372
[1760]	train-mlogloss:0.04046	eval-mlogloss:0.78372
[1761]	train-mlogloss:0.04041	eval-mlogloss:0.78372
[1762]	train-mlogloss:0.04034	eval-mlogloss:0.78372
[1763]	train-mlogloss:0.04029	eval-mlogloss:0.78372
[1764]	train-mlogloss:0.04023	eval-mlogloss:0.78373
[1765]	train-mlogloss:0.04014	eval-mlogloss:0.78372
[1766]	train-mlogloss:0.04009	eval-mlogloss:0.78373
[1767]	train-mlogloss:0.04001	eval-mlogloss:0.78370
[1768]	train-mlogloss:0.03997	eval-mlogloss:0.78368
[1769]	train-mlogloss:0.03992	eval-mlogloss:0.78371
[1770]	train-mlogloss:0.03987	eval-mlogloss:0.78371
[1771]	train-mlogloss:0.03985	eval-mlogloss:0.78371
[1772]	train-mlogloss:0.03982	eval-mlogloss:0.78371
[1773]	train-mlogloss:0.03979	eval-mlogloss:0.78372
[1774]	train-mlogloss:0.03975	eval-mlogloss:0.78370
[1775]	train-mlogloss:0.03968	eval-mlogloss:0.78369
[1776]	train-mlogloss:0.03964	eval-mlogloss:0.78371
[1777]	train-mlogloss:0.03962	eval-mlogloss:0.78372
[1778]	train

In [None]:
count_label = 4  # number of distinct 'State' labels

# Labels as single-column DataFrames. They keep the index produced by
# train_test_split, which is what ties them back to the rows of X_train.
y_train_df = pd.DataFrame(y_train, columns=["State"])
y_test_df = pd.DataFrame(y_test, columns=["State"])

# Per label: index values of the training rows carrying that label
DataIndex = []
# Per label: the training feature rows selected by the matching DataIndex entry
PredictedLabel_index = []
# Per label: SHAP values of those rows
SHAP_EachLabel = []

for i in range(count_label):
    DataIndex.append(list(y_train_df[y_train_df["State"] == i].index))
    # Select the rows of label i. Dropping them instead would keep every row
    # that does NOT have the label.
    PredictedLabel_index.append(X_train.loc[DataIndex[i]])
    SHAP_EachLabel.append(explainer.shap_values(PredictedLabel_index[i]))

# Dependence plot of one channel's value against its SHAP value (disabled)
#shap.dependence_plot(ind = "O2", shap_values=shap_values_0_train, features=X_train_0 )

In [None]:
# Inspect the per-label SHAP values collected by the per-label loop.
# Nothing is appended here, so the cell can be re-run without growing the list.
print(SHAP_EachLabel)

In [None]:
# Correlation coefficient between each channel's value and its SHAP value
# NOTE(review): X_train_0 / X_train_1 and shap_values_0_train /
# shap_values_1_train are not defined anywhere in the visible notebook -
# confirm where they come from before running this cell on a fresh kernel.
data = []

# Loop over every feature
for feature in X_train.columns:
    # SHAP values and feature values for the rows classified as 0
    shap_values_0 = shap_values_0_train[:, X_train_0.columns.get_loc(feature)]
    feature_values_0 = X_train_0[feature]
    # SHAP values and feature values for the rows classified as 1
    shap_values_1 = shap_values_1_train[:, X_train_1.columns.get_loc(feature)]
    feature_values_1 = X_train_1[feature]

    # Pearson correlation: off-diagonal element of the 2x2 matrix from np.corrcoef
    correlation_coefficient_0 = np.corrcoef(shap_values_0, feature_values_0)[0, 1]
    correlation_coefficient_1 = np.corrcoef(shap_values_1, feature_values_1)[0, 1]

    data.append({'feature': feature,
                 'correlation_0': correlation_coefficient_0,
                 'correlation_1': correlation_coefficient_1})

# Build the table once from the collected records (no concatenation onto an
# empty frame), then transpose so that each feature becomes a column.
correlation_df = pd.DataFrame(data, columns=['feature', 'correlation_0', 'correlation_1']).T

# Display the table
correlation_df

In [None]:
"""
#１秒ごとに分割
# ラベルの作成（5秒閉じ、5秒開け）
labels_close = [0] * 5 *64 
labels_open = [1] * 5 * 64
labels = labels_close + labels_open

#labels = df.iloc[:, -1]

#14chデータ
n_samples_per_second = 4
total_seconds = 10  # 全体の秒数
#total_seconds = 3242  # 全体の秒数

# データとラベルを1秒ごとに分割
#X = np.array([df.iloc[i*n_samples_per_second:(i+1)*n_samples_per_second, :-1].values for i in range(total_seconds)])

X = np.array([df.iloc[i*n_samples_per_second:(i+1)*n_samples_per_second, :-1].values for i in range(total_seconds*64)])
#labels = np.array([stats.mode(y[i*n_samples_per_second:(i+1)*n_samples_per_second])[0][0] for i in range(total_seconds*64)])


# 1秒ごとのデータの平均を取得
X = X.mean(axis=1)

#XGBoost
XGBoost(X, labels)

#plot
#plot_importance(raw, model1)

"""