# Cross-Session on Multiple Datasets
Author: LC.Pan  
Date: 2024-06-24  

In [None]:
import os, time
import json
import numpy as np
import itertools
import multiprocessing as mp
from joblib import Parallel, delayed, parallel_backend
from sklearn.model_selection import RepeatedStratifiedKFold
from contextlib import redirect_stdout, redirect_stderr
import torch

# 私有工具库
from loaddata import Dataset_Left_Right_MI
from deep_learning.dl_classifier import DL_Classifier
from pre_processing.preprocessing import Pre_Processing
from transfer_learning.tl_classifier import TL_Classifier
from transfer_learning import TLSplitter, encode_datasets


Loading Dataset

In [None]:
# Experiment parameters.
dataset_name = 'Pan2023'
fs = 250  # presumably the sampling rate in Hz -- confirm against Dataset_Left_Right_MI
freqband = [8, 30]  # forwarded to the loader as fmin / fmax
datapath = r'E:\工作进展\小论文2023会议\数据处理python\datasets'

# Build the dataset loader.
dataset = Dataset_Left_Right_MI(
    dataset_name,
    fs,
    fmin=freqband[0],
    fmax=freqband[1],
    tmin=0,
    tmax=4,
    path=datapath,
)

# Kept for reference: looping over every subject of the dataset.
# for sub in dataset.subjects:
#     print(f"Subject {sub}...")
#     # load the data
#     data = dataset.get_data()

# Only subject 1 is loaded here.
sub = [1]
data, label, info = dataset.get_data(sub)

In [None]:
# Distinct session labels, in order of first appearance in `info`.
session_values = info['session'].unique()
print('the session values are:',session_values)

# Index labels of `info` grouped by session; the resulting Series is indexed
# by the session label itself.
session_indices = info.groupby('session').apply(lambda x: x.index.tolist())

# Map each session label to its own index list.
# The mapping is taken straight from the grouped Series: `unique()` returns
# labels in order of appearance while `groupby` sorts its keys, so zipping the
# two could attach a session label to another session's indices.
session_index_dict = session_indices.to_dict()

# Collect the trials and labels of the first two sessions, one entry per session.
# NOTE(review): the index labels are used positionally on `data` / `label`,
# which assumes `info` has a default 0..n-1 index -- confirm.
Data, Label = [], []
for session in session_values[:2]:
    Data.append(data[session_index_dict[session]])
    Label.append(label[session_index_dict[session]])

# Encode the per-session lists into the arrays consumed by the transfer-learning tools.
X, y_enc, domain = encode_datasets(Data, Label)
print(X.shape, y_enc.shape, len(domain))
print(domain)

# The last domain entry is used as the target domain.
target_domain = domain[-1]

设置基于迁移学习的跨会话交叉验证评估索引

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Value assigned to the inner splitter's train_size;
# 0 switches the splitter to no-calibration mode instead.
train_size = 30

# Inner splitter wrapped by the transfer-learning splitter.
cv = StratifiedShuffleSplit(n_splits=3, random_state=42)
tl_cv = TLSplitter(target_domain=target_domain, cv=cv, no_calibration=False)

if train_size != 0:
    tl_cv.cv.train_size = train_size
else:
    tl_cv.no_calibration = True

# Sanity check: print the size of every train / test split.
for train, test in tl_cv.split(X, y_enc):
    print(len(train), len(test))

Create Pipelines

In [None]:
from joblib import Memory

# Cache directory handed to joblib.
cachedir = '../my_cache_directory'
memory = Memory(cachedir, verbose=0)

# Pre-processing settings.
# NOTE(review): fs_old / fs_new look like a 250 -> 160 resampling and
# start_time / end_time like a trial crop window -- confirm in Pre_Processing.
preprocess_options = dict(
    fs_new=160,
    fs_old=250,
    n_channels=None,
    start_time=0.5,
    end_time=3.5,
    lowcut=None,
    highcut=None,
)
preprocess = Pre_Processing(**preprocess_options)

# Transfer-learning classifier; each *_method selects one stage of the pipeline.
classifier_options = dict(
    dpa_method='EA',
    fee_method='CSP',
    fes_method='MIC-K',
    clf_method='SVM',
    pre_est=preprocess.process,
    memory=memory,
    target_domain=target_domain,
)
Model = TL_Classifier(**classifier_options)

In [None]:
from transfer_learning.algorithms import Algorithms

# Cache directory (later passed to Algorithms as memory_location).
cachedir = '../my_cache_directory'

# Pre-processing object whose `process` method is handed to the models.
preprocess = Pre_Processing(
    fs_new=160,
    fs_old=250,
    n_channels=None,
    start_time=0.5,
    end_time=3.5,
    lowcut=None,
    highcut=None,
)


Evaluating cross-session performance

In [None]:
from sklearn.model_selection import cross_val_score, cross_validate

# Cross-validate the model on the splits produced by tl_cv, using 10 parallel jobs.
scores = cross_validate(
    estimator=Model,
    X=X,
    y=y_enc,
    cv=tl_cv,
    n_jobs=10,
)

In [None]:
# Pull the per-split timings and scores out of the cross_validate result.
train_time, test_time, test_score = (
    scores[field] for field in ('fit_time', 'score_time', 'test_score')
)

# Mean fit / score time across splits.
timing_line = 'train time: %.3f s, test time: %.3f s' % (
    train_time.mean(),
    test_time.mean(),
)
print(timing_line)

# Mean and standard deviation of the test score across splits.
score_line = 'test score: %.3f +/- %.3f' % (test_score.mean(), test_score.std())
print(score_line)

In [None]:
import itertools

# Method tables, one per pipeline stage. A method's position in its table is
# the ID used in the index lists and tuples below.
DPA_METHODS = ['TLDummy', 'EA', 'RA', 'RPA']
FEE_METHODS = [None,'CSP', 'TRCSP', 'MDM', 'FGMDM', 'TS']
FES_METHODS = [None, 'ANOVA-F-K', 'ANOVA-F-P', 'MIC-K', 'MIC-P', 'PCA', 'LASSO', 'RFE', 'RFECV']
CLF_METHODS = [None, 'SVM', 'LDA', 'LR', 'KNN', 'DTC', 'RFC', 'ETC', 'ABC', 'GBC', 'GNB', 'MLP', 'XGBoost', 'CatBoost', 'LightGBM']
END_METHODS = [None,'RKNN', 'RKSVM', 'ABC-MDM', 'ABC-FGMDM', 'ABC-TSSVM', 'ABC-TSLDA', 'ABC-TSLR', 'MDWM', 'MEKT']
END_TO_END_METHODS = [None,'TRCA', 'DCPM', 'SBLEST']

# Stage tables in the order of the six positions of an algorithm-ID tuple.
METHOD_TABLES = (
    DPA_METHODS,
    FEE_METHODS,
    FES_METHODS,
    CLF_METHODS,
    END_METHODS,
    END_TO_END_METHODS,
)

# Every combination of method IDs: the full Cartesian product over all stages.
all_alg_list = list(itertools.product(*(range(len(table)) for table in METHOD_TABLES)))

## Hand-picked subset of method IDs per stage
dpa_list = [0, 1, 2, 3]  # 0: TLDummy, 1: EA, 2: RA, 3: RPA
fee_list = [0, 1, 5]  # 0: None, 1: CSP, 5: TS
fes_list = [0, 4, 6, 8]  # 0: None, 4: MIC-P, 6: LASSO, 8: RFECV
clf_list = [0, 1, 2, 3, 11]  # 0: None, 1: SVM, 2: LDA, 3: LR, 11: MLP
end_list = [0, 5, 6, 7, 8, 9]  # 0: None, 5: ABC-TSSVM, 6: ABC-TSLDA, 7: ABC-TSLR, 8: MDWM, 9: MEKT
end_to_end_list = [0, 1, 2, 3]  # 0: None, 1: TRCA, 2: DCPM, 3: SBLEST

# Group 1: DPA + feature extraction + feature selection + classifier;
# the END and END_TO_END stages are fixed to 0 (None).
all_clf_list1 = list(itertools.product(
    dpa_list,
    fee_list,
    fes_list,
    clf_list,
    [0],  # 0: None
    [0],  # 0: None
    ))

# Group 2: DPA + one END method; every other stage is fixed to 0 (None).
all_clf_list2 = list(itertools.product(
    dpa_list,
    [0],  # 0: None
    [0],  # 0: None
    [0],  # 0: None
    end_list,
    [0],  # 0: None
    ))

# Group 3: DPA + one END_TO_END method; every other stage is fixed to 0 (None).
all_clf_list3 = list(itertools.product(
    dpa_list,
    [0],  # 0: None
    [0],  # 0: None
    [0],  # 0: None
    [0],  # 0: None
    end_to_end_list,
    ))

# The all-None tail (dpa, 0, 0, 0, 0, 0) occurs in all three groups, so this
# concatenation contains duplicate tuples.
all_clf_list = all_clf_list1 + all_clf_list2 + all_clf_list3

# Translate every ID tuple of all_clf_list into a name -> IDs dictionary.
# Key: the stage method names joined with "-"; value: the list of method IDs.
# NOTE(review): stages whose method is None are left out of the key (an
# assumption) -- confirm this matches the naming used in the algorithm tables.
algorithm_dict = {}
for ids in all_clf_list:
    stage_names = [table[idx] for table, idx in zip(METHOD_TABLES, ids)]
    key = '-'.join(name for name in stage_names if name is not None)
    algorithm_dict[key] = list(ids)

In [None]:
import ast

import pandas as pd

# Load the algorithm table.
# `pd.read_` is not a pandas function. The path has a .csv extension, so
# read_csv is used and the Excel-only `sheet_name` argument is dropped.
# NOTE(review): if the source is really an Excel workbook with an 'algor-list'
# sheet, switch to pd.read_excel(<xlsx path>, sheet_name='algor-list').
df = pd.read_csv('transfer_learning/algorithms_part.csv')

# Take the first 150 rows of the KEY and ID columns.
g_values = df['KEY'].iloc[0:150]
v_values = df['ID'].iloc[0:150]

# Parse each ID string into a Python object. literal_eval only accepts
# literals, so text in the file cannot run arbitrary code the way eval() would.
v_values = [ast.literal_eval(i) for i in v_values]

# Build the KEY -> ID dictionary.
key_value_pairs = dict(zip(g_values, v_values))

# Show the dictionary.
print(key_value_pairs)

# Save the dictionary as a two-column CSV file.
df = pd.DataFrame(key_value_pairs.items(), columns=['KEY', 'ID'])
df.to_csv('transfer_learning/key_value_pairs.csv', index=False)

# Read the CSV file back.
df = pd.read_csv('transfer_learning/key_value_pairs.csv')

# Show what was written.
print(df)


In [None]:
import ast

import pandas as pd
from sklearn.model_selection import cross_validate

# Load the algorithm table.
df = pd.read_csv('transfer_learning/algorithms_part.csv')

# First row to evaluate; rows before it are skipped.
# NOTE(review): looks like a resume point from an earlier partial run; set to 0
# to evaluate every row.
start_row = 100

# Cross-validate the algorithm of each row and store its scores in the table.
for i in range(start_row, len(df)):
    # Algorithm name and ID of the current row.
    key = df.loc[i, 'Algorithms']
    # The ID is stored as text. literal_eval only accepts literals, so a cell
    # of the CSV cannot run arbitrary code the way eval() would.
    value = ast.literal_eval(df.loc[i, 'Algorithm_ID'])
    # Instantiate the model for this algorithm ID.
    Model = Algorithms(
        algorithm_id=value,
        target_domain=target_domain,
        pre_processing=preprocess.process,
        memory_location=cachedir,
    )
    # Evaluate the model on the transfer-learning splits, using all cores.
    scores = cross_validate(Model, X, y_enc, cv=tl_cv, n_jobs=-1)
    train_time = scores['fit_time']
    test_time = scores['score_time']
    test_score = scores['test_score']
    # Write the mean score and timings into the current row.
    df.loc[i, 'accuracy'] = test_score.mean()
    df.loc[i, 'train_time'] = train_time.mean()
    df.loc[i, 'test_time'] = test_time.mean()
    print(f"{i+1}. Method {key} has an accuracy of {test_score.mean():.2f} +/- {test_score.std():.2f}")

# Show the table with the computed results.
print(df)


# Save the table, including the new result columns, back to the same CSV file.
df.to_csv('transfer_learning/algorithms_part.csv', index=False)

In [None]:
from sklearn.model_selection import cross_validate

# Algorithm ID handed to Algorithms.
value = [0,4,0,0,0,0]

# Instantiate the model.
model_options = dict(
    algorithm_id=value,
    target_domain=target_domain,
    pre_processing=preprocess.process,
    memory_location=cachedir,
)
Model = Algorithms(**model_options)

# Fit and score the model on each split by hand, collecting the scores.
acc = []
for train, test in tl_cv.split(X, y_enc):
    X_train, X_test = X[train], X[test]
    y_train, y_test = y_enc[train], y_enc[test]
    Model.fit(X_train, y_train)
    score = Model.score(X_test, y_test)
    acc.append(score)
    print("Score: %0.2f" % score)

In [None]:
# Reload the algorithm table from disk.
df = pd.read_csv(filepath_or_buffer='transfer_learning/algorithms_part.csv')