# FBCNet + Xgboost, fbc充当特征提取器, Xgboost充当分类器

In [15]:
import torch
import os
import random
import numpy as np
import xgboost
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.model_selection import GridSearchCV

import transforms
from fbc import FBCNet
from eegDataset import eegDataset

## 1. 制作XGBoost数据集

In [2]:
def read_folder_npy_data(folder_path):
    '''
    读取lyh的数据
    '''
    data_dict = {}

    # Filter only .npy files
    npy_files = [f for f in os.listdir(folder_path) if f.endswith('.npy')]

    # Load .npy files
    for filename in npy_files:
        file_path = os.path.join(folder_path, filename)
        
        try:
            # Attempt to load the file
            content = np.load(file_path, allow_pickle=True)

            # Print information about the loaded file
            print(f"Loaded {filename}: Shape={content.shape}, Dtype={content.dtype}")
            
            # Add to the dictionary
            data_dict[filename] = content
        
        except Exception as e:
            # Print an error message if loading fails
            print(f"Error loading {filename}: {e}")

    return data_dict



In [3]:
lyh_data_path = "Emotiv_dataloader-main/data"
data_dict = read_folder_npy_data(lyh_data_path)

Loaded right2_orginal.npy: Shape=(100,), Dtype=object
Loaded left1_orginal.npy: Shape=(100,), Dtype=object
Loaded nothing2_orginal.npy: Shape=(100,), Dtype=object
Loaded left3_orginal.npy: Shape=(100,), Dtype=object
Loaded right3_orginal.npy: Shape=(100,), Dtype=object
Loaded leg2_orginal.npy: Shape=(100,), Dtype=object
Loaded right_processed.npy: Shape=(15, 300000), Dtype=float64
Loaded left2_orginal.npy: Shape=(100,), Dtype=object
Loaded left_processed.npy: Shape=(15, 300000), Dtype=float64
Loaded nothing3_orginal.npy: Shape=(100,), Dtype=object
Loaded leg1_orginal.npy: Shape=(100,), Dtype=object
Loaded nothing1_orginal.npy: Shape=(100,), Dtype=object
Loaded nothing_processed.npy: Shape=(15, 300000), Dtype=float64
Loaded leg3_orginal.npy: Shape=(100,), Dtype=object
Loaded leg_processed.npy: Shape=(15, 300000), Dtype=float64
Loaded right1_orginal.npy: Shape=(100,), Dtype=object


In [4]:
keys = ["nothing_processed.npy", "left_processed.npy", "right_processed.npy", "leg_processed.npy"]
train_data, train_labels, test_data, test_labels = [], [], [], []
for key in keys:
    d = [data_dict[key][0:14, i:i+1000] for i in range(0, data_dict[key].shape[1], 1000)]
    train_data += d[:int(len(d) * 0.8)]
    train_labels += [keys.index(key)] * len(d[:int(len(d) * 0.8)])
    test_data += d[int(len(d) * 0.8):]
    test_labels += [keys.index(key)] * len(d[int(len(d) * 0.8):])

# 为了适配tranforms.py里的filterBank类
train_data = [{'data': d} for d in train_data]
test_data = [{'data': d} for d in test_data]

In [5]:
## bandfilter!!!
filterTransform = {'filterBank':{'filtBank':[[4,8],[8,12],[12,16],[16,20],[20,24], \
    [24,28],[28,32],[32,36],[36,40]],'fs':250, 'filtType':'filter'}}

# Check and compose transforms
if len(filterTransform) >1 :
    transform = transforms.Compose([transforms.__dict__[key](**value) for key, value in filterTransform.items()])
else:
    transform = transforms.__dict__[list(filterTransform.keys())[0]](**filterTransform[list(filterTransform.keys())[0]])

In [6]:
train_data = eegDataset(train_data, train_labels, transform=transform)
test_data = eegDataset(test_data, test_labels, transform=transform)

In [19]:
X_train, y_train, X_test, y_test = [], [], [], []

# fbc extractor, input shape: batch x 1 x chan x time x filterBand
features_extractor = FBCNet(nChan=14, nTime=1000, nClass=4)
# weight_path = os.getcwd() +"/best_model_lyh_4cls.pth"  
# features_extractor.load_state_dict(torch.load(weight_path, map_location=torch.device('cpu'))["model_state_dict"])

for d in train_data:
    i = d['data'].reshape(1, 1, 14, 1000, 9)
    features = features_extractor(i)[1]
    features = features.squeeze().flatten()
    X_train.append(features.detach().numpy())
    y_train.append(d['label'])

for d in test_data:
    i = d['data'].reshape(1, 1, 14, 1000, 9)
    features = features_extractor(i)[1]
    features = features.squeeze().flatten()
    X_test.append(features.detach().numpy())
    y_test.append(d['label'])



In [20]:
# 打乱数据集

combined_data = list(zip(X_train, y_train))
random.shuffle(combined_data)
X_train, y_train = zip(*combined_data)

combined_data = list(zip(X_test, y_test))
random.shuffle(combined_data)
X_test, y_test = zip(*combined_data)

X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)

In [21]:
X_train.shape

(960, 1152)

## 2. 训练
### 2.1 XGBoost

In [17]:
# xgboost train
xgb_model = XGBClassifier()
xgb_model = xgb_model.fit(X_train, y_train)

# fbc extractor: best_model_lyh_4cls  + XGBoost
y_train_pred = xgb_model.predict(X_train)
acc_train = (y_train == y_train_pred).sum() / len(y_train)

y_test_pred = xgb_model.predict(X_test)
acc_test = (y_test == y_test_pred).sum() / len(y_test)
acc_train, acc_test

(1.0, 0.37083333333333335)

In [24]:
xgb_model = XGBClassifier()
xgb_model = xgb_model.fit(X_train, y_train)

# fbc extractor: random init  + XGBoost
y_train_pred = xgb_model.predict(X_train)
acc_train = (y_train == y_train_pred).sum() / len(y_train)

y_test_pred = xgb_model.predict(X_test)
acc_test = (y_test == y_test_pred).sum() / len(y_test)
acc_train, acc_test

(1.0, 0.35)

### 2.2 lightgbm

In [60]:
# lgb_params = {
#         'boosting_type': 'gbdt',
#         'objective': 'regression',
#         'metric': 'mae',
#         'min_child_weight': 5,
#         'num_leaves': 2 ** 5,
#         'lambda_l2': 10,
#         'feature_fraction': 0.8,
#         'bagging_fraction': 0.8,
#         'bagging_freq': 4,
#         'learning_rate': 0.05,
#         'seed': 2023,
#         'nthread' : 16,
#         'verbose' : -1,
#     }

# no_info = lgb.callback.log_evaluation(period=-1) # 禁用训练日志输出。

In [61]:
# lgb_train = lgb.Dataset(X_train, label=y_train)
# lgb_test = lgb.Dataset(X_test, label=y_test)

In [63]:
# lgb_model = lgb.train(lgb_params, lgb_train, 200, valid_sets=lgb_test, callbacks=[no_info])

In [25]:
lgb_model = LGBMClassifier()
lgb_model.fit(X_train, y_train)

# fbc extractor: best_model_lyh_4cls  + LGBMClassifier
y_train_pred = lgb_model.predict(X_train)
acc_train = (y_train == y_train_pred).sum() / len(y_train)

y_test_pred = lgb_model.predict(X_test)
acc_test = (y_test == y_test_pred).sum() / len(y_test)
acc_train, acc_test

(1.0, 0.35)

In [26]:
lgb_model = LGBMClassifier()
lgb_model.fit(X_train, y_train)

# fbc extractor: random init  + LightGBM
y_train_pred = lgb_model.predict(X_train)
acc_train = (y_train == y_train_pred).sum() / len(y_train)

y_test_pred = lgb_model.predict(X_test)
acc_test = (y_test == y_test_pred).sum() / len(y_test)
acc_train, acc_test

(1.0, 0.35)

In [None]:
# fbc extractor: best_model_lyh_4cls  + lightgbm
# y_train_pred = lgb_model.predict(X_train, num_iteration=lgb_model.best_iteration)
# acc_train = (y_train == np.round(y_train_pred)).sum() / len(y_train)

# y_test_pred = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
# acc_test = (y_test == np.round(y_test_pred)).sum() / len(y_test)
# acc_train, acc_test

## 3. 调参

In [17]:
## 定义参数取值范围
learning_rate = [0.1, 0.3, 0.6]
feature_fraction = [0.5, 0.8, 1]
num_leaves = [16, 32, 64]
max_depth = [-1,3,5,8]

parameters = { 'learning_rate': learning_rate,
              'feature_fraction':feature_fraction,
              'num_leaves': num_leaves,
              'max_depth': max_depth}
model = LGBMClassifier(n_estimators = 50)

## 进行网格搜索
clf = GridSearchCV(model, parameters, cv=3, scoring='accuracy',verbose=3, n_jobs=-1)
clf = clf.fit(X_train, y_train)

Fitting 3 folds for each of 108 candidates, totalling 324 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:  8.3min
[Parallel(n_jobs=-1)]: Done 324 out of 324 | elapsed: 10.1min finished




In [18]:
clf.best_params_

{'feature_fraction': 0.5,
 'learning_rate': 0.1,
 'max_depth': -1,
 'num_leaves': 16}

In [19]:
# fbc extractor: best_model_lyh_4cls  + LGBMClassifier(GridSearchCV调参)
y_train_pred = clf.predict(X_train)
acc_train = (y_train == y_train_pred).sum() / len(y_train)

y_test_pred = clf.predict(X_test)
acc_test = (y_test == y_test_pred).sum() / len(y_test)
acc_train, acc_test

(1.0, 0.37083333333333335)