In [None]:
!pip install cesium

In [None]:
from cesium import datasets

In [None]:
# EEG: electroencephalogram
# Paper that used this data: https://perma.cc/YZD5-CTJF
# EEG recordings of healthy people with their eyes open or closed (two separate categories)
# EEG recordings from two brain regions not involved in seizures, taken from epilepsy patients while no seizure was occurring (two separate categories)
# Intracranial EEG recordings made during a seizure (single category)

eeg = datasets.fetch_andrzejak()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Pick a few sample recordings and plot them, one stacked panel per sample,
# labelling each panel with the class of that recording.
for panel, idx in enumerate((0, 300, 450), start=1):
    plt.subplot(3, 1, panel)
    plt.plot(eeg["measurements"][idx])
    # Wrap the label in a list: legend() treats a lone positional argument as a
    # sequence of labels, so a bare string would be split into characters and
    # only its first character would be shown.
    plt.legend([eeg["classes"][idx]])

In [None]:
# How to generate features with cesium
from cesium.featurize import featurize_time_series as ft

# Names of the features requested for every recording.
features_to_use = [
    "amplitude",
    "percent_beyond_1_std",
    "percent_close_to_median",
    "skew",
    "max_slope",
]

# Featurize all recordings; no measurement errors and no scheduler are supplied.
fset_cesium = ft(
    times=eeg["times"],
    values=eeg["measurements"],
    errors=None,
    features_to_use=features_to_use,
    scheduler=None,
)

In [None]:
# Preview the head of the generated feature set
fset_cesium.head()

In [None]:
import numpy as np
# Standard deviation of the recording at index 0
np.std(eeg["measurements"][0])

In [None]:
# Mean of the recording at index 0
np.mean(eeg["measurements"][0])

In [None]:
# Collect the indices of the samples in the first recording that lie more than
# one standard deviation away from the mean (this looks like a manual check of
# the "percent_beyond_1_std" feature).
sample_ts = eeg["measurements"][0]
sz = len(sample_ts)
# Derive the band from the series itself instead of hard-coding rounded
# mean/std values, so the check stays exact and follows the chosen sample.
sample_mean = np.mean(sample_ts)
sample_std = np.std(sample_ts)
ll = sample_mean - sample_std  # lower limit: mean - 1 std
ul = sample_mean + sample_std  # upper limit: mean + 1 std
quals = [i for i in range(sz) if sample_ts[i] < ll or sample_ts[i] > ul]

In [None]:
# Fraction of the sz samples whose index was collected in quals
len(quals)/sz

In [None]:
from sklearn.model_selection import train_test_split

# Split the feature matrix and the class labels into train and test parts,
# with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    fset_cesium.values,
    eeg["classes"],
    random_state=21,
)

In [None]:
## 1. Random Forest ##
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html

from sklearn.ensemble import RandomForestClassifier

# Build a small forest (10 trees, depth capped at 3, fixed seed) and fit it
# on the training split in a single chained expression.
rf_clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=3,
    random_state=21,
).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
rf_clf

In [None]:
# Return the accuracy on data that was not used for training
rf_clf.score(X_test, y_test)

In [None]:
## XGBoost ##

import xgboost as xgb

# Same budget as the random forest: 10 boosting rounds, depth capped at 3,
# fixed seed. Construct and fit in one chained expression.
# NOTE(review): recent xgboost releases expect integer-encoded class labels —
# confirm the dtype of y_train against the installed version.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=3,
    random_state=21,
).fit(X_train, y_train)

# Accuracy on the held-out split.
xgb_clf.score(X_test, y_test)

In [None]:
# Return the time taken by training

import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() follows the wall clock and can jump.
start = time.perf_counter()
xgb_clf.fit(X_train, y_train)
end = time.perf_counter()
# Elapsed seconds for fitting the XGBoost classifier.
end - start

In [None]:
# Time the random forest fit with the monotonic, high-resolution
# perf_counter() clock rather than the wall clock.
start = time.perf_counter()
rf_clf.fit(X_train, y_train)
end = time.perf_counter()
# Elapsed seconds for fitting the random forest classifier.
end - start

In [None]:
# Refit XGBoost with shallower trees (depth 2); ensemble size and seed unchanged.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=2,
    random_state=21,
).fit(X_train, y_train)
# Accuracy on the held-out split.
xgb_clf.score(X_test, y_test)

In [None]:
# Refit the random forest with shallower trees (depth 2); ensemble size and seed unchanged.
rf_clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=2,
    random_state=21,
).fit(X_train, y_train)
# Accuracy on the held-out split.
rf_clf.score(X_test, y_test)

In [None]:
# Refit XGBoost with decision stumps (depth 1); ensemble size and seed unchanged.
xgb_clf = xgb.XGBClassifier(
    n_estimators=10,
    max_depth=1,
    random_state=21,
).fit(X_train, y_train)
# Accuracy on the held-out split.
xgb_clf.score(X_test, y_test)

In [None]:
# Refit the random forest with decision stumps (depth 1); ensemble size and seed unchanged.
rf_clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=1,
    random_state=21,
).fit(X_train, y_train)
# Accuracy on the held-out split.
rf_clf.score(X_test, y_test)