### __Epochデータの作成・保存__

In [1]:
from utils import *

In [2]:
# Load the per-recording metadata tables for the training and test splits.
train, test = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in ("train_records.csv", "test_records.csv")
)

# Cell output: (rows, columns) of each table.
(train.shape, test.shape)

((108, 8), (45, 7))

In [4]:
# Load the sample submission. `meas_time` is parsed as datetime because
# read_and_set_annotation subtracts the recording start from it; selecting the
# column by name (rather than by position) keeps this correct even if the CSV
# column order changes.
sample_submission_df = pd.read_csv(
    SUBMISSION_DIR / "sample_submission.csv",
    parse_dates=["meas_time"],
)
sample_submission_df

Unnamed: 0,id,meas_time,condition
0,53c1555,1989-11-20 23:19:30,Sleep stage W
1,53c1555,1989-11-20 23:20:00,Sleep stage W
2,53c1555,1989-11-20 23:20:30,Sleep stage W
3,53c1555,1989-11-20 23:21:00,Sleep stage W
4,53c1555,1989-11-20 23:21:30,Sleep stage W
...,...,...,...
52291,9b444bb,1989-04-12 07:32:30,Sleep stage W
52292,9b444bb,1989-04-12 07:33:00,Sleep stage W
52293,9b444bb,1989-04-12 07:33:30,Sleep stage W
52294,9b444bb,1989-04-12 07:34:00,Sleep stage W


In [3]:
# Resolve the EDF file names stored in the record tables to paths under EDF_DIR.
# The values are kept as plain strings; they are later handed to
# mne.io.read_raw_edf / mne.read_annotations.
# NOTE(review): this overwrites the columns in place, so re-running the cell
# joins EDF_DIR onto already-joined values — presumably harmless only when
# EDF_DIR is absolute; confirm before re-executing without reloading the CSVs.

train["hypnogram"] = train["hypnogram"].apply(lambda edf_name: str(EDF_DIR / edf_name))
train["psg"] = train["psg"].apply(lambda edf_name: str(EDF_DIR / edf_name))
test["psg"] = test["psg"].apply(lambda edf_name: str(EDF_DIR / edf_name))

In [5]:
def read_and_set_annotation(record_df, include=None, is_test=False, submission_df=None):
    """Build one ``mne.Epochs`` object of 30-second epochs per recording.

    Training records (``is_test=False``): the hypnogram annotations are read,
    cropped so that the first and last five hours of the recording are
    dropped, attached to the PSG signal and converted to events in 30-second
    chunks.

    Test records (``is_test=True``): no hypnogram is read. Events are placed
    every 30 seconds from the earliest to the latest ``meas_time`` listed for
    the record's ``id`` in the submission table, all with the placeholder
    label ``'Sleep stage W'``.

    Args:
        record_df: DataFrame with one row per recording. Every row is read
            for ``psg``, ``id``, ``subject_id``, ``night``, ``age`` and
            ``sex``; training rows are also read for ``hypnogram``.
        include: Forwarded unchanged to ``mne.io.read_raw_edf``.
        is_test: Selects the test-record branch described above.
        submission_df: DataFrame with an ``id`` column and a datetime
            ``meas_time`` column; only used when ``is_test`` is true. When
            omitted, the notebook-level ``sample_submission_df`` is used.

    Returns:
        list: One ``mne.Epochs`` per row of ``record_df``, in row order. Each
        carries the record metadata and the window start (in seconds) under
        ``epoch.info["temp"]``.

    Raises:
        ValueError: If a test record has no rows in the submission table.
        AssertionError: If the number of epochs does not match the length of
            the selected window.
    """
    epoch_sec = 30        # length of one epoch, in seconds
    trim_sec = 3600 * 5   # seconds cut from each end of a training recording

    if is_test and submission_df is None:
        # Backward-compatible default: the table loaded earlier in the notebook.
        submission_df = sample_submission_df

    whole_epoch_data = []

    for _, row in tqdm(record_df.iterrows(), total=len(record_df)):
        # Read the PSG signal; the sampling rate comes from the file itself
        # instead of being assumed to be 100 Hz.
        psg_edf = mne.io.read_raw_edf(row["psg"], include=include, verbose=False)
        sfreq = psg_edf.info["sfreq"]

        if not is_test:
            # Training data: read the hypnogram (annotation) file.
            annot = mne.read_annotations(row["hypnogram"])

            # Keep only the annotations between trim_sec after the start and
            # trim_sec before the end of the recording.
            truncate_start_point = trim_sec
            truncate_end_point = (len(psg_edf) / sfreq) - trim_sec
            annot.crop(truncate_start_point, truncate_end_point, verbose=False)

            # Attach the cropped annotations and turn them into 30 s events.
            psg_edf.set_annotations(annot, emit_warning=False)
            events, _ = mne.events_from_annotations(
                psg_edf,
                event_id=RANK_LABEL2ID,
                chunk_duration=float(epoch_sec),
                verbose=False,
            )

            # NOTE(review): events are coded with RANK_LABEL2ID but epochs are
            # selected with LABEL2ID (both defined outside this notebook) —
            # confirm the two mappings use the same integer codes.
            event_id = LABEL2ID
        else:
            # Test data: derive the window from the submission timestamps.
            # The timezone is dropped so the start can be subtracted from the
            # (naive) meas_time values.
            start_psg_date = psg_edf.info["meas_date"].replace(tzinfo=None)

            meas_times = submission_df.loc[submission_df["id"] == row["id"], "meas_time"]
            if meas_times.empty:
                raise ValueError(f"no submission rows found for id {row['id']!r}")

            # Offsets (whole seconds) of the first epoch start and of the end
            # of the last epoch, relative to the start of the recording.
            truncate_start_point = int((meas_times.min() - start_psg_date).total_seconds())
            truncate_end_point = int((meas_times.max() - start_psg_date).total_seconds()) + epoch_sec

            # One event per epoch; column 0 holds the onset as a sample index,
            # the other two columns stay 0 (event code 0 = 'Sleep stage W').
            onsets_sec = np.arange(truncate_start_point, truncate_end_point, epoch_sec)
            events = np.zeros((len(onsets_sec), 3), dtype=int)
            events[:, 0] = np.round(onsets_sec * sfreq).astype(int)

            event_id = {'Sleep stage W': 0}

        # Each epoch spans 30 seconds; tmax stops one sample short so that
        # consecutive epochs do not share a sample.
        tmax = epoch_sec - 1. / sfreq
        epoch = mne.Epochs(raw=psg_edf, events=events, event_id=event_id, tmin=0, tmax=tmax, baseline=None, verbose=False, on_missing='ignore')

        # Check that no epochs are missing from the selected window.
        assert len(epoch.events) * epoch_sec == truncate_end_point - truncate_start_point, (
            f"epoch count does not cover the selected window for id {row['id']!r}"
        )

        # Attach the record metadata to the epochs object.
        epoch.info["temp"] = {
            "id": row["id"],
            "subject_id": row["subject_id"],
            "night": row["night"],
            "age": row["age"],
            "sex": row["sex"],
            "truncate_start_point": truncate_start_point
        }

        whole_epoch_data.append(epoch)

    return whole_epoch_data

In [6]:
# Build the epoch datasets: labelled epochs for the training records and
# placeholder-labelled epochs for the test records.
train_epochs = read_and_set_annotation(record_df=train)
test_epochs = read_and_set_annotation(record_df=test, is_test=True)

  0%|          | 0/108 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

In [10]:
# Save both epoch lists as pickle files under DATA_DIR.
with (DATA_DIR / 'train_epochs.pickle').open('wb') as train_file:
    pickle.dump(train_epochs, train_file)

with (DATA_DIR / 'test_epochs.pickle').open('wb') as test_file:
    pickle.dump(test_epochs, test_file)