See the following repository for details:  
https://github.com/tubo213/kaggle-child-mind-institute-detect-sleep-states

# Install additional libraries

In [1]:
# Offline install: --no-index keeps pip away from PyPI and --find-links makes it
# resolve packages from the files in the attached /kaggle/input/ex-library dataset.
!pip install hydra-core segmentation_models_pytorch==0.3.3 --no-index --find-links=/kaggle/input/ex-library

Looking in links: /kaggle/input/ex-library
Processing /kaggle/input/ex-library/hydra_core-1.3.2-py3-none-any.whl
Processing /kaggle/input/ex-library/segmentation_models_pytorch-0.3.3-py3-none-any.whl
Processing /kaggle/input/ex-library/pretrainedmodels-0.7.4.tar.gz (from segmentation_models_pytorch==0.3.3)
  Preparing metadata (setup.py) ... [?25l- done
[?25hProcessing /kaggle/input/ex-library/efficientnet_pytorch-0.7.1.tar.gz (from segmentation_models_pytorch==0.3.3)
  Preparing metadata (setup.py) ... [?25ldone
[?25hProcessing /kaggle/input/ex-library/timm-0.9.2-py3-none-any.whl (from segmentation_models_pytorch==0.3.3)
Processing /kaggle/input/ex-library/munch-4.0.0-py2.py3-none-any.whl (from pretrainedmodels==0.7.4->segmentation_models_pytorch==0.3.3)
Processing /kaggle/input/ex-library/omegaconf-2.3.0-py3-none-any.whl (from hydra-core)
Processing /kaggle/input/ex-library/antlr4-python3-runtime-4.9.3.tar.gz (from hydra-core)
  Preparing metadata (setup.py) ... [?2

In [2]:
# Work from the repository root so the later `python -m run.<module>` commands
# can resolve the `run` package.
%cd /kaggle/input/sleep-test/kaggle-child-mind-institute-detect-sleep-states/

/kaggle/input/sleep-test/kaggle-child-mind-institute-detect-sleep-states


In [3]:
# %cd /kaggle/input/cmi-code/kaggle-child-mind-institute-detect-sleep-states/

In [4]:
# Sanity check: list the contents of the current (repository) directory.
%ls

README.md  [0m[01;34mdata[0m/      pyproject.toml         requirements.lock  [01;34msrc[0m/
[01;34mbin[0m/       [01;34mnotebook[0m/  requirements-dev.lock  [01;34mrun[0m/               [01;34mtools[0m/


# Config

In [5]:
# Config
# Shared settings; each one is forwarded to the run.* commands below as a
# key=value override.
DURATION = 5760        # forwarded as duration=
DOWNSAMPLE_RATE = 2    # forwarded as downsample_rate=
PHASE = "test"         # forwarded as phase= to both prepare_data and inference
EXP_NAME = "exp001"    # forwarded as exp_name=

# Preprocess

In [6]:
# Preprocess the series for the phase chosen in the config cell (PHASE) before
# running inference.
!python -m run.prepare_data dir=kaggle phase=$PHASE

[0.1GB(+0.0GB):0.1sec] Load series 
  0%|                                                     | 0/3 [00:00<?, ?it/s][2023-12-04 08:34:22,104][numexpr.utils][INFO] - NumExpr defaulting to 4 threads.
100%|█████████████████████████████████████████████| 3/3 [00:00<00:00,  3.93it/s]
[0.1GB(+0.1GB):0.8sec] Save features 


# Inference
Run inference once per seed (42, then 30); only the `seed` argument differs between the two runs.

In [7]:
# Seed-42 inference run. It leaves keys_42.txt, ids_42.npy and preds_42.npy in
# /kaggle/working/; the keys and preds files are read back by the seed-ensemble
# cells. post_process.score_th / post_process.distance are repeated in the
# make_submission call further down.
!python -m run.inference\
    dir=kaggle\
    model.encoder_name=resnet34\
    model.encoder_weights=null\
    num_workers=2\
    exp_name=$EXP_NAME\
    weight.run_name=single\
    batch_size=64\
    duration=$DURATION\
    downsample_rate=$DOWNSAMPLE_RATE\
    post_process.score_th=0.005\
    post_process.distance=40\
    phase=$PHASE\
    seed=42

[2023-12-04 08:34:44,364][lightning_fabric.utilities.seed][INFO] - Global seed set to 42
[1.5GB(+0.0GB):0.0sec] load test dataloader 
load weight from "/kaggle/input/cmi-model/exp001/single/best_model.pth"
[3.6GB(+2.1GB):8.3sec] load model 
inference: 100%|██████████████████████████████████| 1/1 [00:06<00:00,  6.30s/it]
[4.7GB(+1.1GB):6.4sec] inference 
----------
make npy
[4.8GB(+0.0GB):0.0sec] make submission 


In [8]:
# # Delete files and folders that are no longer needed
# !rm -rf /kaggle/working/processed_data
# !rm -rf /kaggle/working/output

In [9]:
# Check which files the inference run left in the working directory.
!ls /kaggle/working/

__notebook__.ipynb  keys_42.txt  preds_42.npy	 submission.csv
ids_42.npy	    output	 processed_data


In [10]:
# Seed-30 inference run: same arguments as the seed-42 run, only `seed` differs.
# It leaves keys_30.txt, ids_30.npy and preds_30.npy in /kaggle/working/; the
# keys and preds files are read back by the seed-ensemble cells.
!python -m run.inference\
    dir=kaggle\
    model.encoder_name=resnet34\
    model.encoder_weights=null\
    num_workers=2\
    exp_name=$EXP_NAME\
    weight.run_name=single\
    batch_size=64\
    duration=$DURATION\
    downsample_rate=$DOWNSAMPLE_RATE\
    post_process.score_th=0.005\
    post_process.distance=40\
    phase=$PHASE\
    seed=30

[2023-12-04 08:35:14,510][lightning_fabric.utilities.seed][INFO] - Global seed set to 30
[1.5GB(+0.0GB):0.0sec] load test dataloader 
load weight from "/kaggle/input/cmi-model/exp001/single/best_model.pth"
[3.6GB(+2.1GB):3.4sec] load model 
inference: 100%|██████████████████████████████████| 1/1 [00:02<00:00,  2.18s/it]
[4.8GB(+1.1GB):2.2sec] inference 
----------
make npy
[4.8GB(+0.0GB):0.0sec] make submission 


In [11]:
# Check that both the seed-42 and seed-30 files are now in the working directory.
!ls /kaggle/working/

__notebook__.ipynb  ids_42.npy	 keys_42.txt  preds_30.npy  processed_data
ids_30.npy	    keys_30.txt  output       preds_42.npy  submission.csv


# Seed ensemble

In [12]:
import numpy as np
# Seed-30 outputs: the prediction array and its keys (one key per line).
preds_30 = np.load("/kaggle/working/preds_30.npy")
with open("/kaggle/working/keys_30.txt", "r") as keys_fh:
    keys_30 = keys_fh.read().splitlines()

In [13]:
# Seed-42 outputs: the prediction array and its keys (one key per line).
preds_42 = np.load("/kaggle/working/preds_42.npy")
with open("/kaggle/working/keys_42.txt", "r") as keys_fh:
    keys_42 = keys_fh.read().splitlines()

In [14]:
# The element-wise mean below pairs row i of one run with row i of the other,
# and the seed-30 keys are reused for the result. That is only valid when both
# runs produced the same keys in the same order, so fail loudly otherwise
# instead of silently averaging unrelated chunks.
if keys_30 != keys_42:
    raise ValueError(
        "keys_30 and keys_42 differ; predictions are not aligned and cannot be averaged"
    )

# Seed ensemble: plain average of the two runs.
preds = (preds_30 + preds_42) / 2
keys = keys_30

# Make submission file

In [15]:
import polars as pl
from scipy.signal import find_peaks

def post_process_for_seg(
    keys: list[str], preds: np.ndarray, score_th: float = 0.01, distance: int = 5000
) -> pl.DataFrame:
    """Make the submission dataframe for the segmentation task via peak detection.

    For each series, the chunks that belong to it are concatenated (in the order
    they appear in ``keys``) into one score trace per event, and
    ``scipy.signal.find_peaks`` selects the candidate event steps.

    Args:
        keys (list[str]): one key per row of ``preds``; key is
            "{series_id}_{chunk_id}". The series id is the text before the
            first "_".
        preds (np.ndarray): (num_series * num_chunks, duration, 2); the last
            axis holds the onset and wakeup scores, in that order.
        score_th (float, optional): minimum peak height, passed to
            ``find_peaks`` as ``height``. Defaults to 0.01.
        distance (int, optional): minimum number of steps between neighbouring
            peaks of the same event, passed to ``find_peaks`` as ``distance``.
            Defaults to 5000.

    Returns:
        pl.DataFrame: submission dataframe with columns row_id, series_id, step,
        event, score, sorted by (series_id, step). ``step`` is the peak's index
        in the concatenated trace of its series. If no peak is found at all, a
        single dummy onset row (step 0, score 0) is returned instead.

    Raises:
        ValueError: if ``keys`` is empty, because not even the dummy row can be
            attributed to a series.
    """
    if len(keys) == 0:
        raise ValueError("keys is empty: there is no series to build a submission for")

    series_ids = np.array([key.split("_")[0] for key in keys])
    unique_series_ids = np.unique(series_ids)

    records = []
    for series_id in unique_series_ids:
        # Stitch all chunks of this series into one (num_steps, 2) trace.
        series_idx = np.where(series_ids == series_id)[0]
        this_series_preds = preds[series_idx].reshape(-1, 2)

        for i, event_name in enumerate(["onset", "wakeup"]):
            this_event_preds = this_series_preds[:, i]
            steps = find_peaks(this_event_preds, height=score_th, distance=distance)[0]
            scores = this_event_preds[steps]

            records.extend(
                {
                    "series_id": series_id,
                    "step": step,
                    "event": event_name,
                    "score": score,
                }
                for step, score in zip(steps, scores)
            )

    if len(records) == 0:
        # No prediction at all: insert a single dummy row, attributed to the
        # last series id, so the submission is never empty.
        records.append(
            {
                "series_id": unique_series_ids[-1],
                "step": 0,
                "event": "onset",
                "score": 0,
            }
        )

    sub_df = pl.DataFrame(records).sort(by=["series_id", "step"])
    row_ids = pl.Series(name="row_id", values=np.arange(len(sub_df)))
    sub_df = sub_df.with_columns(row_ids).select(["row_id", "series_id", "step", "event", "score"])
    return sub_df



In [16]:
def make_submission(
    keys: list[str], preds: np.ndarray, downsample_rate, score_th, distance
) -> pl.DataFrame:
    """Build the submission dataframe from raw model predictions.

    Only channels 1 and 2 of the last axis of ``preds`` are handed to
    ``post_process_for_seg``, which treats them as the onset and wakeup scores.
    ``downsample_rate`` is accepted but not used here.

    Args:
        keys (list[str]): one key per row of ``preds``.
        preds (np.ndarray): predictions indexed as (chunk, step, channel).
        downsample_rate: unused.
        score_th: forwarded to ``post_process_for_seg`` as ``score_th``.
        distance: forwarded to ``post_process_for_seg`` as ``distance``.

    Returns:
        pl.DataFrame: the dataframe returned by ``post_process_for_seg``.
    """
    event_preds = preds[:, :, [1, 2]]  # type: ignore
    return post_process_for_seg(
        keys, event_preds, score_th=score_th, distance=distance  # type: ignore
    )

In [17]:
# Post-process the averaged predictions. score_th and distance repeat the
# post_process.score_th / post_process.distance values given to run.inference.
sub_df = make_submission(keys, preds, downsample_rate=2, score_th=0.005, distance=40)

In [18]:
# sub_df = post_process_for_seg(common_ids, preds, score_th=0.005, distance=40)

In [19]:
# Delete files and folders that are no longer needed: the intermediate
# directories and the per-seed .npy/.txt files (the arrays and keys used for the
# ensemble were already loaded in earlier cells).
!rm -rf /kaggle/working/processed_data
!rm -rf /kaggle/working/output
!rm -rf /kaggle/working/*.npy
!rm -rf /kaggle/working/*.txt

In [20]:
# Write the ensembled result to the submission file.
submission_path = "/kaggle/working/submission.csv"
sub_df.write_csv(submission_path)

In [21]:
# !ls /kaggle/working/

In [22]:
# sub_df