In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import time
from zipfile import ZipFile
import warnings
import pickle
import torch
from torch.utils.data import Dataset
import tsfel
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import lightgbm as lgb
from hiclass import LocalClassifierPerNode, LocalClassifierPerParentNode, LocalClassifierPerLevel
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
warnings.filterwarnings('ignore')

from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Prepare labels

In [3]:
train_y = pd.read_csv("../downloads/train_y_v0.1.0.csv")

In [10]:
level_labels = [
    ['Alarm', 'Command', 'Parameter', 'Sensor', 'Setpoint', 'Status'],
    ['Reset_Setpoint',
    'Usage_Sensor',
    'Pressure_Sensor',
    'Flow_Setpoint',
    'Static_Pressure_Setpoint',
    'Angle_Sensor',
    'Humidity_Setpoint',
    'Temperature_Sensor',
    'Temperature_Setpoint',
    'Supply_Air_Humidity_Sensor',
    'Outside_Air_CO2_Sensor',
    'Differential_Pressure_Setpoint',
    'Damper_Position_Setpoint',
    'Heating_Demand_Setpoint',
    'Cooling_Demand_Setpoint',
    'Current_Sensor',
    'Wind_Speed_Sensor',
    'Flow_Sensor',
    'Dew_Point_Setpoint',
    'Zone_Air_Dewpoint_Sensor',
    'Power_Sensor',
    'Position_Sensor',
    'Solar_Radiance_Sensor',
    'Duration_Sensor',
    'Time_Setpoint',
    'Discharge_Air_Dewpoint_Sensor',
    'Wind_Direction_Sensor',
    'Voltage_Sensor',
    'Zone_Air_Humidity_Sensor',
    'Demand_Sensor',
    'Speed_Setpoint',
    'Rain_Sensor',
    'Frequency_Sensor',
    'Outside_Air_Humidity_Sensor',
    'Outside_Air_Enthalpy_Sensor'],
    ['Air_Flow_Sensor',
    'Water_Temperature_Setpoint',
    'Water_Flow_Sensor',
    'Electrical_Power_Sensor',
    'Zone_Air_Humidity_Setpoint',
    'Heating_Temperature_Setpoint',
    'Air_Flow_Setpoint',
    'Energy_Usage_Sensor',
    'Supply_Air_Static_Pressure_Setpoint',
    'Air_Temperature_Sensor',
    'Valve_Position_Sensor',
    'Cooling_Temperature_Setpoint',
    'Water_Temperature_Sensor',
    'Load_Current_Sensor',
    'Damper_Position_Sensor',
    'Static_Pressure_Sensor',
    'Air_Temperature_Setpoint',
    'Thermal_Power_Sensor',
    'Differential_Pressure_Sensor'],
    ['Supply_Air_Temperature_Sensor',
    'Discharge_Air_Temperature_Sensor',
    'Discharge_Water_Temperature_Sensor',
    'Zone_Air_Temperature_Sensor',
    'Supply_Air_Static_Pressure_Sensor',
    'Outside_Air_Temperature_Setpoint',
    'Supply_Air_Temperature_Setpoint',
    'Chilled_Water_Supply_Flow_Sensor',
    'Chilled_Water_Supply_Temperature_Sensor',
    'Peak_Power_Demand_Sensor',
    'Room_Air_Temperature_Setpoint',
    'Hot_Water_Supply_Temperature_Sensor',
    'Active_Power_Sensor',
    'Min_Air_Temperature_Setpoint',
    'Return_Air_Temperature_Sensor',
    'Hot_Water_Flow_Sensor',
    'Chilled_Water_Differential_Temperature_Sensor',
    'Filter_Differential_Pressure_Sensor',
    'Max_Air_Temperature_Setpoint',
    'Cooling_Supply_Air_Temperature_Deadband_Setpoint',
    'Outside_Air_Temperature_Sensor',
    'Heating_Supply_Air_Temperature_Deadband_Setpoint',
    'Discharge_Air_Temperature_Setpoint',
    'Return_Water_Temperature_Sensor',
    'Reactive_Power_Sensor'],
    ['Low_Outside_Air_Temperature_Enable_Setpoint',
    'Cooling_Demand_Sensor',
    'Chilled_Water_Return_Temperature_Sensor',
    'Average_Zone_Air_Temperature_Sensor',
    'Warmest_Zone_Air_Temperature_Sensor',
    'Heating_Demand_Sensor',
    'Differential_Supply_Return_Water_Temperature_Sensor',
    'Hot_Water_Return_Temperature_Sensor',
    'Outside_Air_Lockout_Temperature_Setpoint']
]

In [12]:
def get_active_labels_np(row):
    """More efficient version using numpy"""
    arr = row.to_numpy() # convert to numpy array
    indices = np.where(arr == 1)[0] # get indices where value is 1
    labels = row.index[indices].tolist() # get labels from indices
    return labels

labelhir = train_y.apply(get_active_labels_np, axis=1).tolist()

In [11]:
tiers = {
    1: level_labels[0],
    2: level_labels[1],
    3: level_labels[2],
    4: level_labels[3],
    5: level_labels[4]
}

def get_tier(label):
    for tier_num, tier_list in tiers.items():
        if label in tier_list:
            return tier_num
    return None  # Handle cases where the label isn't found in any tier

def sort_labels(labels):
    return sorted(labels, key=lambda label: (get_tier(label) or float('inf'), label))


In [13]:
sorted_labelhir = [sort_labels(labels) for labels in labelhir]

label_hier = np.array(sorted_labelhir, dtype=object)

padded_label = pd.Series(label_hier).apply(lambda x: x + ['None'] * (5 - len(x)) if len(x) < 5 else x)

# Data Split

In [14]:
def create_folds(train_y, n_splits=3):
    # Create a label array for stratification
    # We'll use the first non-zero label for each row as the stratification target
    stratify_labels = []
    for _, row in train_y.iterrows():
        labels = row[train_y.columns != 'filename'].values
        # Get first non-negative label, or 0 if all negative
        first_positive = next((i for i, x in enumerate(labels) if x >= 0), 0)
        stratify_labels.append(first_positive)
    
    # Create StratifiedKFold object
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # Generate fold indices
    folds = []
    for train_idx, val_idx in skf.split(train_y, stratify_labels):
        folds.append({
            'train': train_idx,
            'val': val_idx
        })
    
    return folds

In [15]:
folds = create_folds(train_y)

# Load data

In [8]:
raw_train = pd.read_csv("../downloads/train_data_features_v3_fixed/train_features_full_v3.csv")
raw_train = raw_train.drop(columns=["Unnamed: 0"])

train_sets = [
    "../downloads/train_data_features_v3_fixed/train_features_full_v3.csv",
    "../downloads/train_data_features_v3_fixed/train_features_full_v3.csv",
]

# Training function

In [16]:
from typing import List

def train_random_forest(
    train_X: List[pd.DataFrame],
    _label,
    folds,
    model_class,
    params: dict,
    none_ratio_thr: float,
):
    """
    Train random forest models using k-fold cross validation
    
    Args:
        train_X: Training features DataFrame
        _label: Array of labels
        folds: List of dictionaries containing train/val indices
        
    Returns:
        tuple: (list of trained classifiers, list of scores, list of validation predictions)
    """
    classifiers = []
    val_feat_df_list = []

    for f_idx, fold in enumerate(folds):
        # Prepare train and validation data for this fold
        train_X_fold_list = []
        train_y_fold_list = []
        for trn_x in train_X:
            train_X_fold_list.append(trn_x.iloc[fold['train']])
            train_y_fold_list.append(_label[fold['train']])

        train_X_fold = pd.concat(train_X_fold_list)
        train_y_fold = np.concatenate(train_y_fold_list)

        valid_X_fold_list = []
        valid_y_fold_list = []
        for trn_x in train_X:
            valid_X_fold_list.append(trn_x.iloc[fold['val']])
            valid_y_fold_list.append(_label[fold['val']])

        val_X_fold = pd.concat(valid_X_fold_list)
        val_y_fold = np.concatenate(valid_y_fold_list)
        
        # Check the train_y_fold. If more than 30% of samples are labeled "None",
        # randomly sample from the "None" to make that ratio no more than 30%.
        none_mask = (train_y_fold == "None")
        none_count = np.sum(none_mask)
        total_samples = len(train_y_fold)
        none_ratio = none_count / total_samples if total_samples > 0 else 0

        if none_ratio > none_ratio_thr:
            # Calculate how many "None" labels we should keep (30% of total)
            max_none_to_keep = int(none_ratio_thr * (total_samples - none_count))

            # Randomly choose which "None" labels to keep
            none_indices = np.where(none_mask)[0]

            # Fix the random seed before shuffling for reproducibility
            rng = np.random.RandomState(f_idx)
            rng.shuffle(none_indices)
            
            keep_none_indices = none_indices[:max_none_to_keep]

            # Indices of all non-"None" labels
            other_indices = np.where(~none_mask)[0]

            # Combine indices to keep and then sort
            new_indices = np.concatenate([keep_none_indices, other_indices])
            new_indices = np.sort(new_indices)  # Sort so we can index the DataFrame consistently

            # Subset the training data
            train_X_fold = train_X_fold.iloc[new_indices]
            train_y_fold = train_y_fold[new_indices]

            # print(f"Sampled: none-ratio: {none_ratio}, removed: {none_count - max_none_to_keep}")

        print(f"Train size: {len(train_X_fold)}, Valid size: {len(val_X_fold)}")
        
        # Create and train Random Forest model
        model = model_class(**params)
        model.fit(train_X_fold, train_y_fold)
        
        classifiers.append(model)
        
        # Calculate score and save predictions on validation set
        val_preds = model.predict_proba(val_X_fold)
        val_pred_df = pd.DataFrame(data=val_preds, columns=model.classes_)

        val_fold_info = []
        for _f in range(len(train_X)):
            f_info = pd.DataFrame(data=fold['val'], columns=["fold_idx"])
            f_info['dataset_idx'] = _f
            val_fold_info.append(f_info)

        val_fold_idx = pd.concat(val_fold_info)
        
        val_feat_df = pd.concat([
            val_fold_idx.reset_index(drop=True),
            val_X_fold.reset_index(drop=True),
            val_pred_df,
        ], axis=1)

        val_feat_df_list.append(val_feat_df)

    return classifiers, val_feat_df_list

def setup_prev_level_prediction(predictions, fold_num, num_datasets):
    new_train_level_x = pd.concat([predictions[i] for i in range(fold_num)]).sort_values(['dataset_idx', 'fold_idx'])
    return [
        new_train_level_x[new_train_level_x['dataset_idx'] == i] \
            .drop(columns=['dataset_idx', 'fold_idx']) \
            .reset_index(drop=True)
        for i in range(num_datasets)
    ]

# Column Selection

1. For each class, linear regression with 1 vs rest to give a score to each feature

2. For each tier and overall, use different threshold based on selected feature to validate

In [33]:
def concat_minmax_split(df_list, bin_nums=1000):
    """
    1) Concatenate all DataFrames in df_list (same columns).
    2) Clip values to [10% quantile, 90% quantile] for each column.
    3) Perform min-max normalization on the combined DataFrame.
    4) Split back to list of DataFrames of original size.
    """
    # 1) Concatenate all DataFrames
    big_df = pd.concat(df_list, ignore_index=True)
    
    # 2) Clip values to the 10% and 90% quantiles for each column
    for col in tqdm(big_df.columns):
        cut_bins = []
        score_labels = []
        quants = big_df[col].quantile([i / bin_nums for i in range(bin_nums)])
        quants[0] = -np.inf
        for i, quant in enumerate(quants):
            if len(cut_bins) == 0 or quant != cut_bins[-1]:
                cut_bins.append(quant)
                score_labels.append(i)

        cut_bins.append(np.inf)
        
        big_df[col] = pd.cut(
            big_df[col],
            bins=cut_bins,
            labels=score_labels
        ).astype(float)

    # 4) Split back into the original list of DataFrames
    result_list = []
    start_idx = 0
    for df in df_list:
        size = len(df)
        sub_df = big_df.iloc[start_idx : start_idx + size].copy().reset_index(drop=True)
        result_list.append(sub_df)
        start_idx += size

    return result_list


In [34]:
train_X = concat_minmax_split([raw_train])[0]

100%|██████████| 161/161 [00:01<00:00, 83.57it/s]


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np

performance_res = []
feature_score_res = []
for _f, fold in enumerate(folds):
    X_train = raw_train.iloc[fold['train']]
    X_test = raw_train.iloc[fold['val']]

    _train_y = train_y.iloc[fold['train']]
    _test_y = train_y.iloc[fold['val']]

    fold_acc = {}
    fold_feat_rank = {}
    for cat in list(train_y.columns)[1:]:
        y_train = _train_y[cat].apply(lambda x: x > 0).astype(int)
        y_test = _test_y[cat].apply(lambda x: x > 0).astype(int)

        negative = len(y_train[y_train == 0])
        positive = len(y_train[y_train == 1])

        if negative <= 1 or positive <= 1:
            print(f"Skip {cat}")
            continue

        if len(y_test[y_test == 0]) <= 1 or len(y_test[y_test == 1]) <= 1:
            print(f"Skip {cat}")
            continue

        # Train a Logistic Regression model
        model = LogisticRegression(class_weight="balanced", random_state=42)
        model.fit(X_train, y_train)

        # Make predictions (for accuracy)
        y_pred = model.predict(X_test)

        # Get predicted probabilities (for AUROC)
        y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class

        # Evaluate accuracy
        accuracy = accuracy_score(y_test, y_pred)

        # Evaluate AUROC
        auroc = roc_auc_score(y_test, y_pred_proba)

        # Get feature importance scores (coefficients)
        feature_importance = model.coef_[0]  # Coefficients for each feature
        feature_names = X_train.columns  # Assuming X_train is a DataFrame with column names

        # Create a dictionary of feature importance scores
        feat_importance_dict = {feature_names[i]: abs(feature_importance[i]) for i in range(len(feature_names))}

        # Store feature importance dictionary
        fold_feat_rank[cat] = feat_importance_dict

        # Store results
        fold_acc[cat] = [accuracy, auroc, positive, negative]
        print(f"Fold {_f}, Accuracy: {accuracy:.2f}, AUROC: {auroc:.2f} ({cat})")

    feature_score_res.append(fold_feat_rank)
    performance_res.append(fold_acc)

Fold 0, Accuracy: 0.36, AUROC: 0.65 (Active_Power_Sensor)
Fold 0, Accuracy: 0.20, AUROC: 0.83 (Air_Flow_Sensor)
Fold 0, Accuracy: 0.18, AUROC: 0.72 (Air_Flow_Setpoint)
Fold 0, Accuracy: 0.67, AUROC: 0.43 (Air_Temperature_Sensor)
Fold 0, Accuracy: 0.90, AUROC: 0.51 (Air_Temperature_Setpoint)
Fold 0, Accuracy: 0.86, AUROC: 0.92 (Alarm)
Fold 0, Accuracy: 0.97, AUROC: 0.97 (Angle_Sensor)
Fold 0, Accuracy: 0.33, AUROC: 0.56 (Average_Zone_Air_Temperature_Sensor)
Fold 0, Accuracy: 0.31, AUROC: 0.75 (Chilled_Water_Differential_Temperature_Sensor)
Fold 0, Accuracy: 0.31, AUROC: 0.54 (Chilled_Water_Return_Temperature_Sensor)
Fold 0, Accuracy: 0.34, AUROC: 0.60 (Chilled_Water_Supply_Flow_Sensor)
Fold 0, Accuracy: 0.53, AUROC: 0.64 (Chilled_Water_Supply_Temperature_Sensor)
Fold 0, Accuracy: 0.72, AUROC: 0.75 (Command)
Skip Cooling_Demand_Sensor
Fold 0, Accuracy: 0.89, AUROC: 0.76 (Cooling_Demand_Setpoint)
Fold 0, Accuracy: 0.32, AUROC: 0.54 (Cooling_Supply_Air_Temperature_Deadband_Setpoint)
Fold 0

In [71]:
prep_merge = lambda _res: pd.DataFrame(_res).transpose().rename(columns={
    0: "Acc",
    1: "AUC",
    2: "Pos",
    3: "Neg"
})

performance = pd.concat([
    prep_merge(performance_res[i])
    for i in range(3)
]).reset_index().groupby('index').agg('mean').reset_index()

performance_map = dict(performance[['index', 'AUC']].values)

In [119]:
def get_one_label_feature(label, feat_rank, p_map):
    ips =  pd.DataFrame(feat_rank[label], index=["importance"]) \
        .transpose() \
        .sort_values('importance', ascending=False) \
        .reset_index()
    # ips['importance'] = ips['importance'] * p_map[label]
    return ips

In [120]:
LEVEL_LABLES = [
    ['Alarm', 'Command', 'Parameter', 'Sensor', 'Setpoint', 'Status'],
    ['Reset_Setpoint',
    'Usage_Sensor',
    'Pressure_Sensor',
    'Flow_Setpoint',
    'Static_Pressure_Setpoint',
    'Angle_Sensor',
    'Humidity_Setpoint',
    'Temperature_Sensor',
    'Temperature_Setpoint',
    'Supply_Air_Humidity_Sensor',
    'Outside_Air_CO2_Sensor',
    'Differential_Pressure_Setpoint',
    'Damper_Position_Setpoint',
    'Heating_Demand_Setpoint',
    'Cooling_Demand_Setpoint',
    'Current_Sensor',
    'Wind_Speed_Sensor',
    'Flow_Sensor',
    'Dew_Point_Setpoint',
    'Zone_Air_Dewpoint_Sensor',
    'Power_Sensor',
    'Position_Sensor',
    'Solar_Radiance_Sensor',
    'Duration_Sensor',
    'Time_Setpoint',
    'Discharge_Air_Dewpoint_Sensor',
    'Wind_Direction_Sensor',
    'Voltage_Sensor',
    'Zone_Air_Humidity_Sensor',
    'Demand_Sensor',
    'Speed_Setpoint',
    'Rain_Sensor',
    'Frequency_Sensor',
    'Outside_Air_Humidity_Sensor',
    'Outside_Air_Enthalpy_Sensor'],
    ['Air_Flow_Sensor',
    'Water_Temperature_Setpoint',
    'Water_Flow_Sensor',
    'Electrical_Power_Sensor',
    'Zone_Air_Humidity_Setpoint',
    'Heating_Temperature_Setpoint',
    'Air_Flow_Setpoint',
    'Energy_Usage_Sensor',
    'Supply_Air_Static_Pressure_Setpoint',
    'Air_Temperature_Sensor',
    'Valve_Position_Sensor',
    'Cooling_Temperature_Setpoint',
    'Water_Temperature_Sensor',
    'Load_Current_Sensor',
    'Damper_Position_Sensor',
    'Static_Pressure_Sensor',
    'Air_Temperature_Setpoint',
    'Thermal_Power_Sensor',
    'Differential_Pressure_Sensor'],
    ['Supply_Air_Temperature_Sensor',
    'Discharge_Air_Temperature_Sensor',
    'Discharge_Water_Temperature_Sensor',
    'Zone_Air_Temperature_Sensor',
    'Supply_Air_Static_Pressure_Sensor',
    'Outside_Air_Temperature_Setpoint',
    'Supply_Air_Temperature_Setpoint',
    'Chilled_Water_Supply_Flow_Sensor',
    'Chilled_Water_Supply_Temperature_Sensor',
    'Peak_Power_Demand_Sensor',
    'Room_Air_Temperature_Setpoint',
    'Hot_Water_Supply_Temperature_Sensor',
    'Active_Power_Sensor',
    'Min_Air_Temperature_Setpoint',
    'Return_Air_Temperature_Sensor',
    'Hot_Water_Flow_Sensor',
    'Chilled_Water_Differential_Temperature_Sensor',
    'Filter_Differential_Pressure_Sensor',
    'Max_Air_Temperature_Setpoint',
    'Cooling_Supply_Air_Temperature_Deadband_Setpoint',
    'Outside_Air_Temperature_Sensor',
    'Heating_Supply_Air_Temperature_Deadband_Setpoint',
    'Discharge_Air_Temperature_Setpoint',
    'Return_Water_Temperature_Sensor',
    'Reactive_Power_Sensor'],
    ['Low_Outside_Air_Temperature_Enable_Setpoint',
    'Cooling_Demand_Sensor',
    'Chilled_Water_Return_Temperature_Sensor',
    'Average_Zone_Air_Temperature_Sensor',
    'Warmest_Zone_Air_Temperature_Sensor',
    'Heating_Demand_Sensor',
    'Differential_Supply_Return_Water_Temperature_Sensor',
    'Hot_Water_Return_Temperature_Sensor',
    'Outside_Air_Lockout_Temperature_Setpoint']
]

In [121]:
tiers = {
    1: level_labels[0],
    2: level_labels[1],
    3: level_labels[2],
    4: level_labels[3],
    5: level_labels[4]
}

def get_tier(label):
    for tier_num, tier_list in tiers.items():
        if label in tier_list:
            return tier_num
    return None  # Handle cases where the label isn't found in any tier

def sort_labels(labels):
    return sorted(labels, key=lambda label: (get_tier(label) or float('inf'), label))

In [124]:
_level_fold = [[] for _ in range(5)]
for fs_id in range(3):
    for _level, _level_labels in enumerate(LEVEL_LABLES):
        _agg = []
        for _label in _level_labels:
            if _label in feature_score_res[fs_id] and _label in performance_map:
                _agg.append(get_one_label_feature(
                    _label, feature_score_res[fs_id], performance_map
                ))
        _agg_df = pd.concat(_agg).groupby(['index']).agg('mean').reset_index()
        _agg_df = _agg_df.sort_values('importance', ascending=False).reset_index(drop=True)

        _level_fold[_level].append(_agg_df)

In [134]:
for l in range(5):
    pd.concat(_level_fold[1]) \
        .groupby('index') \
        .agg('mean') \
        .reset_index() \
        .sort_values('importance', ascending=False) \
        .to_csv(f'../ensemble/config/linear_feature_importance/tier_{l}.csv', index=False)

In [138]:
list(raw_train.columns)

['0_Absolute energy',
 '0_Area under the curve',
 '0_Autocorrelation',
 '0_Average power',
 '0_Centroid',
 '0_ECDF Percentile Count_0',
 '0_ECDF Percentile Count_1',
 '0_ECDF Percentile_0',
 '0_ECDF Percentile_1',
 '0_ECDF_0',
 '0_ECDF_1',
 '0_ECDF_2',
 '0_ECDF_3',
 '0_ECDF_4',
 '0_ECDF_5',
 '0_ECDF_6',
 '0_ECDF_7',
 '0_ECDF_8',
 '0_ECDF_9',
 '0_Entropy',
 '0_Histogram mode',
 '0_Interquartile range',
 '0_Kurtosis',
 '0_Max',
 '0_Mean',
 '0_Mean absolute deviation',
 '0_Mean absolute diff',
 '0_Mean diff',
 '0_Median',
 '0_Median absolute deviation',
 '0_Median absolute diff',
 '0_Median diff',
 '0_Min',
 '0_Negative turning points',
 '0_Neighbourhood peaks',
 '0_Peak to peak distance',
 '0_Positive turning points',
 '0_Root mean square',
 '0_Signal distance',
 '0_Skewness',
 '0_Slope',
 '0_Standard deviation',
 '0_Sum absolute diff',
 '0_Variance',
 '0_Zero crossing rate',
 '0_Fundamental frequency',
 '0_Human range energy',
 '0_Max power spectrum',
 '0_Maximum frequency',
 '0_Median 

In [143]:
raw_train['0_Wavelet absolute mean_1.78Hz']

0           0.032664
1           0.012249
2           0.022363
3           2.087083
4           0.127394
            ...     
31834    9838.114440
31835       0.001089
31836       0.217839
31837       0.653516
31838       0.005445
Name: 0_Wavelet absolute mean_1.78Hz, Length: 31839, dtype: float64

In [144]:
raw_train['0_Wavelet standard deviation_1.78Hz']

0             1.842754
1             0.977356
2            46.548626
3           351.433275
4            19.583580
             ...      
31834    832523.315203
31835         0.129632
31836         5.012331
31837        15.036992
31838         0.177274
Name: 0_Wavelet standard deviation_1.78Hz, Length: 31839, dtype: float64