In [1]:
#############################################################
# 1. Libraries

import pandas as pd
import numpy as np 
import os
import glob
from tqdm import tqdm
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
# import tensorflow as tf
# tf.keras.backend.clear_session()

# physical_devices = tf.config.list_physical_devices('GPU')

# try:
#     tf.config.experimental.set_memory_growth(physical_devices[0], True)
# except:
#     print('Invalid device or cannot modify virtual devices once initialized.')

# from tensorflow.keras import models, layers, regularizers, metrics, losses, optimizers
import pywt
import matplotlib.pyplot as plt
import gc

#############################################################

In [2]:
#############################################################
# 2. Paths & Global Variables

## 2.1 Paths

path = '../01_Data/'
path_sequences = f'{path}01_GeneratedSequences/'

# Load the train table and the sample-submission table from the data folder.
df_train = pd.read_csv(f'{path}train.csv')
df_sample_submission = pd.read_csv(f'{path}sample_submission.csv')

# Everything found directly under the train/ and test/ folders.
train_paths = glob.glob(f'{path}train/*')
test_paths = glob.glob(f'{path}test/*')

# Distinct segment ids listed in each table.
unique_segments_id_train = set(df_train['segment_id'])
unique_segments_id_test = set(df_sample_submission['segment_id'])

# Position <-> segment id lookups; positions follow the set's iteration order.
dict_unique_segments_train_id_inv = dict(enumerate(unique_segments_id_train))
dict_unique_segments_train_id = {
    segment_id: position
    for position, segment_id in dict_unique_segments_train_id_inv.items()
}

## 2.2 Global Variables

SEQ_LENGTH = 60_001

#############################################################

In [3]:
#############################################################
# 3. Global Functions


def buildSequences(dict_segment_paths_all, mask_value=-1.0, s=0):
    """Stack one sensor column from every stored segment into a single array.

    Note: a later cell in this notebook defines ``buildSequences`` again, and
    that later definition replaces this one once it has been executed.

    Parameters
    ----------
    dict_segment_paths_all : dict
        Maps a segment id to the path of an array file readable by ``np.load``.
    mask_value : float, optional
        Accepted for interface compatibility; not used by this function.
    s : int, optional
        Column index (sensor) to extract from each loaded array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dict_segment_paths_all), SEQ_LENGTH, 1)``. Row
        ``i`` holds column ``s`` of the trailing ``SEQ_LENGTH`` rows of the
        ``i``-th segment, in the dictionary's iteration order.
    """
    X = np.zeros((len(dict_segment_paths_all), SEQ_LENGTH, 1))
    for i, segment in enumerate(tqdm(dict_segment_paths_all, total=len(dict_segment_paths_all), position=0)):
        segment_path = dict_segment_paths_all[segment]
        # NOTE(review): allow_pickle=True can execute arbitrary code when the
        # file is untrusted; only load files generated by this project.
        # Index with the `s` parameter. The previous code used the global name
        # `sensor`, which ignored the argument and raised NameError whenever
        # no such global existed.
        values = np.load(segment_path, allow_pickle=True)[-SEQ_LENGTH:, s]
        # Add a trailing axis so the column matches the (SEQ_LENGTH, 1) slot.
        X[i] = np.expand_dims(values, -1)
    return X


#############################################################

In [4]:
#############################################################
# 4. Preprocess

# Segment id -> CSV path under the train/ folder.
dict_segment_paths_train = {
    seg_id: f'{path}train/{seg_id!s}.csv'
    for seg_id in unique_segments_id_train
}

# Segment id -> CSV path under the test/ folder.
dict_segment_paths_test = {
    seg_id: f'{path}test/{seg_id!s}.csv'
    for seg_id in unique_segments_id_test
}

# Segment id -> .npy path under the generated-sequences train/ folder.
dict_segments_sequences_paths_train = {
    seg_id: f'{path_sequences}train/{seg_id!s}.npy'
    for seg_id in unique_segments_id_train
}

# Segment id -> .npy path under the generated-sequences test/ folder.
dict_segments_sequences_paths_test = {
    seg_id: f'{path_sequences}test/{seg_id!s}.npy'
    for seg_id in unique_segments_id_test
}


#############################################################

In [5]:
def buildSequences(dict_segment_paths, mask_value=-1.0, s=0):
    """Load column ``s`` of every segment file into one stacked array.

    Parameters
    ----------
    dict_segment_paths : dict
        Maps a segment id to the path of an array file readable by ``np.load``.
    mask_value : float, optional
        Accepted but not used inside this function.
    s : int, optional
        Column index to read from each loaded array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dict_segment_paths), SEQ_LENGTH, 1)``, one row
        per segment in the dictionary's iteration order.
    """
    n_segments = len(dict_segment_paths)
    X = np.zeros((n_segments, SEQ_LENGTH, 1))
    progress = tqdm(dict_segment_paths, total=n_segments, position=0)
    for row, segment_id in enumerate(progress):
        sensor_column = np.load(dict_segment_paths[segment_id])[:, s]
        # Trailing axis added so the column fits the (SEQ_LENGTH, 1) slot.
        X[row] = sensor_column[..., np.newaxis]
    return X

In [6]:
#############################################################
# 5. Build Sequences

def _unmasked_mean_std(values, mask_value):
    """Return ``(mean, std)`` of the entries of ``values`` that differ from ``mask_value``."""
    # A boolean mask selects the same elements, in the same order, as indexing
    # with np.where(...), without building the large integer index arrays.
    kept = values[values != mask_value]
    return kept.mean(), kept.std()


mask_value = 0.0
# Statistics are computed over the train and test segments together.
dict_segment_paths_all = {**dict_segments_sequences_paths_train , **dict_segments_sequences_paths_test}

# *_1: statistics over every value; *_2: statistics over values != mask_value.
dict_means_1, dict_stds_1 = {}, {}
dict_means_2, dict_stds_2 = {}, {}

for sensor in range(10):
    X_sensor = buildSequences(dict_segment_paths_all, mask_value=mask_value, s=sensor)
    dict_means_1[sensor] = np.mean(X_sensor)
    dict_stds_1[sensor] = np.std(X_sensor)
    # Filter once and reuse the selection for both statistics.
    dict_means_2[sensor], dict_stds_2[sensor] = _unmasked_mean_std(X_sensor[:, :, 0], mask_value)
        
#############################################################

100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [02:15<00:00, 65.94it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:53<00:00, 78.60it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:53<00:00, 79.16it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:53<00:00, 79.20it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:52<00:00, 79.31it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:52<00:00, 79.46it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:52<00:00, 79.30it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 8951/8951 [01:53<00:00, 78.65it/s]
100%|███████████████████████████████████

In [8]:
# Show both sets of per-sensor statistics, each followed by a 100-asterisk rule.
print(
    f'{dict_means_1!s} {dict_stds_1!s}',
    '*****'*20,
    f'{dict_means_2!s} {dict_stds_2!s}',
    '*****'*20,
    sep='\n',
)

{0: 0.09421943291597953, 1: 0.9208114415834104, 2: -0.026617075839858038, 3: 0.09724443370400684, 4: 1.704695380910225, 5: -0.1180321202370159, 6: 0.7667902421713446, 7: 0.7804286101804458, 8: -0.2075797991904395, 9: 0.014516944212624944} {0: 1820.6211174856987, 1: 1931.0901612736805, 2: 1738.1671740163413, 3: 1669.8837574619292, 4: 568.5221048211192, 5: 1848.4917466767877, 6: 1623.353060255481, 7: 1618.2714709240895, 8: 1590.9403316558762, 9: 1906.41447528788}
****************************************************************************************************
{0: 0.0992026042612393, 1: 1.2060617571250518, 2: -0.02966338531178954, 3: 0.09754622823302604, 4: 2.32806892426326, 5: -0.11826701439654377, 6: 0.7763418428446927, 7: 0.8519833627444088, 8: -0.23542573879948414, 9: 0.01773786504093307} {0: 1868.1462371697446, 1: 2210.049269605032, 2: 1834.9394142025242, 3: 1672.472961594481, 4: 664.3870611243652, 5: 1850.3301619462713, 6: 1633.4324975266845, 7: 1690.8315356528867, 8: 1694.292227

In [None]:
# 1. Get stats for each segment, then average them

# dict_means = {0: -0.525764, 1: 1.10702551, 2: -1.18589638, 3: -0.11519057,  4:2.83976518,
#                 5: 0.13952936, 6: -0.15218268, 7: -0.00655127, 8: -0.1010594 , 9: -0.32835738}

# dict_stds = {0: 651.76880839, 1: 977.6527808, 2: 589.45645994 , 3: 648.11128631 , 4: 250.97889182,
#              5: 817.92460032, 6: 528.61988925 , 7: 550.02363897 , 8: 655.24171728, 9:1057.76947439}



---