# Acknowledgement
### Thanks to [@cdeotte](https://www.kaggle.com/cdeotte) for sharing [this great finding](https://www.kaggle.com/cdeotte/ensemble-folds-with-median-0-153) on ensembling folds with the median. It certainly helped improve my score.

# GPU Info

In [2]:
!nvidia-smi

Fri Oct  8 14:45:02 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 471.41       Driver Version: 471.41       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   42C    P8    11W /  N/A |    121MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Imports

In [3]:
# Aesthetics
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

# General
from IPython.display import display
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import os
import glob
import random
import gc
gc.enable()
pd.set_option('display.max_columns', None)

# Utils
from sklearn import preprocessing
# Deep Learning
import tensorflow as tf
from tensorflow import keras
# Metrics
from sklearn.metrics import mean_absolute_error

# Random Seed Initialize
RANDOM_SEED = 42

def seed_everything(seed=RANDOM_SEED):
    """Seed every random number generator this notebook relies on.

    Exports ``seed`` as the ``PYTHONHASHSEED`` environment variable and then
    hands it to the NumPy, ``random`` and TensorFlow seeding functions.

    Args:
        seed: Seed value shared by all generators; defaults to ``RANDOM_SEED``.
    """
    os.environ.update(PYTHONHASHSEED=str(seed))
    # Same order as before: NumPy, then the stdlib generator, then TensorFlow.
    for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
        apply_seed(seed)
    
seed_everything()

In [1]:
# The three competition CSV files are resolved relative to `data_dir`
# (an empty string resolves them against the current working directory).
data_dir = ''
train_file_path, test_file_path, sample_sub_file_path = (
    os.path.join(data_dir, csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

# Directory that the prediction cell searches for saved `*.h5` models.
models_dir = '../input/google-brain-ventilator-tf-lstm-models/50_Features'

print(f'Train file: {train_file_path}')
print(f'Test file: {test_file_path}')
print(f'Sample Sub file: {sample_sub_file_path}')

Train file: train.csv
Test file: test.csv
Sample Sub file: sample_submission.csv


In [4]:
# Read the training data, the test data and the sample submission, in that order.
train_df, test_df, sub_df = map(
    pd.read_csv,
    (train_file_path, test_file_path, sample_sub_file_path),
)

In [5]:
# Preview the first rows of the raw test frame, then report its (rows, columns) shape.
display(test_df.head())
print(test_df.shape)

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.0,0.0,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.23061,0
4,5,0,5,20,0.127644,26.320956,0


(4024000, 7)


In [6]:
# Sorted distinct target values seen in training; the rounding cell at the end
# of the notebook snaps predictions onto a grid derived from them.
all_pressure = np.sort(train_df['pressure'].unique())
PRESSURE_MIN, PRESSURE_MAX = (all_pressure[edge].item() for edge in (0, -1))
# Gap between the two smallest values.
# NOTE(review): using it as the grid step assumes evenly spaced pressures — confirm.
PRESSURE_STEP = np.subtract(all_pressure[1], all_pressure[0]).item()

# Feature Engineering

In [7]:
# From https://www.kaggle.com/tenffe/finetune-of-tensorflow-bidirectional-lstm
def add_features(df):
    """Build the per-breath feature set and one-hot encode R, C and R__C.

    Features added, in this exact column order (the scaler and the saved
    models consume the columns positionally, so the order is preserved):

    * ``area``: cumulative sum of ``time_step * u_in`` within each breath.
    * ``u_in_cumsum``: cumulative sum of ``u_in`` within each breath.
    * ``u_{in,out}_lag{k}`` / ``u_{in,out}_lag_back{k}`` for k = 1..4:
      within-breath backward / forward shifts.
    * ``breath_id__u_in__max``, ``breath_id__u_out__max``: per-breath maxima.
    * ``u_{in,out}_diff{k}`` for k = 1..4: value minus its k-step lag.
    * ``breath_id__u_in__diffmax`` / ``breath_id__u_in__diffmean``: per-breath
      max / mean of ``u_in`` minus the row's ``u_in``.
    * ``cross`` (``u_in * u_out``) and ``cross2`` (``time_step * u_out``).

    All NaNs present after the lag step (including the shifted-out positions
    at breath edges) are replaced by 0 before the remaining features are
    computed, so the diff features at the start of a breath equal the raw
    value.

    Args:
        df: Frame with at least ``breath_id``, ``time_step``, ``u_in``,
            ``u_out``, ``R`` and ``C`` columns.

    Returns:
        A new DataFrame with the features above; ``R``, ``C`` and ``R__C`` are
        replaced by their ``pd.get_dummies`` indicator columns.

    Note:
        The ``area``, ``u_in_cumsum`` and lag columns are written onto the
        frame passed in before the working copy is made by ``fillna``; callers
        are expected to rebind the result (``df = add_features(df)``).
    """
    lag_steps = (1, 2, 3, 4)
    breath = df['breath_id']

    df['area'] = (df['time_step'] * df['u_in']).groupby(breath).cumsum()
    df['u_in_cumsum'] = df['u_in'].groupby(breath).cumsum()

    # Group the two signals once and shift both together instead of building
    # a fresh groupby for every single lag column.
    signals = df[['u_in', 'u_out']].groupby(breath)
    for k in lag_steps:
        past = signals.shift(k)
        future = signals.shift(-k)
        df[f'u_in_lag{k}'] = past['u_in']
        df[f'u_out_lag{k}'] = past['u_out']
        df[f'u_in_lag_back{k}'] = future['u_in']
        df[f'u_out_lag_back{k}'] = future['u_out']

    # From here on `df` is a new frame; the caller's frame is no longer touched.
    df = df.fillna(0)
    breath = df['breath_id']

    # Per-breath aggregates are computed once and reused below.
    u_in_by_breath = df['u_in'].groupby(breath)
    u_in_max = u_in_by_breath.transform('max')
    u_in_mean = u_in_by_breath.transform('mean')
    df['breath_id__u_in__max'] = u_in_max
    df['breath_id__u_out__max'] = df['u_out'].groupby(breath).transform('max')

    for k in (1, 2):
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']

    # These two sit between diff2 and diff3 to keep the original column order.
    df['breath_id__u_in__diffmax'] = u_in_max - df['u_in']
    df['breath_id__u_in__diffmean'] = u_in_mean - df['u_in']

    for k in (3, 4):
        df[f'u_in_diff{k}'] = df['u_in'] - df[f'u_in_lag{k}']
        df[f'u_out_diff{k}'] = df['u_out'] - df[f'u_out_lag{k}']

    df['cross'] = df['u_in'] * df['u_out']
    df['cross2'] = df['time_step'] * df['u_out']

    # Cast to strings so get_dummies one-hot encodes them instead of keeping
    # them as numeric columns.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df['R'] + '__' + df['C']
    return pd.get_dummies(df)

In [8]:
# Run the same feature engineering on both splits; each name is rebound to the
# frame returned by add_features.
train_df = add_features(train_df)
test_df = add_features(test_df)

# Preview the engineered test frame and report its (rows, columns) shape.
display(test_df.head())
print(test_df.shape)

Unnamed: 0,id,breath_id,time_step,u_in,u_out,area,u_in_cumsum,u_in_lag1,u_out_lag1,u_in_lag_back1,u_out_lag_back1,u_in_lag2,u_out_lag2,u_in_lag_back2,u_out_lag_back2,u_in_lag3,u_out_lag3,u_in_lag_back3,u_out_lag_back3,u_in_lag4,u_out_lag4,u_in_lag_back4,u_out_lag_back4,breath_id__u_in__max,breath_id__u_out__max,u_in_diff1,u_out_diff1,u_in_diff2,u_out_diff2,breath_id__u_in__diffmax,breath_id__u_in__diffmean,u_in_diff3,u_out_diff3,u_in_diff4,u_out_diff4,cross,cross2,R_20,R_5,R_50,C_10,C_20,C_50,R__C_20__10,R__C_20__20,R__C_20__50,R__C_50__10,R__C_50__20,R__C_50__50,R__C_5__10,R__C_5__20,R__C_5__50
0,1,0,0.0,0.0,0,0.0,0.0,0.0,0.0,7.515046,0.0,0.0,0.0,14.651675,0.0,0.0,0.0,21.23061,0.0,0.0,0.0,26.320956,0.0,37.542219,1,0.0,0.0,0.0,0.0,37.542219,9.327338,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0
1,2,0,0.031904,7.515046,0,0.239758,7.515046,0.0,0.0,14.651675,0.0,0.0,0.0,21.23061,0.0,0.0,0.0,26.320956,0.0,0.0,0.0,30.486938,0.0,37.542219,1,7.515046,0.0,7.515046,0.0,30.027173,1.812292,7.515046,0.0,7.515046,0.0,0.0,0.0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0
2,3,0,0.063827,14.651675,0,1.174935,22.166721,7.515046,0.0,21.23061,0.0,0.0,0.0,26.320956,0.0,0.0,0.0,30.486938,0.0,0.0,0.0,33.54595,0.0,37.542219,1,7.13663,0.0,14.651675,0.0,22.890543,-5.324338,14.651675,0.0,14.651675,0.0,0.0,0.0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0
3,4,0,0.095751,21.23061,0,3.207788,43.397331,14.651675,0.0,26.320956,0.0,7.515046,0.0,30.486938,0.0,0.0,0.0,33.54595,0.0,0.0,0.0,35.7176,0.0,37.542219,1,6.578935,0.0,13.715564,0.0,16.311609,-11.903272,21.23061,0.0,21.23061,0.0,0.0,0.0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0
4,5,0,0.127644,26.320956,0,6.567489,69.718287,21.23061,0.0,30.486938,0.0,14.651675,0.0,33.54595,0.0,7.515046,0.0,35.7176,0.0,0.0,0.0,36.971061,0.0,37.542219,1,5.090346,0.0,11.669281,0.0,11.221263,-16.993619,18.805911,0.0,26.320956,0.0,0.0,0.0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0


(4024000, 52)


In [None]:
# Drop the identifier columns (and the training target) so that only model
# inputs are scaled.
train_df = train_df.drop(columns=['id', 'pressure', 'breath_id'])
test_df = test_df.drop(columns=['id', 'breath_id'])

# The one-hot columns are produced separately for each split, so make sure both
# frames expose exactly the same features, then force the training column order
# before anything is handled positionally.
missing_in_test = train_df.columns.difference(test_df.columns)
extra_in_test = test_df.columns.difference(train_df.columns)
if len(missing_in_test) or len(extra_in_test):
    raise ValueError(
        'Train/test feature mismatch: '
        f'missing in test={list(missing_in_test)}, '
        f'unexpected in test={list(extra_in_test)}'
    )
test_df = test_df[train_df.columns]

# Only the fitted statistics are needed here: the scaled training matrix was
# discarded immediately, so fit() replaces fit_transform() and skips that
# large temporary allocation.
scaler = preprocessing.RobustScaler()
scaler.fit(train_df)
test_df = scaler.transform(test_df)

del train_df
gc.collect()

# Regroup the flat rows into sequences of 80 rows each.
# NOTE(review): assumes every breath contributes exactly 80 consecutive rows — confirm.
X_test = test_df.reshape(-1, 80, test_df.shape[-1])

# Prediction

In [None]:
# Collect the saved fold models up front. Sorting makes the processing order
# independent of the filesystem, and failing here gives a clear error instead
# of a ZeroDivisionError in the averaging cell further down.
model_paths = sorted(glob.glob(models_dir + '/*.h5'))
if not model_paths:
    raise FileNotFoundError(f'No .h5 model files found in {models_dir!r}')

predicted_labels = []
for model_name in model_paths:
    print(model_name)
    model = tf.keras.models.load_model(model_name)
    # Flatten the model output into one 1-D vector of predictions.
    predictions = model.predict(X_test).reshape(-1)
    predicted_labels.append(predictions)

    # Release the model and the Keras global state it registered before the
    # next fold is loaded, so memory does not accumulate across iterations.
    del model
    tf.keras.backend.clear_session()
    gc.collect()

../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_2_Fold_Weights.h5


2021-10-06 14:47:09.191067: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-06 14:47:09.194436: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-10-06 14:47:09.243997: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-06 14:47:09.244660: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2021-10-06 14:47:09.244729: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-10-06 14:47:09.275774: I tensorflow/stream_executor/platform/def

../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_9_Fold_Weights.h5


2021-10-06 14:48:47.275718: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 804800000 exceeds 10% of free system memory.


../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_10_Fold_Weights.h5


2021-10-06 14:50:09.161467: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 804800000 exceeds 10% of free system memory.


../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_4_Fold_Weights.h5


2021-10-06 14:51:30.670162: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 804800000 exceeds 10% of free system memory.


../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_5_Fold_Weights.h5


2021-10-06 14:52:52.502883: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 804800000 exceeds 10% of free system memory.


../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_7_Fold_Weights.h5
../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_8_Fold_Weights.h5
../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_1_Fold_Weights.h5
../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_3_Fold_Weights.h5
../input/google-brain-ventilator-tf-lstm-models/50_Features/TF_LSTM_6_Fold_Weights.h5


In [None]:
# Plain average across folds. The denominator is the number of prediction
# vectors actually collected, not a second scan of the models directory, so it
# cannot drift from what was summed.
if not predicted_labels:
    raise ValueError('predicted_labels is empty; run the prediction cell first')
sub_df['pressure'] = sum(predicted_labels) / len(predicted_labels)
display(sub_df.head())
sub_df.to_csv('submission_mean.csv', index=False)

Unnamed: 0,id,pressure
0,1,6.249198
1,2,5.945623
2,3,7.118114
3,4,7.602633
4,5,9.117546


In [None]:
# From https://www.kaggle.com/cdeotte/ensemble-folds-with-median-0-153
# Blend of the per-row median (weight 0.75) and mean (weight 0.25) across folds.
# The fold predictions are stacked once and reused for both statistics instead
# of building the stacked matrix twice.
stacked_predictions = np.vstack(predicted_labels)
sub_df['pressure'] = np.median(stacked_predictions, axis=0)*.75 + np.mean(stacked_predictions, axis=0)*.25
del stacked_predictions
display(sub_df.head())
sub_df.to_csv('submission_median.csv', index=False)

Unnamed: 0,id,pressure
0,1,6.252702
1,2,5.950576
2,3,7.131741
3,4,7.60378
4,5,9.115738


In [None]:
# From https://www.kaggle.com/cdeotte/ensemble-folds-with-median-0-153
# Snap whatever is currently stored in sub_df['pressure'] onto the grid
# PRESSURE_MIN + k * PRESSURE_STEP, then clamp it to the observed training range.
snapped_pressure = (
    sub_df['pressure']
    .sub(PRESSURE_MIN)
    .div(PRESSURE_STEP)
    .round()
    .mul(PRESSURE_STEP)
    .add(PRESSURE_MIN)
)
sub_df['pressure'] = snapped_pressure
sub_df['pressure'] = np.clip(sub_df['pressure'], PRESSURE_MIN, PRESSURE_MAX)
del snapped_pressure
display(sub_df.head())
sub_df.to_csv('submission.csv', index=False)

Unnamed: 0,id,pressure
0,1,6.259305
1,2,5.978096
2,3,7.10293
3,4,7.595045
4,5,9.141693
