# Modeling

In [1]:
import numpy as np 
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, GroupKFold
from sklearn.ensemble import VotingRegressor

import optuna
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

pd.set_option('display.max_columns', None)
#########################################################
train = pd.read_csv('../input/ventilator-pressure-prediction/train.csv')
test = pd.read_csv('../input/ventilator-pressure-prediction/test.csv')
ss = pd.read_csv('../input/ventilator-pressure-prediction/sample_submission.csv')

In [2]:
from sklearn.preprocessing import RobustScaler, normalize
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

2021-10-30 13:56:25.068593: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [3]:
def add_features(df):
    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()
    df['u_in_cumsum'] = (df['u_in']).groupby(df['breath_id']).cumsum()
    df['u_in_lag1'] = df.groupby('breath_id')['u_in'].shift(1)
    df['u_out_lag1'] = df.groupby('breath_id')['u_out'].shift(1)
    df['u_in_lag_back1'] = df.groupby('breath_id')['u_in'].shift(-1)
    df['u_out_lag_back1'] = df.groupby('breath_id')['u_out'].shift(-1)
    df['u_in_lag2'] = df.groupby('breath_id')['u_in'].shift(2)
    df['u_out_lag2'] = df.groupby('breath_id')['u_out'].shift(2)
    df['u_in_lag_back2'] = df.groupby('breath_id')['u_in'].shift(-2)
    df['u_out_lag_back2'] = df.groupby('breath_id')['u_out'].shift(-2)
    df['u_in_lag3'] = df.groupby('breath_id')['u_in'].shift(3)
    df['u_out_lag3'] = df.groupby('breath_id')['u_out'].shift(3)
    df['u_in_lag_back3'] = df.groupby('breath_id')['u_in'].shift(-3)
    df['u_out_lag_back3'] = df.groupby('breath_id')['u_out'].shift(-3)
    df['u_in_lag4'] = df.groupby('breath_id')['u_in'].shift(4)
    df['u_out_lag4'] = df.groupby('breath_id')['u_out'].shift(4)
    df['u_in_lag_back4'] = df.groupby('breath_id')['u_in'].shift(-4)
    df['u_out_lag_back4'] = df.groupby('breath_id')['u_out'].shift(-4)
    df = df.fillna(0)
    
    df['breath_id__u_in__max'] = df.groupby(['breath_id'])['u_in'].transform('max')
    df['breath_id__u_out__max'] = df.groupby(['breath_id'])['u_out'].transform('max')
    
    df['u_in_diff1'] = df['u_in'] - df['u_in_lag1']
    df['u_out_diff1'] = df['u_out'] - df['u_out_lag1']
    df['u_in_diff2'] = df['u_in'] - df['u_in_lag2']
    df['u_out_diff2'] = df['u_out'] - df['u_out_lag2']
    
    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']
    
    df['breath_id__u_in__diffmax'] = df.groupby(['breath_id'])['u_in'].transform('max') - df['u_in']
    df['breath_id__u_in__diffmean'] = df.groupby(['breath_id'])['u_in'].transform('mean') - df['u_in']
    
    df['u_in_diff3'] = df['u_in'] - df['u_in_lag3']
    df['u_out_diff3'] = df['u_out'] - df['u_out_lag3']
    df['u_in_diff4'] = df['u_in'] - df['u_in_lag4']
    df['u_out_diff4'] = df['u_out'] - df['u_out_lag4']
    df['cross']= df['u_in']*df['u_out']
    df['cross2']= df['time_step']*df['u_out']
    
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['R__C'] = df["R"].astype(str) + '__' + df["C"].astype(str)
    df = pd.get_dummies(df)
    return df
train = add_features(train)
test = add_features(test)

In [4]:
train = train.fillna(0)
test = test.fillna(0)

In [5]:
targets = train[['pressure']].to_numpy().reshape(-1, 80)
train.drop(['pressure', 'id', 'breath_id'], axis = 1, inplace = True)
test = test.drop(['id', 'breath_id'], axis = 1)

In [6]:
RS = RobustScaler()
train = RS.fit_transform(train)
test = RS.transform(test)

In [7]:
train = train.reshape(-1, 80, train.shape[-1])
test = test.reshape(-1, 80, train.shape[-1])

In [8]:
EPOCH = 200
BATCH_SIZE = 1024

kf = KFold(n_splits = 2, shuffle = True, random_state = 228)
test_preds = []
for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
    print('-'*15, '>', f'Fold {fold+1}', '<', '-'*15)
    X_train, X_valid = train[train_idx], train[test_idx]
    y_train, y_valid = targets[train_idx], targets[test_idx]
    model = keras.models.Sequential([
        keras.layers.Input(shape = train.shape[-2:]),
        keras.layers.Bidirectional(keras.layers.LSTM(265, return_sequences = True)),
        keras.layers.Bidirectional(keras.layers.LSTM(256, return_sequences = True)),
        keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences = True)),
        keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences = True)),
        keras.layers.Dense(128, activation = 'selu'),
        keras.layers.Dense(64, activation = 'selu'),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer = "adam", loss = "mae")

    scheduler = ExponentialDecay(1e-3, 400*((len(train)*0.8)/BATCH_SIZE), 1e-5)
    lr = LearningRateScheduler(scheduler, verbose = 1)
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

    model.fit(X_train, y_train, validation_data = (X_valid, y_valid), epochs = EPOCH, batch_size = BATCH_SIZE, callbacks = [lr, callback])

    test_preds.append(model.predict(test).squeeze().reshape(-1, 1).squeeze())

--------------- > Fold 1 < ---------------


2021-10-30 13:57:32.745104: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-30 13:57:32.747854: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-10-30 13:57:32.790954: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-30 13:57:32.791689: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2021-10-30 13:57:32.791764: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-10-30 13:57:32.791843: I tensorflow/stream_executor/platform/def

Epoch 1/200

Epoch 00001: LearningRateScheduler reducing learning rate to tf.Tensor(0.001, shape=(), dtype=float32).


2021-10-30 13:57:46.839643: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2021-10-30 13:57:47.606635: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2021-10-30 13:57:47.661869: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8




2021-10-30 13:58:08.341502: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 603600000 exceeds 10% of free system memory.


Epoch 2/200

Epoch 00002: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009995119, shape=(), dtype=float32).
Epoch 3/200

Epoch 00003: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009990239, shape=(), dtype=float32).
Epoch 4/200

Epoch 00004: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009985362, shape=(), dtype=float32).
Epoch 5/200

Epoch 00005: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009980488, shape=(), dtype=float32).
Epoch 6/200

Epoch 00006: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009975616, shape=(), dtype=float32).
Epoch 7/200

Epoch 00007: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009970746, shape=(), dtype=float32).
Epoch 8/200

Epoch 00008: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009965879, shape=(), dtype=float32).
Epoch 9/200

Epoch 00009: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009961014, shape=(), dtype=float32).
Epoch 10

2021-10-30 14:27:37.448502: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 804800000 exceeds 10% of free system memory.


--------------- > Fold 2 < ---------------


2021-10-30 14:28:21.461596: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 603600000 exceeds 10% of free system memory.


Epoch 1/200

Epoch 00001: LearningRateScheduler reducing learning rate to tf.Tensor(0.001, shape=(), dtype=float32).

2021-10-30 14:28:47.152495: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 603600000 exceeds 10% of free system memory.


Epoch 2/200

Epoch 00002: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009995119, shape=(), dtype=float32).
Epoch 3/200

Epoch 00003: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009990239, shape=(), dtype=float32).
Epoch 4/200

Epoch 00004: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009985362, shape=(), dtype=float32).
Epoch 5/200

Epoch 00005: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009980488, shape=(), dtype=float32).
Epoch 6/200

Epoch 00006: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009975616, shape=(), dtype=float32).
Epoch 7/200

Epoch 00007: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009970746, shape=(), dtype=float32).
Epoch 8/200

Epoch 00008: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009965879, shape=(), dtype=float32).
Epoch 9/200

Epoch 00009: LearningRateScheduler reducing learning rate to tf.Tensor(0.0009961014, shape=(), dtype=float32).
Epoch 10

In [9]:
ss['pressure'] = sum(test_preds) / 2
ss.to_csv('submission.csv', index = False)