In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

import optuna

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers.schedules import ExponentialDecay

from sklearn.metrics import mean_absolute_error as mae
from sklearn.preprocessing import RobustScaler, normalize
from sklearn.model_selection import train_test_split, GroupKFold, KFold

from IPython.display import display
import time

import warnings  
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Read the three competition CSVs in order: train, test, sample submission.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ventilator-pressure-prediction/train.csv',
        '../input/ventilator-pressure-prediction/test.csv',
        '../input/ventilator-pressure-prediction/sample_submission.csv',
    )
)


In [None]:
# Report how many CPU and GPU devices TensorFlow can see.
print("Num CPUs:", len(tf.config.list_physical_devices('CPU')))
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

In [None]:
# add more features

def add_features(df):
    """Return a feature-engineered, one-hot-encoded copy of ``df``.

    The input frame is left untouched; all work happens on a copy.

    Reads the columns ``breath_id``, ``time_step``, ``u_in``, ``u_out``,
    ``R`` and ``C`` and adds, in this order:

    * ``u_in_cumsum``  - running sum of ``u_in`` within each ``breath_id``.
    * ``area``         - running sum of ``time_step * u_in`` within each
      ``breath_id``.
    * ``one``, ``count`` - helper columns; ``count`` is the 1-based row
      position within the breath.
    * ``u_in_cummean`` - ``u_in_cumsum / count``.
    * ``breath_id_lag``, ``breath_id_lag2``, ``breath_id_lagsame``,
      ``breath_id_lag2same`` - helper columns marking whether the row one
      (two) positions earlier has the same ``breath_id``.
    * ``u_in_lag``, ``u_in_lag2``, ``u_out_lag2`` - the value one (two) rows
      earlier, set to 0 when that row has a different ``breath_id`` or does
      not exist.
    * ``RC`` - string concatenation of ``R`` and ``C``.

    ``R`` and ``C`` are cast to ``str`` and the frame is then passed through
    ``pd.get_dummies``, which replaces the string columns with indicator
    columns.

    NOTE(review): the lag features compare *adjacent rows*, so they assume the
    rows of one breath are contiguous and already in time order - confirm
    against the input files.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns listed above.

    Returns
    -------
    pandas.DataFrame
        New frame with the added columns and one-hot encoded ``R``/``C``/``RC``.
    """
    # Work on a copy so the caller's DataFrame is not modified as a side effect.
    df = df.copy()

    df['u_in_cumsum'] = df['u_in'].groupby(df['breath_id']).cumsum()

    df['area'] = df['time_step'] * df['u_in']
    df['area'] = df.groupby('breath_id')['area'].cumsum()

    # Cumulative count of rows per breath, used as the divisor of the running mean.
    df['one'] = 1
    df['count'] = df['one'].groupby(df['breath_id']).cumsum()
    df['u_in_cummean'] = df['u_in_cumsum'] / df['count']

    # 1/0 masks: does the row one (two) positions earlier share this breath_id?
    df['breath_id_lag'] = df['breath_id'].shift(1).fillna(0)
    df['breath_id_lag2'] = df['breath_id'].shift(2).fillna(0)
    df['breath_id_lagsame'] = (df['breath_id_lag'] == df['breath_id']).astype(int)
    df['breath_id_lag2same'] = (df['breath_id_lag2'] == df['breath_id']).astype(int)

    # Shifted values are multiplied by the mask so nothing leaks across breaths.
    df['u_in_lag'] = df['u_in'].shift(1).fillna(0)
    df['u_in_lag'] = df['u_in_lag'] * df['breath_id_lagsame']
    df['u_in_lag2'] = df['u_in'].shift(2).fillna(0)
    df['u_in_lag2'] = df['u_in_lag2'] * df['breath_id_lag2same']
    df['u_out_lag2'] = df['u_out'].shift(2).fillna(0)
    df['u_out_lag2'] = df['u_out_lag2'] * df['breath_id_lag2same']

    # Treat R, C and their combination as categorical values for get_dummies.
    df['R'] = df['R'].astype(str)
    df['C'] = df['C'].astype(str)
    df['RC'] = df['R'] + df['C']
    df = pd.get_dummies(df)

    return df

# Replace each raw frame with its feature-engineered version. The calls are kept
# as two separate statements so the previous `train` object can be released
# before `test` is processed.
# NOTE(review): get_dummies runs independently on each frame, so the indicator
# columns only line up if train and test contain the same R/C values - confirm.
train = add_features(train)
test = add_features(test)

In [None]:
# Fold the flat pressure column into rows of 80 consecutive values
# (presumably one row per breath - the 80 is hard-coded here).
targets = train['pressure'].to_numpy().reshape(-1, 80)
print(targets)

In [None]:
# Columns removed from both frames before scaling: the breath identifier, the
# intermediate helper columns created by add_features, and u_out_lag2.
# NOTE(review): an `id` column, if the CSVs contain one, is not dropped here and
# would be passed to the model as a feature - confirm this is intended.
helper_cols = [
    'breath_id',
    'one',
    'count',
    'breath_id_lag',
    'breath_id_lag2',
    'breath_id_lagsame',
    'breath_id_lag2same',
    'u_out_lag2',
]
train.drop(['pressure'] + helper_cols, axis=1, inplace=True)  # train additionally loses the target
test = test.drop(helper_cols, axis=1)

In [None]:
# standardization - centering and scaling
# The scaler is fitted on the training rows only and then applied to both sets.
RS = RobustScaler().fit(train)
train = RS.transform(train)
test = RS.transform(test)

# Group the flat rows into blocks of 80 rows each: (blocks, 80, features).
n_features = train.shape[-1]
train = train.reshape(-1, 80, n_features)
test = test.reshape(-1, 80, n_features)

In [None]:
EPOCH = 300
BATCH_SIZE = 128

# detecting and initializing the tpu (tensor processing unit)
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

with tpu_strategy.scope():
    # 5-fold split over the first axis of `train`; every fold trains a fresh
    # model and contributes one flattened prediction vector for `test`.
    kf = KFold(n_splits=5, shuffle=True, random_state=2021)
    test_preds = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(train, targets)):
        print('-'*15, '>', f'Fold {fold + 1}', '<', '-'*15)
        X_train, X_valid = train[train_idx], train[test_idx]
        y_train, y_valid = targets[train_idx], targets[test_idx]

        # Four stacked bidirectional LSTMs (return_sequences=True keeps one
        # output per time step), followed by two Dense layers ending in a
        # single value per time step.
        model = keras.models.Sequential([
            keras.layers.Input(shape = train.shape[-2:]),
            keras.layers.Bidirectional(keras.layers.LSTM(300, return_sequences = True)),
            keras.layers.Bidirectional(keras.layers.LSTM(200, return_sequences = True)),
            keras.layers.Bidirectional(keras.layers.LSTM(150, return_sequences = True)),
            keras.layers.Bidirectional(keras.layers.LSTM(100, return_sequences = True)),
            keras.layers.Dense(50, activation='selu'),
            keras.layers.Dense(1),
        ])

        # decay_steps is expressed in optimizer steps: 400 epochs' worth of
        # batches, taking 80% of the samples as the training share of a 5-fold
        # split. ExponentialDecay is a function of the optimizer *step*, so it
        # must be given to the optimizer itself. Wrapping it in a
        # LearningRateScheduler callback (as before) calls it with the *epoch*
        # index instead, which makes the decay negligible and leaves the
        # learning rate practically constant at its initial value.
        scheduler = ExponentialDecay(1e-3, 400*((len(train) * 0.8)/ BATCH_SIZE), 1e-5)
        model.compile(optimizer = keras.optimizers.Adam(learning_rate = scheduler), loss = "mae")

        model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=EPOCH, batch_size=BATCH_SIZE)

        # preds: flatten the per-step predictions into one 1-D vector per fold
        test_preds.append(model.predict(test).squeeze().reshape(-1, 1).squeeze())

In [None]:
# Average the per-fold test predictions. Dividing by the number of collected
# folds (rather than a hard-coded 5) keeps the mean correct if the number of
# splits is ever changed.
submission['pressure'] = sum(test_preds) / len(test_preds)
submission.to_csv('submission7.csv', index = False)