In [112]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file available under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [113]:
# Read the three competition CSVs, using the `id` column as the index of each frame.
train = pd.read_csv('/kaggle/input/ventilator-pressure-prediction/train.csv', index_col='id')
test = pd.read_csv('/kaggle/input/ventilator-pressure-prediction/test.csv', index_col='id')
submisison = pd.read_csv('/kaggle/input/ventilator-pressure-prediction/sample_submission.csv', index_col='id')

# Report (rows, columns) for each frame.
print('Train shape: ', train.shape)

print('Test shape: ', test.shape)

print('Submission shape: ', submisison.shape)

In [114]:
# Preview the first rows of the training frame.
train.head()

In [115]:
# Preview the first rows of the test frame.
test.head()

In [116]:
# Preview the first rows of the sample submission frame.
submisison.head()

In [117]:
# Stack train on top of test (column order left unsorted) so both can be explored together.
frames = [train, test]
data = pd.concat(frames, sort=False)

# Print dtypes, non-null counts and memory usage of the combined frame.
data.info()

In [118]:
# Names of the columns (the last column is excluded) that hold at least one missing value.
null_cols = [name for name in data.columns[:-1] if data[name].isnull().any()]
null_cols

In [119]:
# Print the value frequencies of every column, with a ruler between consecutive columns.
separator = '\n' + '=' * 46 + '\n'
last_column = data.columns[-1]

for col in data.columns:
    counts = data[col].value_counts()
    print(counts)
    # No ruler after the final column.
    if col == last_column:
        continue
    print(separator)

In [None]:
# Treat every column with fewer than 10 distinct values as categorical.
# `unique()` is kept (rather than `nunique()`) so a missing value still counts
# as one distinct entry, exactly as before.
categorical_feature = [col for col in data.columns if len(data[col].unique()) < 10]

# The combined frame was only needed for exploration; release it before modelling.
del data

# Must be the last expression of the cell, otherwise the notebook does not display it.
categorical_feature

In [None]:
train['interval_step'] = (train.time_step - train.time_step.shift(1)).shift(-1)
train['interval_step'].fillna(method = 'ffill', inplace = True)
train.drop(['time_step'], axis=1)

In [None]:
test['interval_step'] = (test.time_step - test.time_step.shift(1)).shift(-1)
test['interval_step'].fillna(method = 'ffill', inplace = True)
test.drop(['time_step'], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

# Separate the regression target from the features; work on copies so the
# split does not alias the original frames.
X = train.copy()
y = X.pop('pressure')
X_test = test.copy()

# The originals are no longer needed once copied.
del train, test

# Hold out 30% of the rows for validation. No `stratify`: the target is a
# continuous value, and stratification is defined for class labels (it fails
# when any target value occurs only once).
# NOTE(review): this is a random row-level split; if rows belong to grouped
# sequences, a group-aware split may be more appropriate - confirm.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.3,
                                                      random_state = 42)

In [None]:
import optuna
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error

# Module-level datasets are kept because a later cell references them.
lgb_train = lgb.Dataset(X_train, y_train, categorical_feature = categorical_feature)
lgb_eval = lgb.Dataset(X_valid, y_valid, reference = lgb_train, categorical_feature = categorical_feature)

def objective(trial):
    """Train one LightGBM model with sampled hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample `learning_rate`, `max_bin` and `num_leaves`.

    Returns
    -------
    float
        Mean absolute error on the validation split (lower is better, which
        matches the study's default minimisation direction).
    """
    params = {
        'metric': 'mean_absolute_error',
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.02),
        'max_bin': trial.suggest_int('max_bin', 255, 500),
        'num_leaves': trial.suggest_int('num_leaves', 32, 128),
    }

    # `max_bin` is applied when a Dataset is constructed, and LightGBM refuses
    # to change it afterwards. Build fresh datasets per trial so every sampled
    # `max_bin` is actually honoured instead of raising on the second trial.
    trial_train = lgb.Dataset(X_train, y_train, categorical_feature = categorical_feature)
    trial_eval = lgb.Dataset(X_valid, y_valid, reference = trial_train,
                             categorical_feature = categorical_feature)

    # Early stopping and periodic logging are passed as callbacks; the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments are no longer
    # accepted by `lgb.train` in current LightGBM releases.
    model = lgb.train(params,
                      trial_train,
                      valid_sets = [trial_train, trial_eval],
                      num_boost_round = 1000,
                      callbacks = [lgb.early_stopping(stopping_rounds = 10),
                                   lgb.log_evaluation(period = 10)])
    y_pred_valid = model.predict(X_valid, num_iteration = model.best_iteration)

    score = mean_absolute_error(y_valid, y_pred_valid)
    return score

# Seeded random search so the sampled hyperparameters are reproducible.
study = optuna.create_study(sampler = optuna.samplers.RandomSampler(seed = 0))
study.optimize(objective, n_trials = 10)

In [None]:
# Show the hyperparameters of the best trial found by the study.
study.best_params

In [None]:
# Final fit using the best hyperparameters found by the study.
params = {
    'metric': 'mean_absolute_error',
    'learning_rate': study.best_params['learning_rate'],
    'num_leaves': study.best_params['num_leaves'],
    'max_bin': study.best_params['max_bin'],
}

# Build fresh datasets for the final fit: `max_bin` takes effect at Dataset
# construction, so datasets already constructed with another `max_bin`
# cannot be reused here.
final_train = lgb.Dataset(X_train, y_train, categorical_feature = categorical_feature)
final_eval = lgb.Dataset(X_valid, y_valid, reference = final_train,
                         categorical_feature = categorical_feature)

# Early stopping / logging go through callbacks; the `early_stopping_rounds`
# and `verbose_eval` keyword arguments are no longer accepted by `lgb.train`
# in current LightGBM releases.
model = lgb.train(params,
                  final_train,
                  valid_sets = [final_train, final_eval],
                  num_boost_round = 2000,
                  callbacks = [lgb.early_stopping(stopping_rounds = 10),
                               lgb.log_evaluation(period = 10)])


# Predict the test set with the iteration selected by early stopping.
y_pred = model.predict(X_test, num_iteration = model.best_iteration)

In [None]:
# Start from this competition's own sample submission (the same file loaded at
# the top of the notebook) so the ids and the column layout match the test set.
sub = pd.read_csv('/kaggle/input/ventilator-pressure-prediction/sample_submission.csv')

# Fail early with a clear message if predictions and submission rows disagree.
assert len(sub) == len(y_pred), 'number of predictions does not match the sample submission'

# The predicted quantity is the `pressure` target the model was trained on.
sub['pressure'] = y_pred
sub.to_csv('submission_1st_trial.csv', index = False)

In [None]:
# Preview the first rows of the submission frame that was written to disk.
sub.head()