In [195]:
# Version futuresales lib required = 0.1.8

!pip install -i https://test.pypi.org/simple/ futuresales_denissimo==0.1.8
!pip install plotly==5.3.1
!pip install neptune-client
!pip install neptune-xgboost

In [196]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()

import futuresales as fs

from futuresales.distribution import from_pickle, to_pickle
from futuresales.utils import make_scaled, load_credentials
from futuresales.validation import Validator

from neptune.new.types import File

In [197]:
def get_statistics(report):
    """Build a per-object residual table from a validation report.

    Parameters
    ----------
    report : sequence
        Validation report; only positions 2 and 3 are read here.
        ``report[2]`` holds the predictions and is indexed as
        ``.transpose()[0]`` (presumably an ``(n, 1)`` array -- TODO confirm
        against ``Validator.validate``). ``report[3]`` is a DataFrame with a
        ``valid_target`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``report[3]`` with three extra columns: ``residuals``
        (prediction minus ``valid_target``), ``predicted`` and ``object_id``
        (the frame's index).
    """
    # Work on a copy so the caller's report frame is not mutated as a side effect.
    stat = report[3].copy()
    stat['residuals'] = report[2].transpose()[0] - stat.valid_target
    stat['predicted'] = report[2]
    stat['object_id'] = stat.index
    return stat

# Base Neptune configuration

In [198]:
import neptune.new as neptune
import xgboost as xgb
from neptune.new.integrations.xgboost import NeptuneCallback
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

import os

# The Neptune API token is a secret: read it from the environment instead of
# storing it in the notebook. Any token that was previously committed here
# should be revoked and regenerated.
_neptune_api_token = os.environ.get("NEPTUNE_API_TOKEN")
if not _neptune_api_token:
    raise RuntimeError(
        "NEPTUNE_API_TOKEN is not set; provide your Neptune API token through "
        "the environment before running this notebook."
    )

cred = {
    "project": "denissimo/FS-Results",
    "api_token": _neptune_api_token,
}

# Start the Neptune run that the later cells log parameters, scores and dumps to.
run = neptune.init(
    api_token=cred['api_token'],
    project=cred['project'],
    tags=['xgboost', 'baseline'],
)


# NOTE(review): this flag is not read anywhere in the visible cells -- confirm it is still needed.
FETCH_BACKUP = False

# Dataset preparation

In [199]:
# Kaggle input directory that contains every CSV file read below.
DATA_DIR = '/kaggle/input/fsfeaturesbaseline'

# Task-level tables, keyed by 'test', 'train' and 'idx'.
task_df = {
    'test': pd.read_csv(f'{DATA_DIR}/task_df_test.csv'),
    'train': pd.read_csv(f'{DATA_DIR}/task_df_train.csv'),
    'idx': pd.read_csv(f'{DATA_DIR}/idx.csv'),
}

# Baseline feature sets for the three splits used by the model cells.
train = pd.read_csv(f'{DATA_DIR}/baseline_train_set.csv')
test = pd.read_csv(f'{DATA_DIR}/baseline_test_set.csv')
validation = pd.read_csv(f'{DATA_DIR}/baseline_validation_set.csv')

In [200]:
def _split_features_targets(frame):
    """Return ``(features, target, valid_target)`` for one dataset.

    ``features`` is ``frame`` without the ``valid_target`` and ``target``
    columns; the two targets are returned as single-column DataFrames.
    """
    features = frame.drop(['valid_target', 'target'], axis=1)
    return features, frame.loc[:, ['target']], frame.loc[:, ['valid_target']]


def _scale_features(features):
    """Scale ``features`` with ``make_scaled`` and rebuild the ``id`` index.

    After scaling, the ``id`` column is dropped, the index is reset, the
    resulting ``index`` column is renamed to ``id`` and set as the new index.
    """
    return (
        make_scaled(features)
        .drop('id', axis=1)
        .reset_index()
        .rename({'index': 'id'}, axis=1)
        .set_index('id')
    )


# Split every dataset first, then scale, in the same order as before.
train_x, train_y, train_y_valid = _split_features_targets(train)
test_x, test_y, test_y_valid = _split_features_targets(test)
validation_x, validation_y, validation_y_valid = _split_features_targets(validation)

train_x = _scale_features(train_x)
test_x = _scale_features(test_x)
validation_x = _scale_features(validation_x)

# Model tuning

In [201]:
# Validation reports collected per model, keyed by a short model name.
errors = dict()

In [202]:
# Neptune callback for XGBoost bound to the current run; log_tree selects tree indices 0-5.
neptune_callback = NeptuneCallback(run=run, log_tree=list(range(6)))


# Hyperparameters passed both to the direct XGBRegressor fit and to the Validator.
model_params = dict(
    eta=0.5,
    max_depth=3,
    n_estimators=300,
    reg_alpha=1,
    reg_lambda=1000,
)

In [203]:
from xgboost import XGBRegressor

In [204]:
num_round = 144

# Evaluation targets as 1-D arrays (first and only column of each target frame).
train_eval_target = train_y.to_numpy()[:, 0]
test_eval_target = test_y_valid.to_numpy()[:, 0]

# NOTE(review): LearningRateScheduler uses the value returned by the callable as
# the learning rate of each round, so `eta` from model_params presumably has no
# effect on this fit -- confirm that this is intended.
lr_schedule = xgb.callback.LearningRateScheduler(lambda epoch: 0.99**epoch)

reg = xgb.XGBRegressor(**model_params)

# Fit the model, tracking RMSE on the train and test sets.
# neptune_callback is currently not attached to this fit.
reg.fit(
    train_x,
    train_y,
    eval_metric=['rmse'],
    eval_set=[(train_x, train_eval_target), (test_x, test_eval_target)],
    callbacks=[lr_schedule],
)

In [205]:
# Fit through the Validator on the test split and score on the validation split.
xgb_validator = Validator(XGBRegressor, **model_params)
xgb_validator.fit(test_x, test_y)

xgb_report = xgb_validator.validate(validation_x, validation_y_valid)
errors['xgb'] = xgb_report
residuals = get_statistics(xgb_report)

# Scalar metadata: report[0] is logged as the test RMSE, report[1] as the train RMSE.
for field, value in (
    ('params', model_params),
    ('score/rmse/test', xgb_report[0]),
    ('score/rmse/train', xgb_report[1]),
    ('model', 'XGradientBoosting'),
):
    run[field] = value

# Pickled artefacts: the residual table and the fitted validator.
for path, payload in (
    ('dump/residuals', residuals),
    ('dump/model', xgb_validator),
):
    run[path].upload(File.as_pickle(payload))

In [206]:
# Stop the Neptune run created by neptune.init() now that parameters, scores and dumps are logged.
run.stop()

In [None]:
from time import sleep

# Keep-alive loop: blocks this cell until the kernel is interrupted.
# Set KEEP_ALIVE to False to let "Run All" finish instead of blocking here.
KEEP_ALIVE = True
KEEP_ALIVE_INTERVAL_SECONDS = 3

try:
    while KEEP_ALIVE:
        sleep(KEEP_ALIVE_INTERVAL_SECONDS)
except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to leave the loop.
    print('Keep-alive loop interrupted.')