In [None]:
## Version futuresales lib required = 0.1.8

!pip install -i https://test.pypi.org/simple/ futuresales_denissimo==0.1.8
!pip install plotly==5.3.1
!pip install neptune-client
!pip install neptune-sklearn

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()

import futuresales as fs

from futuresales.distribution import from_pickle, to_pickle
from futuresales.utils import make_scaled, load_credentials
from futuresales.validation import Validator

from neptune.new.types import File

In [None]:
def get_statistics(report):
    """Build a per-object statistics frame from a validation report.

    Parameters
    ----------
    report : sequence
        Indexable report in which ``report[2]`` holds the predictions as a
        2-D array (its first row after transposition is used) and
        ``report[3]`` is a DataFrame with a ``valid_target`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``report[3]`` with three extra columns:
        ``residuals`` (prediction minus ``valid_target``), ``predicted``
        and ``object_id`` (the frame's index).
    """
    predictions = report[2]
    # Work on a copy so the caller's report frame is not modified in place;
    # re-running a cell then never sees columns left over from a prior call.
    stat = report[3].copy()
    stat['residuals'] = predictions.transpose()[0] - stat['valid_target']
    stat['predicted'] = predictions
    stat['object_id'] = stat.index
    return stat

# Base Neptune configuration

In [None]:
import os

import neptune.new as neptune

# Credentials are read from the environment so that no secret is stored in
# the notebook. Set NEPTUNE_API_TOKEN (and optionally NEPTUNE_PROJECT) before
# running this cell. Any token that was previously committed in this notebook
# should be considered leaked and revoked.
cred = {
    "project": os.environ.get("NEPTUNE_PROJECT", "denissimo/FS-Results"),
    "api_token": os.environ.get("NEPTUNE_API_TOKEN"),
}

if not cred["api_token"]:
    raise RuntimeError(
        "NEPTUNE_API_TOKEN is not set; export it (or load it from a secrets "
        "store) before running this cell."
    )

# Open the Neptune run that every later cell logs into.
run = neptune.init(
    api_token=cred['api_token'],
    project=cred['project'],
    tags=['rfr', 'baseline']
)

# Not read anywhere in the visible cells.
# NOTE(review): presumably toggles restoring a saved backup — confirm or remove.
FETCH_BACKUP = False

# Dataset preparation

In [None]:
# Task-level frames shipped with the baseline feature dataset, keyed by role.
task_df = {
    'test': pd.read_csv('/kaggle/input/fsfeaturesbaseline/task_df_test.csv'),
    'train': pd.read_csv('/kaggle/input/fsfeaturesbaseline/task_df_train.csv'),
    'idx': pd.read_csv('/kaggle/input/fsfeaturesbaseline/idx.csv'),
}

# Pre-built baseline feature sets, read in the order train, test, validation.
train, test, validation = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/fsfeaturesbaseline/baseline_train_set.csv',
        '/kaggle/input/fsfeaturesbaseline/baseline_test_set.csv',
        '/kaggle/input/fsfeaturesbaseline/baseline_validation_set.csv',
    )
)

In [None]:
def _split_and_scale(frame):
    """Split one dataset into scaled features and its two target frames.

    Returns ``(features, target, valid_target)`` where ``target`` and
    ``valid_target`` are single-column DataFrames taken from ``frame`` and
    ``features`` is every other column passed through ``make_scaled``
    (project helper; presumably feature scaling — see futuresales.utils).
    """
    features = frame.drop(['valid_target', 'target'], axis=1)
    target = frame.loc[:, ['target']]
    valid_target = frame.loc[:, ['valid_target']]

    scaled = make_scaled(features)
    # Discard the stored `id` column and rebuild `id` from the frame's index,
    # then use it as the index of the feature matrix.
    without_id = scaled.drop('id', axis=1)
    with_new_id = without_id.reset_index().rename({'index': 'id'}, axis=1)
    return with_new_id.set_index('id'), target, valid_target


# Same processing for every split; make_scaled is invoked in the order
# train, test, validation.
train_x, train_y, train_y_valid = _split_and_scale(train)
test_x, test_y, test_y_valid = _split_and_scale(test)
validation_x, validation_y, validation_y_valid = _split_and_scale(validation)

# Model tuning

In [None]:
# Registry of validation reports, keyed by a short model name (e.g. 'rfr').
errors = dict()

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Single source of truth for the hyperparameters: the same dict configures the
# model and is logged to Neptune, so the logged values cannot drift from what
# was actually trained (previously max_depth=10 was used but 20 was logged).
rfr_params = {
    'n_estimators': 100,  # scikit-learn's default, made explicit
    'max_depth': 10,
}

# assumes Validator forwards keyword arguments to the estimator constructor,
# as it already did for max_depth — TODO confirm against futuresales.validation
rfr_validator = Validator(RandomForestRegressor, **rfr_params)

# NOTE(review): the model is fitted on the *test* split, while the summary
# cell below passes train_x as the training data. Confirm whether
# `fit(train_x, train_y)` was intended; behaviour is left unchanged here.
rfr_validator.fit(test_x, test_y)
errors['rfr'] = rfr_validator.validate(validation_x, validation_y_valid)
residuals = get_statistics(errors['rfr'])

run['params'] = rfr_params

# NOTE(review): the test/train labels assume report[0] and report[1] are the
# test and train RMSE respectively — verify against Validator.validate.
run['score/rmse/test'] = errors['rfr'][0]
run['score/rmse/train'] = errors['rfr'][1]
run['model'] = 'RandomForest'

# Persist the per-object residual table and the fitted model as pickles.
run['dump/residuals'].upload(File.as_pickle(residuals))
run['dump/model'].upload(File.as_pickle(rfr_validator.model))

In [None]:
import neptune.new.integrations.sklearn as npt_utils

# Flatten the single-column target frames into 1-D arrays for the summary.
_summary_train_target = train_y.to_numpy().transpose()[0]
_summary_test_target = test_y_valid.to_numpy().transpose()[0]

# Let the Neptune sklearn integration build its regressor summary and attach
# it to the run.
# NOTE(review): the train side uses `target` while the test side uses
# `valid_target` — confirm this mix is intended.
run['auto_model_summary'] = npt_utils.create_regressor_summary(
    rfr_validator.model,
    train_x,
    test_x,
    _summary_train_target,
    _summary_test_target,
)

In [None]:
# Stop the Neptune run opened in the configuration cell; no further values
# should be logged to `run` after this point.
run.stop()

In [None]:
from time import sleep

# Sleeps in 3-second steps forever, so this cell never finishes and
# "Run All" never completes.
# NOTE(review): presumably a keep-alive hack for the hosted session — confirm
# it is still needed, otherwise remove it or guard it behind a flag.
while True:
    sleep(3)