### ***Project description*** ###

***Project description:*** https://www.kaggle.com/competitions/bike-sharing-demand/overview/description

***Project goal:*** combine historical usage patterns with weather data in order to forecast bike rental demand in the Capital Bikeshare program in Washington, D.C.

***Suggested evaluation metric:*** Root Mean Squared Logarithmic Error (RMSLE)

***Other used evalutaion metrics*** Mean Absolute Error(MAE), Mean Squared Error(MSE), Root Mean Squared Error(RMSE), R Squared (R2)

### ***0. Project preparation*** ###

---

In [None]:
# main upgrades
!pip install --upgrade neptune-client
!pip install --upgrade neptune

In [73]:
# main imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sklearn
from sklearn import set_config
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_squared_log_error

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

import catboost as ctb
import lightgbm as lgbm
import xgboost as xgb

import eli5
import time
import warnings
import json
from tqdm import tqdm
import neptune
from itertools import product

# minor settings
plt.style.use('ggplot')
%matplotlib inline
set_config(display='diagram')
warnings.filterwarnings('ignore', category=DeprecationWarning)

# global variables
PAD = 20
RANDOM_STATE = 0

In [74]:
# version check
def show_version(module_object: object, n: int = PAD) -> str:
    module_name = getattr(module_object, '__name__')
    module_ver = getattr(module_object, '__version__')
    dots = '.' * (n - len(module_name))
    
    print (f'{module_name}{dots}{module_ver}')


!python --version
module_list = [np, pd, sklearn, eli5, neptune]
for module in module_list:
    show_version(module)

Python 3.7.5
numpy...............1.20.0
pandas..............1.2.4
sklearn.............0.24.2
eli5................0.11.0
neptune.............0.15.2


In [75]:
# neptune init
with open('neptune_credentials.json') as f:
   neptune_credentials = json.load(f)

neptune.init(
    api_token = neptune_credentials['API_TOKEN'],
    project_qualified_name = neptune_credentials['PROJECT']
)

Project(DataWorkshop-Foundation/bike-sharing)

### ***1. Load data*** ###

---

In [76]:
df_train = pd.read_csv('inputs/train.csv')
X_train, X_test, y_train, y_test = train_test_split(df_train.drop(['count'], axis=1),
                                                    df_train['count'], 
                                                    test_size=0.3, 
                                                    random_state=RANDOM_STATE)
                                                    

In [77]:
cols_numerical = ['temp', 'atemp', 'humidity', 'windspeed']
cols_categorical = ['season', 'holiday', 'workingday', 'weather', 'month', 'year', 'day', 'hour', 'dayofweek', 'weekend']

In [78]:
df_train

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0000,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0000,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0000,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0000,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
10881,2012-12-19 19:00:00,4,0,1,1,15.58,19.695,50,26.0027,7,329,336
10882,2012-12-19 20:00:00,4,0,1,1,14.76,17.425,57,15.0013,10,231,241
10883,2012-12-19 21:00:00,4,0,1,1,13.94,15.910,61,15.0013,4,164,168
10884,2012-12-19 22:00:00,4,0,1,1,13.94,17.425,61,6.0032,12,117,129


### ***2. Custom data functions*** ###

---

In [79]:
def join_datasets(dataset_1: pd.DataFrame, dataset_2: pd.DataFrame) -> pd.DataFrame:
    '''
    Concatenate two datasets

    Arguments:
        dataset_1: first dataset to join
        dataset_2: second dataset to join
    
    Returns:
        Joined dataset
    '''

    merged_dataset = pd.concat([dataset_1, dataset_2]).reset_index(drop = True)
    if 'datetime' in merged_dataset.columns:
        merged_dataset.sort_values(by='datetime', inplace = True)
    return merged_dataset

In [80]:
def load_and_transform_data(dataset, pipeline_name):
    dataset = pipeline_name.fit_transform(dataset)
    X = dataset.drop(['count'], axis = 1).copy()
    y = dataset['count']
    return X, y

In [81]:
def preview_dataset(X, y):
    return pd.concat([X, y], axis=1)

### ***3. Custom data classes*** ###

---

In [82]:
class DataTransformer():
    '''
    Change the initial dataset into a new one based
    on passed function
    '''
    # copy parameter introduced to prevent SettingwithCopyWarning
    # https://www.dataquest.io/blog/settingwithcopywarning/
    def __init__(self, func, copy = True, **kwargs):
        self.func = func
        self.copy = copy

    def transform(self, input_df, **transform_params):
        input_df_ = input_df if not self.copy else input_df.copy()
        return self.func(input_df_,)

    def fit(self, X, y=None, **fit_params):
        return self

In [83]:
class ColumnSelector():
    '''
    Return a dataframe with predefined columns only
    '''

    def __init__(self, columns=None):
        self.columns = columns

    def transform(self, X, **transform_params):
        cpy_df = X[self.columns].copy()
        return cpy_df

    def fit(self, X, y=None, **fit_params):
        return self

In [84]:
class ColumnDroper():
    '''
    Return a dataframe without selected columns
    '''
    def __init__(self,columns):
        self.columns=columns

    def transform(self,X,y=None):
        return X.drop(self.columns,axis=1)

    def fit(self, X, y=None):
        return self 

### ***4. Custom feature functions*** ###

---

#### ***4.1. Add or change features*** ####

In [85]:
def make_cols_from_datetime(dataset: pd.DataFrame) -> pd.DataFrame:
    '''
    Convert a column to datetime format and create new columns: 
    'year', 'month', 'day', 'hour'
    
    Arguments:
        dataset: pandas DataFrame 
    
    Returns:
        dataset: transformed pandas DataFrame
    '''

    # convert string to datetime type
    dataset['datetime'] = pd.to_datetime(dataset['datetime'])

    # make new columns from datetime column
    dataset['year'] = dataset['datetime'].dt.year
    dataset['month'] = dataset['datetime'].dt.month
    dataset['day'] = dataset['datetime'].dt.day
    dataset['hour'] = dataset['datetime'].dt.hour
    dataset['dayofweek'] = dataset['datetime'].dt.dayofweek
    dataset['weekend'] = dataset['dayofweek'].map(lambda x: int(x in [6,7]))
    return dataset

In [86]:
def seasons_change(dataset: pd.DataFrame) -> pd.DataFrame:
    '''
    Set proper season duration and change their representation according
    to dataset legend

    Argument:
        dataset: pandas DataFrame
    
    Returns:
        dataset: pandas DataFrame
    '''

    changes = [
    ('2011-01-01', '2011-03-19', 4),
    ('2011-03-20', '2011-06-20', 1),
    ('2011-06-21', '2011-09-22', 2),
    ('2011-09-23', '2011-12-20', 3),
    ('2011-12-21', '2012-03-19', 4),
    ('2012-03-20', '2012-06-19', 1),
    ('2012-06-20', '2012-09-21', 2),
    ('2012-09-22', '2012-12-20', 3),
    ('2012-12-21', '2012-12-31', 4),
     ]

    for (start_date, end_date, new_season) in changes:
        dataset.loc[between_dates(dataset, start_date, '00', end_date, '23').index,'season'] = new_season
    return dataset

In [87]:
def generate_agg_features(dataset: pd.DataFrame, agg_name = np.median) -> pd.DataFrame:
    '''
    Calculate a monthly agg_function (like mean, median...) and add it to the dataframe

    Arguments:
        dataset: pandas DataFrame
    
    Returns:
        dataset: DataFrame with an additional column which contains the agg_function
        of bike shares in a month
    '''
    agg_dataset = dataset[['month', 'year', 'count']].groupby(['month', 'year']).agg(agg_name)
    agg_dataset = agg_dataset.reset_index()
    agg_dataset = agg_dataset.rename(columns = {'count':str(agg_name.__name__)})
    return pd.merge(dataset, agg_dataset, on=['month', 'year'], how = 'left')

In [88]:
def normalize_X(dataset: pd.DataFrame) -> pd.DataFrame:
    '''
    Normalize the X variables. Equivalent of MinMaxScaler

    Arguments:
        dataset: pandas DataFrame
    
    Returns:
        dataset: pandas DataFrame
    '''
    feats = select_X_dtypes(dataset, dtypes = np.number).columns
    result = dataset[feats].copy()
    for feature_name in result.columns:
        max_value = dataset[feature_name].max()
        min_value = dataset[feature_name].min()
        result[feature_name] = (dataset[feature_name] - min_value) / (max_value - min_value)
    result['count'] = dataset['count']
    return result

In [89]:
def log_y(dataset: pd.DataFrame) -> pd.DataFrame:
    '''
    Apply log + 1 function to target variable
    '''
    dataset['count'] = dataset['count'].apply(lambda x: np.log1p(x))
    return dataset 

#### ***4.2. Select features*** ####

In [90]:
def between_dates(dataset: pd.DataFrame, start_date: str, start_time: str, end_date: str, end_time: str) -> pd.DataFrame:
    '''
    Show dataframe between two dates and timestamps
    
    Arguments:
        dataset: pandas DataFrame 
        start_date: date which the dataset must be trimmed from
        start_time: hour of day from start_date
        end_date: date which the dataset must be trimmed to
        end_time: hour of day from end_date

    Returns:
        A DataFrame between (start_date, start_time) and (end_date, end_time)
    '''

    start_dt = f'{start_date} {start_time}:00:00'
    end_dt = f'{end_date} {end_time}:00:00'
    mask = (dataset['datetime'] >= start_dt) & (dataset['datetime'] <= end_dt)
    return dataset[mask]

In [91]:
def get_feats(dataset: pd.DataFrame, black_list = None, white_list = None) -> list:
    '''
    Return a list of features that appear in a given white_list and don't appear in a given 
    black_list

    Arguments:
        dataset: pandas DataFrame

    Returns:
        feats: list of features
    '''
    if white_list == None:
        feats = dataset.columns.to_list()
    else:
        feats = white_list

    if black_list is None:
        black_list = ['Unnamed: 0', 'datetime', 'casual', 'registered']

    feats = [feat for feat in feats if feat not in black_list]
    return feats

In [92]:
def select_X_dtypes(dataset: pd.DataFrame, dtypes = np.number) -> pd.DataFrame:
    '''
    Return a dataset with a specific data type but without the target variable 

    Argument:
        dataset: pandas DataFrame
    
    Returns:
        dataset: pandas DataFrame
    '''
    return dataset.drop('count', axis =1).select_dtypes(include=dtypes)


### ***5. Evaluation metrics*** ###

In [93]:
def rmsle(y_true: np.ndarray, y_pred: np.ndarray) -> np.float64:
    '''
    The Root Mean Squared Log Error (RMSLE) metric 

    Arguments: 
        y_true: the ground truth labels given in the dataset
        y_pred: our predictions
        
    Returns: 
        The RMSLE score
    '''

    return np.sqrt(mean_squared_log_error(y_true, y_pred))

### ***6. Pipeline definitions*** ###

---

#### ***6.1. Standard num and cat pipeline definiton*** ####

In [94]:
# lists of transformers
scalers = [StandardScaler(), MinMaxScaler(), Normalizer()]
cat_transformers = [OrdinalEncoder(), OneHotEncoder()]


transformer_numerical = Pipeline(steps = [
    ('num_trans', StandardScaler())
])

transformer_categorical = Pipeline(steps = [
    ('cat_trans', OneHotEncoder())
])


num_cat_preprocess = ColumnTransformer(transformers = [
    ('numerical', transformer_numerical, cols_numerical),
    ('categorical', transformer_categorical, cols_categorical)
])

#### ***6.2. Custom pipelines*** ####

In [95]:
main_preprocess = Pipeline(steps = [
    ('make_dt_columns', DataTransformer(make_cols_from_datetime)),
    ('change_seasons', DataTransformer(seasons_change)),
    # ('add_means', DataTransformer(lambda df: generate_agg_features(df, np.mean))),
    # ('add_medians', DataTransformer(lambda df: generate_agg_features(df, np.median))),
    # ('drop', ColumnDroper(['datetime']))
], verbose=True)


### ***7. Custom models*** ###

In [96]:
xparams = {'n_estimators': 100, 'max_depth': 10, 'random_state': 0, 'verbosity':0, 'use_label_encoder': False }
cparams = {'n_estimators': 100, 'max_depth': 10, 'random_state': 0 , 'silent': True}
lparams = {'n_estimators': 100, 'max_depth': 10, 'random_state': 0, 'verbosity':0, 'silent': True}

models = [
    ('XGBoostRegressor', xgb.XGBRegressor(**xparams)),
    ('CatBoostRregressor',  ctb.CatBoostRegressor(**cparams)),
    ('LGBMRegressor', lgbm.LGBMRegressor(**lparams)),
    ('RandomForest', RandomForestRegressor())
]

### ***8. Main run*** ###

---

In [99]:
# empty dataframe for locally keeping track of model results
models_df = pd.DataFrame()

# product of the lists (every possible combination of three lists)
# it's needed because we will only make one for-loop
# instead of three inner for-loops
grid = list(product(models, scalers, cat_transformers))

# pipeline template 
pipe = Pipeline(steps = [
    ('main_preprocessing', main_preprocess),
    ('num_cat_preprocessing', num_cat_preprocess),
    ('regressor', None)
])

# some constants
main_prep_step_name = pipe.steps[0][0] # name of the first pipeline in 'pipe'
steps_in_main_prep = ', '.join([name[0] for name in pipe.steps[0][1].steps]) # list of steps in 'main_preprocessing'
df_median = np.median(df_train['count']) # median of target value
text_input = ['model', 'model_params', 'num_trans', 'cat_trans', 'prep_name', 'prep_steps'] # string values for logging


for n, elem in enumerate(tqdm(grid)):
    model = grid[n][0] # tuple (model_name, model_object)
    num_tr = grid[n][1] # individual scaler, for example: StandardScaler()
    cat_tr = grid[n][2] # individual categorical transformer, for example OneHotEncoder()

    neptune.create_experiment(f'test-experiment-{n}') # name of experiment

    pipe_params = {
        'main_preprocessing': main_preprocess,
        'num_cat_preprocessing__numerical__num_trans': num_tr,
        'num_cat_preprocessing__categorical__cat_trans': cat_tr,
        'regressor': model[1]
    }

    pipe.set_params(**pipe_params)

    start_time = time.time()
    pipe.fit(X_train, y_train)
    end_time = time.time()

    y_pred = pipe.predict(X_test)
    y_pred = [df_median if y<0 else y for y in y_pred] # no negative values

    # metrics
    score_mae = mean_absolute_error(y_test, y_pred)
    score_mse = mean_squared_error(y_test, y_pred)
    score_rmse = np.sqrt(score_mse) 
    score_rmsle = rmsle(y_test, y_pred)
    score_r2 = r2_score(y_test, y_pred)

    model_params = str(model[1].get_params())
    
    param_dict = {
        'model': model[1].__class__.__name__,
        'model_params': model_params,
        'num_trans': num_tr.__class__.__name__,
        'cat_trans': cat_tr.__class__.__name__,
        'prep_name': main_prep_step_name,
        'prep_steps': steps_in_main_prep,
        'score_mae': score_mae,
        'score_mse': score_mse,
        'score_rmse': score_rmse,
        'score_rmsle': score_rmsle,
        'score_r2': score_r2,
        'time_elapsed': end_time - start_time
    }

    # neptune log
    for key, value in param_dict.items():
        if key in text_input:
            neptune.log_text(key, value)
        else:
            neptune.log_metric(key, value)
    
    # add row for local results
    models_df = models_df.append(pd.DataFrame(param_dict, index = [0]))

models_df.reset_index(drop = True, inplace = True)

  0%|          | 0/24 [00:00<?, ?it/s]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-2
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


  4%|▍         | 1/24 [00:04<01:35,  4.13s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-3
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


  8%|▊         | 2/24 [00:08<01:28,  4.05s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-4
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 12%|█▎        | 3/24 [00:12<01:25,  4.06s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-5
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 17%|█▋        | 4/24 [00:16<01:20,  4.02s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-6
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 21%|██        | 5/24 [00:20<01:18,  4.15s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-7
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 25%|██▌       | 6/24 [00:26<01:24,  4.68s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-8
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 29%|██▉       | 7/24 [00:30<01:16,  4.47s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-9
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 33%|███▎      | 8/24 [00:34<01:12,  4.54s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-10
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 38%|███▊      | 9/24 [00:38<01:05,  4.35s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-11
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 42%|████▏     | 10/24 [00:43<01:01,  4.41s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-12
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 46%|████▌     | 11/24 [00:48<01:00,  4.66s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-13
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 50%|█████     | 12/24 [00:53<00:57,  4.81s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-14
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_col_wise=true` to remove the overhead.


 54%|█████▍    | 13/24 [00:57<00:48,  4.37s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-15
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


 58%|█████▊    | 14/24 [01:00<00:40,  4.07s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-16
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


 62%|██████▎   | 15/24 [01:03<00:34,  3.87s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-17
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


 67%|██████▋   | 16/24 [01:07<00:29,  3.71s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-18
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_col_wise=true` to remove the overhead.


 71%|███████   | 17/24 [01:10<00:25,  3.58s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-19
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s
You can set `force_col_wise=true` to remove the overhead.


 75%|███████▌  | 18/24 [01:13<00:21,  3.52s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-20
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 79%|███████▉  | 19/24 [01:20<00:22,  4.45s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-21
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 83%|████████▎ | 20/24 [02:05<01:06, 16.58s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-22
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 88%|████████▊ | 21/24 [02:11<00:40, 13.54s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-23
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 92%|█████████▏| 22/24 [02:55<00:44, 22.42s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-24
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


 96%|█████████▌| 23/24 [03:02<00:17, 17.90s/it]

https://app.neptune.ai/DataWorkshop-Foundation/bike-sharing/e/BIKE-25
[Pipeline] ... (step 1 of 2) Processing make_dt_columns, total=   0.0s
[Pipeline] .... (step 2 of 2) Processing change_seasons, total=   0.0s


100%|██████████| 24/24 [03:46<00:00,  9.42s/it]


In [100]:
neptune.stop()

In [72]:
models_df

Unnamed: 0,model,model_params,num_trans,cat_trans,prep_name,prep_steps,score_mae,score_mse,score_rmse,score_rmsle,score_r2,time_elapsed
0,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",StandardScaler,OrdinalEncoder,main_preprocessing,"make_dt_columns, change_seasons",25.390761,1791.02746,42.320532,0.412238,0.944849,1.042613
1,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",StandardScaler,OneHotEncoder,main_preprocessing,"make_dt_columns, change_seasons",35.335349,3088.83859,55.577321,0.745846,0.904885,0.693325
2,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",MinMaxScaler,OrdinalEncoder,main_preprocessing,"make_dt_columns, change_seasons",25.412235,1793.003247,42.343869,0.412353,0.944788,0.860783
3,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",MinMaxScaler,OneHotEncoder,main_preprocessing,"make_dt_columns, change_seasons",35.935276,3117.489269,55.834481,0.769404,0.904003,0.727006
4,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",Normalizer,OrdinalEncoder,main_preprocessing,"make_dt_columns, change_seasons",27.04536,2013.649492,44.873706,0.404867,0.937994,0.910109
5,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",Normalizer,OneHotEncoder,main_preprocessing,"make_dt_columns, change_seasons",36.785542,3365.412999,58.01218,0.705086,0.896369,1.274437
6,CatBoostRegressor,"{'loss_function': 'RMSE', 'silent': True, 'max...",StandardScaler,OrdinalEncoder,main_preprocessing,"make_dt_columns, change_seasons",27.28696,1993.316315,44.646571,0.63411,0.93862,0.631282
7,CatBoostRegressor,"{'loss_function': 'RMSE', 'silent': True, 'max...",StandardScaler,OneHotEncoder,main_preprocessing,"make_dt_columns, change_seasons",34.34773,2836.416376,53.258017,0.857814,0.912658,1.199998
8,CatBoostRegressor,"{'loss_function': 'RMSE', 'silent': True, 'max...",MinMaxScaler,OrdinalEncoder,main_preprocessing,"make_dt_columns, change_seasons",27.28696,1993.316315,44.646571,0.63411,0.93862,0.610579
9,CatBoostRegressor,"{'loss_function': 'RMSE', 'silent': True, 'max...",MinMaxScaler,OneHotEncoder,main_preprocessing,"make_dt_columns, change_seasons",34.34773,2836.416376,53.258017,0.857814,0.912658,1.217865
