# Regression using Cyclic Boosting

First, install the `cyclic-boosting` package and its dependencies:

```sh
!pip install cyclic-boosting
```

In [1]:
import pandas as pd
import numpy as np
import os
import datetime

Let's use the bike-sharing dataset from Kaggle as test data.

For time-series data, a "date" column must be included to indicate the date and time at which each record was obtained. Its column name and format must be consistent. The "dayofweek" column (day of the week) and the "dayofyear" column (day number within the year) are created automatically if they are not present; if they are already present, they must use exactly these column names.

This dataset contains hourly data. The "instant" column is just the record number. The "casual" and "registered" columns are a breakdown of the total count ("cnt"), which is the prediction target, so they should be deleted; otherwise they would leak the target into the features.

In [2]:
# Locate the raw hourly CSV: it lives in "bike_sharing_data" inside the parent
# of the current working directory.
parpath = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
df = (
    pd.read_csv(os.path.join(parpath, "bike_sharing_data", "hour.csv"))
    # Use the "date" / "dayofweek" column names that the notebook text says are required.
    .rename(columns={'dteday': 'date', 'weekday': 'dayofweek'})
    # "instant" is a record number; "casual" and "registered" are dropped as well.
    .drop(columns=['instant', 'casual', 'registered'])
)

# Combine the calendar date with the hour of day so every row carries a full
# timestamp. pd.to_timedelta converts the whole "hr" column at once instead of
# building one datetime.timedelta object per row.
df["date"] = pd.to_datetime(df["date"]) + pd.to_timedelta(df["hr"], unit="h")

# Persist the prepared frame; the training cell reads it back from this path.
df.to_csv("./bike_sharing_hour.csv", index=False)

In [3]:
# Preview the first rows of the prepared frame (renamed columns, hourly "date" values).
df.head()

Unnamed: 0,date,season,yr,mnth,hr,holiday,dayofweek,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,2011-01-01 00:00:00,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,2011-01-01 01:00:00,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40
2,2011-01-01 02:00:00,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,32
3,2011-01-01 03:00:00,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,13
4,2011-01-01 04:00:00,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,1


# Automated Machine Learning with Tornado
With Tornado, you can automatically perform data preparation, feature property setting, hyperparameter tuning, model building, training, evaluation, and plotting!

In [4]:
from cyclic_boosting.tornado import Generator, Manager, Trainer

# Point the data module at the CSV written by the preprocessing cell.
data_deliverler = Generator.TornadoDataModule("./bike_sharing_hour.csv")
# NOTE(review): presumably this manager drives the feature/interaction selection
# seen in the training log - confirm against the tornado documentation.
manager = Manager.TornadoVariableSelectionModule()
trainer = Trainer.SqueezeTrainer(data_deliverler, manager)
# "cnt" is the column to predict; log_policy and verbose are passed through to run().
trainer.run(target="cnt", log_policy="compute_COD", verbose=False)

Data interval is 'hourly'. If not, give
    the data_interval option in the TornadoDataModule.

Auto analysis target ['temp', 'atemp', 'hum', 'windspeed']
    has_trend: ['temp', 'atemp', 'windspeed']
    has_seasonality: []
    has_up_monotonicity: []
    has_down_monotonicity: []
    has_linearity: []
    has_missing: []


iter: 90 / 90 
TRUNCATED
['hr', 'atemp', 'temp', 'hum', 'mnth', 'dayofyear', 'season', 'yr', 'weathersit', 'windspeed', 'dayofweek', 'workingday', 'holiday', ('hr', 'atemp'), ('hr', 'temp'), ('hr', 'workingday'), ('hr', 'dayofweek'), ('yr', 'hr'), ('mnth', 'hr'), ('season', 'hr'), ('hr', 'weathersit'), ('hr', 'hum'), ('hr', 'holiday'), ('hr', 'windspeed'), ('temp', 'hum'), ('atemp', 'hum'), ('mnth', 'hum'), ('mnth', 'atemp'), ('yr', 'atemp'), ('mnth', 'temp'), ('yr', 'temp'), ('temp', 'atemp'), ('season', 'atemp'), ('season', 'hum'), ('season', 'temp'), ('atemp', 'windspeed'), ('weathersit', 'atemp'), ('temp', 'dayofyear'), ('atemp', 'dayofyear'), ('weathersit', 'temp'), ('dayofweek', 'atemp'), ('workingday', 'atemp'), ('temp', 'windspeed'), ('holiday', 'atemp'), ('hum', 'dayofyear'), ('workingday', 'temp'), ('dayofweek', 'temp'), ('yr', 'hum'), ('holiday', 'temp'), ('hr', 'dayofyear'), ('yr', 'mnth'), ('season', 'yr'), ('weathersit', 'hum'), ('dayofweek', 'hum'), ('yr', 'dayofyear'), 

# Load the best model and make predictions.

Get the best model path.

In [8]:
import pickle
from pathlib import Path

# Collect the numeric suffix of every "model_<N>" entry directly under ./models/.
# The suffix is taken from the entry name only (not the whole path string), and
# entries whose suffix is not a plain integer are ignored.
model_nos = []
for p in Path("./models/").glob("model_*"):
    suffix = p.name.partition("_")[2]
    if suffix.isascii() and suffix.isdigit():
        model_nos.append(suffix)

# Sort numerically: a plain string/path sort would rank "model_9" after
# "model_84" and pick the wrong model whenever the digit counts differ.
model_nos.sort(key=int)

if not model_nos:
    raise FileNotFoundError(
        "No 'model_<N>' entries found under ./models/ - run the training cell first."
    )

# The notebook treats the highest-numbered model as the best one.
model_path = f"./models/model_{model_nos[-1]}/model_{model_nos[-1]}.pkl"
print(model_path)

./models/model_84/model_84.pkl


Make predictions with the best model.

In [12]:
# One-row feature frame describing a single hour to predict.
# NOTE(review): dayofyear=180 falls in late June, which does not match
# mnth=11 / season=4 - confirm the intended sample values.
data = {
    'season': [4],
    'yr': [0],
    'mnth': [11],
    'hr': [18],
    'holiday': [0],
    'workingday': [1],
    'weathersit': [2],
    'temp': [0.341667],
    'atemp': [0.323221],
    'hum': [0.575833],
    'windspeed': [0.305362],
    'dayofweek': [4],
    'dayofyear': [180],
}
X = pd.DataFrame(data)

# CAUTION: pickle.load can execute arbitrary code from the file. Only load model
# files that were produced by this notebook or come from a trusted source.
# The file is needed only for deserialization, so it is closed before predicting.
with open(model_path, "rb") as f:
    CB_est = pickle.load(f)

# Predict on a copy so X itself is left untouched
# (presumably predict may modify its input - confirm).
yhat = CB_est.predict(X.copy())
print(yhat)

{'season': 4, 'yr': 4, 'mnth': 4, 'hr': 4, 'holiday': 4, 'dayofweek': 4, 'workingday': 4, 'weathersit': 4, 'temp': 1, 'atemp': 1, 'hum': 1, 'windspeed': 1, 'dayofyear': 4}
[344.7115857]
