# Classification using Cyclic Boosting

First, install the `cyclic-boosting` package and its dependencies.

```sh
pip install cyclic-boosting
```

In [None]:
import pandas as pd
import numpy as np

Let's download the test dataset from the Blue-Yonder-OSS `cyclic-boosting` repository and load it into a DataFrame.

In [None]:
import urllib.request

# Location of the integration-test dataset and of the local copy.
DATA_URL = (
    "https://raw.githubusercontent.com/Blue-Yonder-OSS/"
    "cyclic-boosting/main/tests/integration_test_data.csv"
)
path = "./data.csv"

# Deliberately no custom opener: urllib's default opener already honours the
# standard proxy environment variables (http_proxy / https_proxy / no_proxy).
# A hard-coded ProxyHandler that maps only the "http" scheme is never applied
# to this HTTPS URL, and installing it globally replaces the default proxy
# handler, which disables environment-configured proxies for later requests.
# If you are behind a proxy, export https_proxy before starting the kernel.
urllib.request.urlretrieve(DATA_URL, path)

# Read the file that was just downloaded.
df = pd.read_csv(path)

In [None]:
# Preview the first rows of the loaded dataset.
df.head()

# Prepare Data

The `LAMBDA` column holds the potential demand, which normally cannot be observed in real data, so it is dropped.

Categorical variables must be converted to int type and continuous variables to float type.

In [None]:
def drop_LAMBDA(df):
    """Return a new frame equal to ``df`` with the ``LAMBDA`` column removed.

    The input frame is not modified. pandas raises ``KeyError`` when the
    column is not present.
    """
    return df.drop("LAMBDA", axis=1)


def convert_datatype(df, col):
    """Flip the dtype of column ``col`` between ``float64`` and ``int64``.

    A ``float64`` column is cast to ``int64`` and an ``int64`` column is cast
    to ``float64``. For any other dtype the frame is returned as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``col``.
    col : str
        Name of the column whose dtype is switched.

    Returns
    -------
    pandas.DataFrame
        The result of ``df.astype`` when a cast applies, otherwise ``df``
        itself.
    """
    current = df[col].dtype
    if current == np.float64:
        target = np.int64
    elif current == np.int64:
        target = np.float64
    else:
        # Neither float64 nor int64: nothing to convert.
        return df
    return df.astype({col: target})


# Work on a copy so the raw `df` stays available: remove LAMBDA first, then
# switch the dtype of SCHOOL_HOLIDAY.
df_test = convert_datatype(drop_LAMBDA(df.copy()), col="SCHOOL_HOLIDAY")

# Write the prepared frame to disk without the index column.
df_test.to_csv("./data_test.csv", index=False)

In [None]:
# Preview the prepared frame (LAMBDA removed, SCHOOL_HOLIDAY dtype converted).
df_test.head()

# Automated Machine Learning with Tornado
With tornado, you can automatically perform data preparation, feature property setting, hyperparameter tuning, model building, training, evaluation, and plotting!

In [None]:
from cyclic_boosting.tornado import Generator, Manager, Trainer

# Point the data module at the CSV written during data preparation.
data_module = Generator.TornadoDataModule("data_test.csv")
manager = Manager.TornadoVariableSelectionModule()

# Combine the data module and the manager in a trainer and run it on "sales".
trainer = Trainer.SqueezeTrainer(data_module, manager)
trainer.run(
    target="sales",
    log_policy="compute_COD",
    verbose=False,
)

# Load the best model and make predictions.

Get the best model path.

In [None]:
import pickle
from pathlib import Path

def _model_sort_key(model_dir):
    """Sort key that orders ``model_<n>`` entries by the numeric value of ``<n>``.

    Plain path sorting is lexicographic and would put ``model_10`` before
    ``model_9``. Entries whose suffix is not a decimal number keep string
    order and sort before the numbered ones.
    """
    suffix = model_dir.name.partition("_")[2]
    if suffix.isdecimal():
        return (1, int(suffix), "")
    return (0, 0, suffix)


# Collect everything matching ./models/model* in numeric order.
model_dirs = sorted(Path("./models/").glob("model*"), key=_model_sort_key)
if not model_dirs:
    # Fail with an actionable message instead of an IndexError further down.
    raise FileNotFoundError(
        "No saved models found under ./models/; run the training cell first."
    )

# Suffix after the first underscore of each entry name, e.g. "3" for "model_3".
model_nos = [p.name.partition("_")[2] for p in model_dirs]

# NOTE(review): the last (highest-numbered) entry is taken to be the best
# model, as the original selection implies; confirm against Tornado's
# model-saving policy.
model_path = f"./models/model_{model_nos[-1]}/model_{model_nos[-1]}.pkl"
print(model_path)

Make predictions with the best model.

In [None]:
# A single example observation: one value per feature column.
sample_row = {
    "dayofweek": 4,
    "dayofyear": 190,
    "event": 0,
    "l_id": 1,
    "normal_price": 10.34,
    "p_id": 20,
    "pg_id_1": 1,
    "pg_id_2": 1,
    "pg_id_3": 1,
    "promotion_type": 1,
    "sales_area": 6321.6800893695445,
    "sales_price": 10.34,
    "school_holiday": 0,
}

# One-row frame built from the single record above.
X = pd.DataFrame([sample_row])

# NOTE: pickle.load can execute arbitrary code; only unpickle model files that
# you produced yourself or otherwise trust.
with open(model_path, "rb") as f:
    CB_est = pickle.load(f)

# Predict on a copy so X itself is not handed to the estimator.
yhat = CB_est.predict(X.copy())
print(yhat)