# CatBoost Algorithms

CatBoost (categorical boosting) is an algorithm for gradient boosting on decision trees. Is a machine learning technique created by Yandex; therefore, it outperforms many existing boosting such as XGBoost and Light GBM. 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# fix_yahoo_finance is used to fetch data 
import fix_yahoo_finance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2007-01-01'
end = '2018-11-16'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-03,20.08,20.4,19.35,19.52,19.52,28350300
2007-01-04,19.66,19.860001,19.32,19.790001,19.790001,23652500
2007-01-05,19.540001,19.91,19.540001,19.709999,19.709999,15902400
2007-01-08,19.709999,19.860001,19.370001,19.469999,19.469999,15814800
2007-01-09,19.450001,19.709999,19.370001,19.65,19.65,14494200


In [3]:
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Returns'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01-04,19.66,19.860001,19.32,19.790001,19.790001,23652500,0,0,0,0.013832
2007-01-05,19.540001,19.91,19.540001,19.709999,19.709999,15902400,0,1,0,-0.004043
2007-01-08,19.709999,19.860001,19.370001,19.469999,19.469999,15814800,0,0,1,-0.012177
2007-01-09,19.450001,19.709999,19.370001,19.65,19.65,14494200,1,1,1,0.009245
2007-01-10,19.639999,20.02,19.5,20.01,20.01,19783200,1,1,1,0.018321


In [4]:
dataset.shape

(2991, 10)

In [5]:
X = dataset[['Open', 'High', 'Low', 'Volume']].values
y = dataset['Buy_Sell'].values

In [6]:
import catboost as cb
from catboost import CatBoostClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
# from sklearn.model_selection import GridSearchCV

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [8]:
cb = CatBoostClassifier(iterations=5, learning_rate=0.1)
cb.fit(X_train, y_train)

0:	learn: 0.6923219	total: 150ms	remaining: 599ms
1:	learn: 0.6914714	total: 241ms	remaining: 362ms
2:	learn: 0.6904868	total: 339ms	remaining: 226ms
3:	learn: 0.6895988	total: 433ms	remaining: 108ms
4:	learn: 0.6889079	total: 520ms	remaining: 0us


<catboost.core.CatBoostClassifier at 0x24d1c4560f0>

In [9]:
print('Model is fitted: ' + str(cb.is_fitted()))
print('Model params:')
print(cb.get_params())

Model is fitted: True
Model params:
{'loss_function': 'Logloss', 'learning_rate': 0.1, 'iterations': 5}


In [10]:
y_pred = cb.predict(X_test)

In [11]:
y_pred

array([ 1.,  0.,  0.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  1.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,
        1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
        1.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  1.,
        0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,
        1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,
        0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  1.,
        1.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,
        0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,
        0.,  1.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0

In [12]:
print('CatBoost Score:', cb.score(X_test, y_test))

CatBoost Score: 0.544240400668
