In [None]:
import numpy as np
# Show which NumPy release the kernel is using. The Bayesian-search cell further
# down fails with "module 'numpy' has no attribute 'int'" on recent NumPy releases.
# NOTE: a specific NumPy version cannot be selected at import time (a name such as
# "numpy-1.19.5" is not an importable module); pin the version in the environment instead.
print(np.__version__)

: 

In [2]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

#IMPORTS
import yfinance as yf
import pandas as pd

import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from scipy import stats
import json
from backtesting import Backtest, Strategy

import utils.labeling_algorithm as labeling_algorithm
import utils.la2 as la2
from utils.features_util import compute_features, get_available_features
from utils.cross_validation import custom_ten_fold_cv_selection, remove_monoton_instances, simple_ten_fold_cv_selection
import utils.sample_weights as sample_weights_util
import utils.xgb_utils as xgb_utils

ModuleNotFoundError: No module named 'yfinance'

In [None]:
#PARAMETERS
symbols = ['AMD', 'CLX']
start_date = '2019-11-30'
end_date = '2021-11-15'
#number of previous days to observe for next day's prediction
window = 30

close_prices = yf.download(symbols, start_date, end_date, progress=False)['Close']
market_series = yf.download('SPY', start_date, end_date, progress=False)['Close']

print(close_prices.shape)
print(close_prices.head(1))

## 3-state labeling

In [None]:
# For every ticker: draw the price series, then its 3-state labels underneath.
for stock in symbols:
    stock_prices = close_prices[window:][stock]
    labels = labeling_algorithm.get_series_labels(stock_prices, 0.05, 11)

    fig, ax = plt.subplots(figsize=(15, 5), facecolor='w')
    ax.plot(stock_prices.index, stock_prices)
    ax.set_title('{} prices'.format(stock))
    plt.show()

    fig, ax = plt.subplots(figsize=(15, 3), facecolor='w')
    ax.plot(labels.index, labels)
    ax.set_title('{} 3-state labels'.format(stock))
    plt.show()

## 2-state labeling

In [None]:
# For every ticker: draw the price series, then its 2-state labels underneath.
for stock in symbols:
    stock_prices = close_prices[window:][stock]
    labels = la2.get_series_labels(stock_prices, 0.05)

    fig, ax = plt.subplots(figsize=(15, 5), facecolor='w')
    ax.plot(stock_prices.index, stock_prices)
    ax.set_title('{} prices'.format(stock))
    plt.show()

    fig, ax = plt.subplots(figsize=(15, 3), facecolor='w')
    ax.plot(labels.index, labels)
    ax.set_title('{} 2-state labels'.format(stock))
    plt.show()

## Features

In [None]:
features = get_available_features()
# Two-level column layout: the first level is the stock symbol, the second the feature name.
feature_columns = pd.MultiIndex.from_product([symbols, features])
features_df = pd.DataFrame(columns=feature_columns)

# Fill in one symbol-level column group per ticker.
for stock in symbols:
    stock_features = compute_features(stock, close_prices[stock], market_series, window)
    features_df[stock] = stock_features

print(features_df.head(2))

## 10-fold cross validation

In [None]:
CVindices = simple_ten_fold_cv_selection(len(close_prices[window:]), window)

# One horizontal row of markers per fold: blue = training dates, red = validation dates.
fold_dates = close_prices[window:].index
fig, ax = plt.subplots(figsize=(12, 8), facecolor='w')
for i, fold in enumerate(CVindices):
    trainIndices, valIndices = fold[0], fold[1]

    ax.plot(fold_dates[trainIndices], [i] * len(trainIndices), 'bo')
    ax.plot(fold_dates[valIndices], [i] * len(valIndices), 'ro')

ax.legend(['train', 'validation'])
plt.show()


In [None]:
CVindices = custom_ten_fold_cv_selection(len(close_prices[window:]), window)

# One horizontal row of markers per fold: blue = training dates, red = validation dates.
fold_dates = close_prices[window:].index
fig, ax = plt.subplots(figsize=(12, 8), facecolor='w')
for i, fold in enumerate(CVindices):
    trainIndices, valIndices = fold[0], fold[1]

    ax.plot(fold_dates[trainIndices], [i] * len(trainIndices), 'bo')
    ax.plot(fold_dates[valIndices], [i] * len(valIndices), 'ro')

ax.legend(['train', 'validation'])
plt.show()


## Sample weights
The greater the future profit/loss, the greater the sample weight.

In [None]:
stock = 'CLX'
stock_prices = close_prices[window:][stock]
labels = la2.get_series_labels(stock_prices, 0.05)
sample_weights = sample_weights_util.get(stock_prices, labels)

# All three figures share the x-range spanned by the sample weights so they line up.
x_limits = (sample_weights.index[0], sample_weights.index[-1])
panels = [
    (stock_prices, '{} prices'),
    (sample_weights, '{} sample weights'),
    (labels, '{} 2-state labels'),
]
for series, title_template in panels:
    fig, ax = plt.subplots(figsize=(15, 5), facecolor='w')
    ax.plot(series.index, series)
    ax.set_title(title_template.format(stock))
    ax.set_xlim(*x_limits)
    plt.show()

# XGBoost for CLX with 2-state labeling algorithm

In [2]:
# Prepare everything the CLX model cells below need: train/test prices, features,
# binary labels, sample weights, cross-validation folds and the class-balance ratio.
stock = 'CLX'
window = 30

# Training period
start_date_train = '2005-01-01'
end_date_train = '2015-12-31'
prices_train = yf.download(stock, start_date_train, end_date_train, progress=False)['Close']
market_prices_train = yf.download('SPY', start_date_train, end_date_train, progress=False)['Close']

# Test period
start_date_test = '2016-01-01'
end_date_test = '2017-12-31'
prices_test = yf.download(stock, start_date_test, end_date_test, progress=False)['Close']
market_prices_test = yf.download('SPY', start_date_test, end_date_test, progress=False)['Close']

#FEATURES, 'X' of model
features_train = compute_features(stock, prices_train, market_prices_train, window)
features_test = compute_features(stock, prices_test, market_prices_test, window)

#LABELS, 'Y' of model; the -1 class is remapped to 0 so the target is binary {0, 1}
labels_train = la2.get_series_labels(prices_train[window:], 0.05)
labels_train[labels_train == -1] = 0
labels_test = la2.get_series_labels(prices_test[window:], 0.05)
labels_test[labels_test == -1] = 0

# np.unique(..., return_counts=True) returns (unique values, counts) in that order;
# the names used to be swapped, so `counts` held the class values and vice versa.
values, counts = np.unique(labels_train, return_counts=True)

#SAMPLE WEIGHTS
sw_train = sample_weights_util.get(prices_train[window:], labels_train)
sw_test = sample_weights_util.get(prices_test[window:], labels_test)

#CROSS VALIDATION INDICES
cv_inidces_list = custom_ten_fold_cv_selection(len(prices_train[window:]), window)
cv_inidces_list = remove_monoton_instances(cv_inidces_list, labels_train)

#SCALE POS WEIGHT: number of 0-labelled training samples divided by number of 1-labelled ones
pos_weight = np.sum(labels_train == 0) / np.sum(labels_train == 1)

In [3]:
# Three hyper-parameter searches over the same data and folds; they differ only in the
# extra weighting argument and in the file the result is saved to.
search_runs = [
    ('Simple model:', {}, 'saved_models/bayes/{}_simple.json'),
    ('Class balancing model:', {'scale_pos_weight': pos_weight}, 'saved_models/bayes/{}_class_balancing.json'),
    ('Sample weight model:', {'sw_train': sw_train}, 'saved_models/bayes/{}_sample_weight.json'),
]
for banner, extra_kwargs, path_template in search_runs:
    print(banner)
    xgb_utils.bayes_search_cv_custom(
        features_train,
        labels_train,
        cv_inidces_list,
        **extra_kwargs,
        saving_file=path_template.format(stock),
    )

Simple model:


AttributeError: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

All models are optimized with GridSearchCV.
<br><br>
'simple' - model with basic parameters without class/sample weighting
<br>
'simple_sw' - 'simple' model tested on sample weighted test data
<br>
'class_balan' - model with class balancing
<br>
'class_balan_sw' - 'class_balan' model tested on sample weighted test data
<br>
'sample_weight' - model with sample weighting
<br>
'sample_weight_sw' - 'sample_weight' model tested on sample weighted test data

In [3]:
# Evaluate each saved parameter set twice: without and with sample weights applied to
# the test data (the `_sw` variants). The sample-weight model is also trained with sw_train.
common_args = (features_train, features_test, labels_train, labels_test)
simple_path = 'saved_models/{}_simple.json'.format(stock)
class_balancing_path = 'saved_models/{}_class_balancing.json'.format(stock)
sample_weight_path = 'saved_models/{}_sample_weight.json'.format(stock)

m_simple = xgb_utils.test_model(*common_args, simple_path)
m_simple_sw = xgb_utils.test_model(*common_args, simple_path, sw_test=sw_test)
m_class_balan = xgb_utils.test_model(*common_args, class_balancing_path)
m_class_balan_sw = xgb_utils.test_model(*common_args, class_balancing_path, sw_test=sw_test)
m_sample_weight = xgb_utils.test_model(*common_args, sample_weight_path, sw_train=sw_train)
m_sample_weight_sw = xgb_utils.test_model(*common_args, sample_weight_path, sw_train=sw_train, sw_test=sw_test)

# One row per evaluation: a flag telling whether the test set was sample weighted,
# followed by the metric values returned by test_model (in the order it returns them).
m_df = pd.DataFrame(columns = ['Sample Weight Test', 'acc', 'f1', 'mse', 'auc'])
result_rows = [
    ('simple', False, m_simple),
    ('simple_sw', True, m_simple_sw),
    ('class_balan', False, m_class_balan),
    ('class_balan_sw', True, m_class_balan_sw),
    ('sample_weight', False, m_sample_weight),
    ('sample_weight_sw', True, m_sample_weight_sw),
]
for row_name, weighted_test, metrics in result_rows:
    m_df.loc[row_name] = [weighted_test] + list(metrics.values())

# Report the ticker actually evaluated instead of a hard-coded 'CLX'.
print("Models testing for '{}' stock.".format(stock))
print(m_df)

Models testing for 'CLX' stock.
                  Sample Weight Test       acc        f1       mse       auc
simple                         False  0.505285  0.535714  0.494715  0.506035
simple_sw                       True  0.498167  0.593888  0.501833  0.480107
class_balan                    False  0.765328  0.769231  0.234672  0.765592
class_balan_sw                  True  0.788277  0.816232  0.211723  0.780872
sample_weight                  False  0.786469  0.800000  0.213531  0.787272
sample_weight_sw                True  0.805379  0.836856  0.194621  0.794216


### Backtesting

In [None]:
# Parameters for the backtest cells below.
stock = 'CLX'  # ticker that is downloaded, modelled and backtested
window = 30  # passed to compute_features and used to skip the first rows of the price series
refit_frequency = 5  # the strategy refits its model every `refit_frequency` calls to next()
train_data_size = 2500  # number of most recent training rows kept at each refit

In [None]:
start_date_train, end_date_train = '2005-01-01', '2015-12-31'
train_range = dict(start=start_date_train, end=end_date_train, progress=False)
prices_train = yf.download(stock, **train_range)['Close']
market_prices_train = yf.download('SPY', **train_range)['Close']

# Model inputs ('X')
features_train = compute_features(stock, prices_train, market_prices_train, window)

# Model targets ('Y'); the -1 class is remapped to 0
labels_train = la2.get_series_labels(prices_train[window:], 0.05)
labels_train[labels_train == -1] = 0

# Build the classifier from the stored 'simple' parameter set and fit it on the training data.
params_path = 'saved_models/{}_simple.json'.format(stock)
with open(params_path, 'r') as fp:
    model_params = json.load(fp)

xgb_model = xgb.XGBClassifier(**model_params)
xgb_model.fit(features_train, labels_train)
print()
print()

In [None]:
# Module-level log of every prediction the strategy makes, in call order;
# read after the backtest to compute the model accuracy.
predictions = []


class XGB2StateStrategy(Strategy):
    """Long/short strategy driven by the XGBoost 2-state classifier.

    On every bar the model predicts a class from the bar's feature columns:
    class 1 opens (or keeps) a long position, any other class a short one.
    The bar's features and its 'Label' value are added to the training set, and
    the model is refitted on the most recent `train_data_size` samples every
    `refit_frequency` calls to `next()`.

    Relies on the notebook globals `window`, `xgb_model`, `refit_frequency`,
    `train_data_size`, `features_train`, `labels_train` and `predictions`.
    """

    def init(self):
        """Copy the notebook-level configuration and training data onto the strategy."""
        self.window_size = window
        self.model = xgb_model
        self.feature_labels = get_available_features()
        self.refit_frequency = refit_frequency
        self.counter = 0
        self.train_data_size = train_data_size
        self.X_train = features_train
        self.Y_train = labels_train

    def next(self):
        """Refit when due, predict for the newest bar, extend the training set, trade."""
        self.counter += 1
        if self.counter == self.refit_frequency:
            # Keep only the most recent samples; X and Y are trimmed identically.
            self.X_train = self.X_train[-self.train_data_size:]
            self.Y_train = self.Y_train[-self.train_data_size:]
            self.model.fit(self.X_train, self.Y_train)
            self.counter = 0

        # Feature row of the most recent bar visible to the strategy.
        X_test = self.data.df[self.feature_labels].tail(1)
        prediction = self.model.predict(X_test)[0]

        # Grow features and labels together. The previous `self.X_train.append(X_test)`
        # discarded its result (DataFrame.append returns a new frame and no longer exists
        # in pandas 2.x), so X_train never grew while Y_train did, and every refit used
        # labels shifted against the features.
        self.X_train = pd.concat([self.X_train, X_test])
        # NOTE(review): the current bar's label is added to the training data right away;
        # if la2 derives labels from later prices this leaks future information — confirm.
        self.Y_train = np.append(self.Y_train, self.data.df['Label'].tail(1))
        predictions.append(prediction)

        if prediction == 1:
            # Go long unless a long position is already open.
            if self.position.is_short or not self.position:
                self.position.close()
                self.buy()
        else:
            # Go short unless a short position is already open.
            if self.position.is_long or not self.position:
                self.position.close()
                self.sell()

In [None]:
start_date = '2016-01-01' #actual starting date is window_size working days after this date
end_date = '2023-01-01'
backtest_range = dict(start=start_date, end=end_date, progress=False)
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
stock_prices_df = yf.download(stock, **backtest_range)[ohlcv_columns]
market_close_df = yf.download('SPY', **backtest_range)['Close']

# Features are computed from the closing prices of the stock and of SPY.
stock_close = stock_prices_df['Close']
features_df = compute_features(stock, stock_close, market_close_df, window)

# 2-state labels with the -1 class remapped to 0.
labels = la2.get_series_labels(stock_close[window:], 0.05)
labels[labels == -1] = 0

# One frame holding OHLCV data (first `window` rows dropped), the feature columns and the label.
data_df = pd.concat([stock_prices_df[window:], features_df], axis=1)
data_df['Label'] = labels

In [None]:
# Show the columns of the frame passed to Backtest: OHLCV data, the feature columns and 'Label'.
print(data_df.columns)

In [None]:
bt = Backtest(data_df, XGB2StateStrategy, cash=10_000)

# `predictions` is a module-level list the strategy appends to on every bar. Empty it
# first so re-running this cell does not stack a second run on top of the first.
predictions.clear()

# Run the backtest
# NOTE: the name `stats` shadows the `scipy.stats` module imported at the top of the notebook.
stats = bt.run()
print(stats)

# Plot the results
bt.plot(filename='backtests/{}_simple.html'.format(stock))

# Print prediction accuracy
# backtesting.py first calls Strategy.next() on the second bar (index 1) when the strategy
# declares no indicators, so prediction k belongs to row k + 1 of data_df. The previous
# `labels[:-1]` compared every prediction with the label of the bar before it.
true_labels = data_df['Label'].iloc[1:1 + len(predictions)]
print('Model Accuracy =', accuracy_score(true_labels, predictions))