<a href="https://colab.research.google.com/github/Julita257/UMwF/blob/Case-3/Case_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Stwórz model generujący sygnały kupna i sprzedaży na rynku

Decyzje ma generować jeden z algorytmów uczenia maszynowego, po optymalizacji hiperparametrów

Zrób backtest wybranego modelu, użyj biblioteki przystosowanej do backtestów

Pamiętaj o wnioskach i wizualizacji wyników

Inwestujesz w wylosowaną spółkę przez określony okres, ale można wykorzystać także inne dane niż historyczne (np. obliczone wskaźniki)

Strategia inwestycyjna (decyzje wejścia i wyjścia) dla spółki  Amazon (AMZN), test w okresie od 01.01.2024 - 06.05.2024

In [13]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import plotly.express as px
import plotly.graph_objs as go

start_date = '2024-01-01'
end_date = '2024-06-05'

data = yf.download('AMZN', start=start_date, end=end_date)

data['Returns'] = data['Close'].pct_change()
data['SMA_10'] = data['Close'].rolling(window=10).mean()
data['SMA_50'] = data['Close'].rolling(window=50).mean()

data.dropna(inplace=True)

data['Signal'] = 0
data.loc[data['SMA_10'] > data['SMA_50'], 'Signal'] = 1
data.loc[data['SMA_10'] < data['SMA_50'], 'Signal'] = 0

features = ['Returns', 'SMA_10', 'SMA_50']
X = data[features]
y = data['Signal']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = XGBClassifier()
model.fit(X_train, y_train)

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

data['Predicted_Signal'] = best_model.predict(X)

# Definiowanie zwrotów z inwestycji na podstawie sygnałów
data['Strategy_Returns'] = data['Predicted_Signal'].shift(1) * data['Returns']

# Obliczanie skumulowanych zwrotów
data['Cumulative_Strategy_Returns'] = (1 + data['Strategy_Returns']).cumprod()
data['Cumulative_Market_Returns'] = (1 + data['Returns']).cumprod()

fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index, y=data['Cumulative_Market_Returns'], mode='lines', name='Market Returns'))
fig.add_trace(go.Scatter(x=data.index, y=data['Cumulative_Strategy_Returns'], mode='lines', name='Strategy Returns'))
fig.update_layout(title='Cumulative Returns: Market vs Strategy', xaxis_title='Date', yaxis_title='Cumulative Returns')
fig.show()


[*********************100%%**********************]  1 of 1 completed

The least populated class in y has only 3 members, which is less than n_splits=5.



In [24]:
# Install backtesting library
!pip install backtesting

# Import necessary libraries
from backtesting import Backtest, Strategy
from backtesting.lib import SignalStrategy, crossover
import yfinance as yf
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np

train_start_date = '2019-01-01'
train_end_date = '2023-12-31'
train_data = yf.download('AMZN', start=train_start_date, end=train_end_date)

test_start_date = '2024-01-01'
test_end_date = '2024-06-05'
test_data = yf.download('AMZN', start=test_start_date, end=test_end_date)

# Prepare the data as a pandas DataFrame
train_data['Returns'] = np.log(train_data['Close'] / train_data['Close'].shift(1))
train_data['SMA_10'] = train_data['Close'].rolling(window=10).mean()
train_data['SMA_50'] = train_data['Close'].rolling(window=50).mean()
train_data.dropna(inplace=True)

test_data['Returns'] = np.log(test_data['Close'] / test_data['Close'].shift(1))
test_data['SMA_10'] = test_data['Close'].rolling(window=10).mean()
test_data['SMA_50'] = test_data['Close'].rolling(window=50).mean()
test_data.dropna(inplace=True)

# Feature Engineering and Model Training
train_data['Signal'] = 0
train_data.loc[train_data['SMA_10'] > train_data['SMA_50'], 'Signal'] = 1
train_data.loc[train_data['SMA_10'] < train_data['SMA_50'], 'Signal'] = 0

test_data['Signal'] = 0
test_data.loc[test_data['SMA_10'] > test_data['SMA_50'], 'Signal'] = 1
test_data.loc[test_data['SMA_10'] < test_data['SMA_50'], 'Signal'] = 0

features = ['Returns', 'SMA_10', 'SMA_50']
X_train = train_data[features]
y_train = train_data['Signal']

X_test = test_data[features]
y_test = test_data['Signal']

model = XGBClassifier()
model.fit(X_train, y_train)

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1, 0.2]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

test_data['Predicted_Signal'] = best_model.predict(X_test)

class MyStrategy(SignalStrategy):
    def init(self):
        super().init()
        self.set_signal(test_data['Predicted_Signal'])

# Run backtest
bt = Backtest(data, MyStrategy, cash=10000, commission=0.002, exclusive_orders=True)
stats = bt.run()
print(stats)
bt.plot()




[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Start                     2024-03-13 00:00:00
End                       2024-06-04 00:00:00
Duration                     83 days 00:00:00
Exposure Time [%]                   96.551724
Equity Final [$]                  9981.969757
Equity Peak [$]                  10005.552924
Return [%]                          -0.180302
Buy & Hold Return [%]                1.574535
Return (Ann.) [%]                   -0.781024
Volatility (Ann.) [%]                 0.37075
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                   -0.254031
Avg. Drawdown [%]                   -0.055716
Max. Drawdown Duration       54 days 00:00:00
Avg. Drawdown Duration       14 days 00:00:00
# Trades                                   56
Win Rate [%]                        39.285714
Best Trade [%]                       4.576332
Worst Trade [%]                     -5.890126
Avg. Trade [%]                    


DatetimeFormatter scales now only accept a single format. Using the first provided: '%d %b'


DatetimeFormatter scales now only accept a single format. Using the first provided: '%m/%Y'


found multiple competing values for 'toolbar.active_drag' property; using the latest value


found multiple competing values for 'toolbar.active_scroll' property; using the latest value



In [28]:
# Install necessary libraries
!pip install backtesting yfinance scikit-learn

# Import necessary libraries
from backtesting import Backtest, Strategy
from backtesting.lib import SignalStrategy, crossover
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler

# Fetch data
start_date = '2019-01-01'
end_date = '2024-06-05'
data = yf.download('AMZN', start=start_date, end=end_date)

# Prepare the data as a pandas DataFrame
data['Log_Returns'] = np.log(data['Close'] / data['Close'].shift(1))
data['SMA_10'] = data['Close'].rolling(window=10).mean()
data['SMA_50'] = data['Close'].rolling(window=50).mean()
data.dropna(inplace=True)

# Feature Engineering and Model Training
data['Signal'] = 0
data.loc[data['SMA_10'] > data['SMA_50'], 'Signal'] = 1
data.loc[data['SMA_10'] < data['SMA_50'], 'Signal'] = 0

features = ['Log_Returns', 'SMA_10', 'SMA_50']
X = data[features]
y = data['Signal']

# Splitting the data into training and testing sets
train_data = data[data.index < '2024-01-01']
test_data = data[data.index >= '2024-01-01']

X_train = train_data[features]
y_train = train_data['Signal']
X_test = test_data[features]
y_test = test_data['Signal']

# Initialize and train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Grid Search for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

# Make predictions
test_data['Predicted_Signal'] = best_model.predict(X_test)

class MyStrategy(SignalStrategy):
    def init(self):
        super().init()
        self.set_signal(test_data['Predicted_Signal'].values)

# Run backtest
bt = Backtest(test_data, MyStrategy, cash=10000, commission=0.002, exclusive_orders=True)
stats = bt.run()
print(stats)
bt.plot()




[*********************100%%**********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


DatetimeFormatter scales now only accept a single format. Using the first provided: '%d %b'


DatetimeFormatter scales now only accept a single format. Using the first provided: '%m/%Y'



Start                     2024-01-02 00:00:00
End                       2024-06-04 00:00:00
Duration                    154 days 00:00:00
Exposure Time [%]                   98.130841
Equity Final [$]                   9996.84786
Equity Peak [$]                  10020.667821
Return [%]                          -0.031521
Buy & Hold Return [%]               19.615824
Return (Ann.) [%]                   -0.074221
Volatility (Ann.) [%]                0.416836
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                   -0.256011
Avg. Drawdown [%]                   -0.082641
Max. Drawdown Duration      116 days 00:00:00
Avg. Drawdown Duration       30 days 00:00:00
# Trades                                  105
Win Rate [%]                        47.619048
Best Trade [%]                        8.32893
Worst Trade [%]                     -5.890126
Avg. Trade [%]                    


found multiple competing values for 'toolbar.active_drag' property; using the latest value


found multiple competing values for 'toolbar.active_scroll' property; using the latest value

