# Machine Learing Algorithmic Trading

In [83]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from finta import TA
import yfinance as yf
from imblearn.over_sampling import RandomOverSampler

In [84]:
# Import the data from yfinance and create the Pandas DataFrame
brkb = yf.Ticker("BRK-B")
hist = brkb.history(period="max")
brkb_df= hist.drop(columns=['Dividends', 'Stock Splits'])
display(brkb_df.head())
display(brkb_df.tail())

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1996-05-09 00:00:00-04:00,22.200001,24.4,22.200001,23.200001,4290000
1996-05-10 00:00:00-04:00,24.0,24.200001,23.6,24.0,1060000
1996-05-13 00:00:00-04:00,24.0,24.1,23.299999,23.9,700000
1996-05-14 00:00:00-04:00,24.0,24.1,23.1,23.6,310000
1996-05-15 00:00:00-04:00,23.6,23.700001,23.0,23.200001,545000


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-12 00:00:00-05:00,321.149994,321.320007,317.720001,318.929993,3070300
2023-01-13 00:00:00-05:00,317.48999,318.420013,315.790009,317.640015,2773000
2023-01-17 00:00:00-05:00,318.399994,318.519989,314.25,314.859985,3478900
2023-01-18 00:00:00-05:00,315.0,315.540009,307.75,308.299988,3406000
2023-01-19 00:00:00-05:00,306.119995,307.234985,303.859985,305.230011,3508099


In [85]:
# CREATE TECHNICAL INDICATORS and GENERATE INPUT FUNCTIONS

# Use the percent change function to generate the returns from "Close"
brkb_df["actual_returns"] = brkb_df["Close"].pct_change()

# Drop all NaN values from the DataFrame
brkb_df = brkb_df.dropna()

# Generate the Input Features, X
# Create a short window simple moving average (SMA). 
# Assign to column called `sma_fast`
short_window = 3
brkb_df['sma_fast'] = TA.SMA(brkb_df, 3)

# Create a long window simple moving average (SMA). 
# Assign to column called `sma_slow`
long_window = 100
brkb_df['sma_slow'] = brkb_df['Close'].rolling(window=long_window).mean()

# Create/Add additional technical indicators
brkb_df["ssma"] = TA.SSMA(brkb_df)
brkb_df["ema"] = TA.EMA(brkb_df, 50)
brkb_df["dema"] = TA.DEMA(brkb_df)
brkb_df["tema"] = TA.TEMA(brkb_df)
brkb_df["trima"] = TA.TRIMA(brkb_df)

# Drop the NaNs using dropna()
brkb_df = brkb_df.dropna()

# Assign a copy of the technical variable columns to a new DataFrame called `X` and lag it.
X = brkb_df[['sma_fast', 'sma_slow', 'ssma', 'ema', 'dema', 'tema', 'trima']].shift().dropna().copy()

# Initialize the new `Signal` column
brkb_df['signal'] = 0.0

# Generate signal to buy stock long
brkb_df.loc[(brkb_df['actual_returns'] >= 0), 'signal'] = 1

# Generate signal to sell stock short
brkb_df.loc[(brkb_df['actual_returns'] < 0), 'signal'] = -1
brkb_df.tail()

# Copy the new "signal" column to a new Series called `y`.
y = brkb_df['signal']
display(X.tail())
display(y.tail())

Unnamed: 0_level_0,sma_fast,sma_slow,ssma,ema,dema,tema,trima
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-12 00:00:00-05:00,317.416667,293.1383,311.838185,305.37355,318.269419,319.877099,308.045247
2023-01-13 00:00:00-05:00,318.549998,293.3548,312.626164,305.905175,319.117867,320.366437,307.928302
2023-01-17 00:00:00-05:00,318.980001,293.6443,313.183258,306.365365,319.188967,319.878032,307.90037
2023-01-18 00:00:00-05:00,317.143331,293.8964,313.369561,306.698487,318.171703,318.060611,307.966759
2023-01-19 00:00:00-05:00,313.599996,294.0442,312.806275,306.761291,315.026585,313.592393,308.096357


Date
2023-01-12 00:00:00-05:00   -1.0
2023-01-13 00:00:00-05:00   -1.0
2023-01-17 00:00:00-05:00   -1.0
2023-01-18 00:00:00-05:00   -1.0
2023-01-19 00:00:00-05:00   -1.0
Name: signal, dtype: float64

### Split Data into Training and Test Datasets

In [86]:
# Select the start of the training period
training_begin = X.index.min()
print(training_begin)

# Select the ending period for the training dat
training_end = X.index.min() + DateOffset(months=3)
print(training_end)

# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Scale the data.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Use RandomOverSampler to resample the datase using random_state=1
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train_scaled, y_train)

1996-10-02 00:00:00-04:00
1997-01-02 00:00:00-05:00


### Train and Generate Trade Predictions

In [87]:
# Create the classifier model.
from sklearn.svm import SVC
model = SVC()
 
# Fit the model to the data
model = model.fit(X_resampled, y_resampled)

# Use the trained model to predict the trading signals for the training data.
training_signal_predictions = model.predict(X_resampled)

# Evaluate the model
from sklearn.metrics import classification_report
training_report = classification_report(y_resampled, training_signal_predictions)
print(training_report)

              precision    recall  f1-score   support

        -1.0       0.59      0.58      0.58        33
         1.0       0.59      0.61      0.60        33

    accuracy                           0.59        66
   macro avg       0.59      0.59      0.59        66
weighted avg       0.59      0.59      0.59        66



## Backtest the Trading Algorithm Against Machine Learning Model

In [88]:
# Use the trained model to predict trading signals for the testing data.
testing_signal_predictions = model.predict(X_test_scaled)

In [89]:
# Evaluate the model's ability to predict the trading signal for the testing data
training_report = classification_report(y_test, testing_signal_predictions)
print(training_report)

              precision    recall  f1-score   support

        -1.0       0.21      0.00      0.00      3235
         1.0       0.51      1.00      0.67      3321

    accuracy                           0.51      6556
   macro avg       0.36      0.50      0.34      6556
weighted avg       0.36      0.51      0.34      6556



In [90]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df['predicted_returns'] = testing_signal_predictions
predictions_df['predicted_returns'].value_counts()

 1.0    6542
-1.0      14
Name: predicted_returns, dtype: int64

In [91]:
# Add in actual returns and calculate trading returns
predictions_df['actual_returns'] = brkb_df['actual_returns']
predictions_df['trading_algorithm_returns'] = predictions_df['actual_returns'] * predictions_df['predicted_returns']
display(predictions_df.tail())

Unnamed: 0_level_0,predicted_returns,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-12 00:00:00-05:00,1.0,-0.004495,-0.004495
2023-01-13 00:00:00-05:00,1.0,-0.004045,-0.004045
2023-01-17 00:00:00-05:00,1.0,-0.008752,-0.008752
2023-01-18 00:00:00-05:00,1.0,-0.020835,-0.020835
2023-01-19 00:00:00-05:00,1.0,-0.009958,-0.009958


In [92]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[['actual_returns', 'trading_algorithm_returns']]).cumprod().hvplot()