# Initialisation

Load, clean, and edit data as required

In [1]:
pip install finta

Note: you may need to restart the kernel to use updated packages.


In [31]:
# Import required libraries
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from finta import TA

In [32]:
# Import required trading data
trading_df = pd.read_csv(
    Path("Data/googl_data.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Display sample data
trading_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-04-07,845.0,845.880005,837.299988,842.099976,842.099976,1111600
2017-04-10,841.539978,846.73999,840.789978,841.700012,841.700012,1046200
2017-04-11,841.700012,844.630005,834.599976,839.880005,839.880005,974300
2017-04-12,838.460022,843.719971,837.590027,841.460022,841.460022,1135800
2017-04-13,841.039978,843.72998,837.849976,840.179993,840.179993,1073700


In [33]:
# Check for nulls

trading_df.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [34]:
# Calculate the daily returns using the closing prices and the pct_change function
trading_df["actual_returns"] = trading_df["Close"].pct_change()

# Drop all NaN values from the DataFrame
trading_df = trading_df.dropna()

# Review the DataFrame
display(trading_df.head())
display(trading_df.tail())

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-04-10,841.539978,846.73999,840.789978,841.700012,841.700012,1046200,-0.000475
2017-04-11,841.700012,844.630005,834.599976,839.880005,839.880005,974300,-0.002162
2017-04-12,838.460022,843.719971,837.590027,841.460022,841.460022,1135800,0.001881
2017-04-13,841.039978,843.72998,837.849976,840.179993,840.179993,1073700,-0.001521
2017-04-17,841.380005,855.640015,841.030029,855.130005,855.130005,1049100,0.017794


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-03-31,2841.060059,2842.159912,2780.48999,2781.350098,2781.350098,1876200,-0.020227
2022-04-01,2790.0,2809.419922,2766.149902,2803.01001,2803.01001,1296700,0.007788
2022-04-04,2807.169922,2874.23999,2806.209961,2859.429932,2859.429932,1297100,0.020128
2022-04-05,2857.379883,2859.810059,2807.649902,2811.820068,2811.820068,1070900,-0.01665
2022-04-06,2775.0,2787.209961,2710.340088,2730.959961,2730.959961,1621000,-0.028757


# Define X values (features)

For this example we will use short and long SMA windows of the close price in our data

In [35]:
# Define a window size of 4
short_window = 4

# Create an SMA that uses short_window, and assign it to a new column named “sma_fast”
trading_df["sma_fast"] = trading_df["Close"].rolling(window=short_window).mean()

In [36]:
# Define a window size of 100
long_window = 100

# Create an SMA that uses long_window, and assign it to a new columns named “sma_slow”
trading_df["sma_slow"] = trading_df["Close"].rolling(window=long_window).mean()

In [37]:
# Drop the NaNs using dropna()
trading_df = trading_df.dropna()

In [38]:
# Assign a copy of the `sma_fast` and `sma_slow` columns to a new DataFrame called `X`
# Shift X values 1 day forward to predict next day prices
X = trading_df[["sma_fast", "sma_slow"]].shift().dropna().copy()

# Display sample data
display(X.head())
display(X.tail())

Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-31,934.502502,945.779905
2017-09-01,940.6875,946.915305
2017-09-05,946.652496,948.036404
2017-09-06,948.084991,949.036604
2017-09-07,947.682495,950.055004


Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-31,2837.862549,2794.726108
2022-04-01,2824.835083,2792.769209
2022-04-04,2818.310059,2790.993108
2022-04-05,2820.640015,2789.803706
2022-04-06,2813.902527,2788.743206


In [48]:
X["Close"] = trading_df["Close"]
X["Returns"] = trading_df["actual_returns"]
display(X.tail())

Unnamed: 0_level_0,sma_fast,sma_slow,Close,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-31,2837.862549,2794.726108,2781.350098,-0.020227
2022-04-01,2824.835083,2792.769209,2803.01001,0.007788
2022-04-04,2818.310059,2790.993108,2859.429932,0.020128
2022-04-05,2820.640015,2789.803706,2811.820068,-0.01665
2022-04-06,2813.902527,2788.743206,2730.959961,-0.028757


In [46]:
bb_signals_df = trading_df.copy()
bbands_df = TA.BBANDS(bb_signals_df)
#bbands_df.tail()
bollinger_df = pd.DataFrame(bbands_df)
bollinger_df.tail()

Unnamed: 0_level_0,BB_UPPER,BB_MIDDLE,BB_LOWER
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-03-31,2924.601996,2701.960522,2479.319049
2022-04-01,2935.093366,2710.204529,2485.315692
2022-04-04,2943.780434,2726.797522,2509.81461
2022-04-05,2941.915163,2740.284021,2538.652879
2022-04-06,2942.269673,2743.412024,2544.554375


# Define Y (Target)

In this example we use a trading signal that throws a long when actual returns are positive (when the price is going up) and a short when actual returns are negative

In [14]:
# Create a new column in the `trading_df` called "signal" setting its value to zero.
trading_df["signal"] = 0.0

In [15]:
# Create the signal to buy
trading_df.loc[(trading_df["actual_returns"] >= 0), "signal"] = 1

In [16]:
# Create the signal to sell
trading_df.loc[(trading_df["actual_returns"] < 0), "signal"] = -1

In [17]:
# Copy the new "signal" column to a new Series called `y`.
y = trading_df["signal"].copy()

# Split training and testing data

We need to do this manually since our data is sequential and we can't have splitting functions randomise data

The offset will determine how big the training/testing datasets will be. We will use 12 months for now

In [18]:
# Import required libraries
from pandas.tseries.offsets import DateOffset

In [19]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2017-08-31 00:00:00


In [20]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=12)

# Display the training end date
print(training_end)

2018-08-31 00:00:00


In [21]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

In [22]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Scale Data

In [23]:
# Import required libraries
from sklearn.preprocessing import StandardScaler

In [24]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create the model

Here we are using an svc model which is a classifier from sklearn

In [25]:
# Import the SVM model
from sklearn import svm
from sklearn.metrics import classification_report

In [26]:
# Create the classifier model
svm_model = svm.SVC()
 
# Fit the model to the data using X_train_scaled and y_train
svm_model = svm_model.fit(X_train_scaled, y_train)

# Use the trained model to predict the trading signals for the training data
training_signal_predictions = svm_model.predict(X_train_scaled)

# Display the sample predictions
training_signal_predictions[:10]

array([ 1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1., -1.])

# Evaluate model and produce metrics

We evaluate the model's training data to see how well its training went, then predict using the model and testing data, and run some metrics

In [27]:
# Evaluate the model using a classification report
training_report = classification_report(y_train, training_signal_predictions)

# Display the report
print(training_report)

              precision    recall  f1-score   support

        -1.0       0.95      0.97      0.96       117
         1.0       0.97      0.96      0.96       136

    accuracy                           0.96       253
   macro avg       0.96      0.96      0.96       253
weighted avg       0.96      0.96      0.96       253



In [28]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = svm_model.predict(X_test_scaled)

In [29]:
# Evaluate the model's ability to predict the trading signal for the testing data
testing_report = classification_report(y_test, testing_signal_predictions)

# Display the report
print(testing_report)

              precision    recall  f1-score   support

        -1.0       0.54      1.00      0.70       413
         1.0       1.00      0.29      0.45       493

    accuracy                           0.61       906
   macro avg       0.77      0.65      0.58       906
weighted avg       0.79      0.61      0.56       906



In [30]:
# Plots
signals_df = pd.DataFrame({"training": training_signal_predictions, "testing":testing_signal_predictions})
signals_df.head()

ValueError: All arrays must be of the same length