In [1]:
# Import required libraries
import pandas as pd
from pathlib import Path

In [2]:
# Import the OHLCV dataset into a Pandas Dataframe
trading_df = pd.read_csv(
    Path("stock_data.csv"), 
    index_col="Date", 
    infer_datetime_format=True, 
    parse_dates=True
)

# Display sample data
trading_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-31,39.6325,39.84,39.119999,39.435001,38.233902,140014000
2019-01-02,38.7225,39.712502,38.557499,39.48,38.277519,148158800
2019-01-03,35.994999,36.43,35.5,35.547501,34.464806,365248800
2019-01-04,36.1325,37.137501,35.950001,37.064999,35.936073,234428400
2019-01-07,37.174999,37.2075,36.474998,36.982498,35.856091,219111200


In [3]:
# Calculate the daily returns using the closing prices and the pct_change function
trading_df["actual_returns"] = trading_df["Close"].pct_change()

# Drop all NaN values from the DataFrame
trading_df = trading_df.dropna()

# Review the DataFrame
display(trading_df.head())
display(trading_df.tail())

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-02,38.7225,39.712502,38.557499,39.48,38.277519,148158800,0.001141
2019-01-03,35.994999,36.43,35.5,35.547501,34.464806,365248800,-0.099607
2019-01-04,36.1325,37.137501,35.950001,37.064999,35.936073,234428400,0.042689
2019-01-07,37.174999,37.2075,36.474998,36.982498,35.856091,219111200,-0.002226
2019-01-08,37.389999,37.955002,37.130001,37.6875,36.539616,164101200,0.019063


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-03-31,177.839996,178.029999,174.399994,174.610001,174.610001,103049300,-0.017776
2022-04-01,174.029999,174.880005,171.940002,174.309998,174.309998,78699800,-0.001718
2022-04-04,174.570007,178.490005,174.440002,178.440002,178.440002,76468400,0.023693
2022-04-05,177.5,178.300003,174.419998,175.059998,175.059998,73401800,-0.018942
2022-04-06,172.360001,173.630005,170.130005,171.830002,171.830002,88950300,-0.018451


In [4]:
# Drop all NaN values from the DataFrame
trading_df = trading_df.dropna()

# Review the DataFrame
display(trading_df.head())
display(trading_df.tail())

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-02,38.7225,39.712502,38.557499,39.48,38.277519,148158800,0.001141
2019-01-03,35.994999,36.43,35.5,35.547501,34.464806,365248800,-0.099607
2019-01-04,36.1325,37.137501,35.950001,37.064999,35.936073,234428400,0.042689
2019-01-07,37.174999,37.2075,36.474998,36.982498,35.856091,219111200,-0.002226
2019-01-08,37.389999,37.955002,37.130001,37.6875,36.539616,164101200,0.019063


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-03-31,177.839996,178.029999,174.399994,174.610001,174.610001,103049300,-0.017776
2022-04-01,174.029999,174.880005,171.940002,174.309998,174.309998,78699800,-0.001718
2022-04-04,174.570007,178.490005,174.440002,178.440002,178.440002,76468400,0.023693
2022-04-05,177.5,178.300003,174.419998,175.059998,175.059998,73401800,-0.018942
2022-04-06,172.360001,173.630005,170.130005,171.830002,171.830002,88950300,-0.018451


In [5]:
# Define a window size of 4
short_window = 4

# Create an SMA that uses short_window, and assign it to a new column named “sma_fast”
trading_df["sma_fast"] = trading_df["Close"].rolling(window=short_window).mean()

In [6]:
# Define a window size of 100
long_window = 100

# Create an SMA that uses long_window, and assign it to a new columns named “sma_slow”
trading_df["sma_slow"] = trading_df["Close"].rolling(window=long_window).mean()

In [7]:
# Drop the NaNs using dropna()
trading_df = trading_df.dropna()

In [8]:
# Assign a copy of the `sma_fast` and `sma_slow` columns to a new DataFrame called `X`
X = trading_df[["sma_fast", "sma_slow"]].shift().dropna().copy()

# Display sample data
display(X.head())
display(X.tail())

Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-05-28,45.500626,45.148425
2019-05-29,44.9775,45.1992
2019-05-30,44.64,45.287175
2019-05-31,44.555,45.362275
2019-06-03,44.311251,45.430125


Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-31,176.762505,167.076301
2022-04-01,176.735004,167.309601
2022-04-04,176.412502,167.5483
2022-04-05,176.282501,167.824601
2022-04-06,175.605,168.096001


In [9]:
# Create a new column in the `trading_df` called "signal" setting its value to zero.
trading_df["signal"] = 0.0

In [10]:
# Create the signal to buy
trading_df.loc[(trading_df["actual_returns"] >= 0), "signal"] = 1

In [11]:
# Create the signal to sell
trading_df.loc[(trading_df["actual_returns"] < 0), "signal"] = -1

In [12]:
# Copy the new "signal" column to a new Series called `y`.
y = trading_df["signal"].copy()

In [13]:
# Import required libraries
from pandas.tseries.offsets import DateOffset

In [14]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-05-28 00:00:00


In [15]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)

# Display the training end date
print(training_end)

2019-08-28 00:00:00


In [16]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

In [17]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

In [18]:
# Import required libraries
from sklearn.preprocessing import StandardScaler

In [19]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [20]:
# Import the SVM model
from sklearn import svm
from sklearn.metrics import classification_report

In [21]:
# Create the classifier model
svm_model = svm.SVC()
 
# Fit the model to the data using X_train_scaled and y_train
svm_model = svm_model.fit(X_train_scaled, y_train)

# Use the trained model to predict the trading signals for the training data
training_signal_predictions = svm_model.predict(X_train_scaled)

# Display the sample predictions
training_signal_predictions[:10]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [22]:
# Evaluate the model using a classification report
training_report = classification_report(y_train, training_signal_predictions)

# Display the report
print(training_report)

              precision    recall  f1-score   support

        -1.0       0.58      0.47      0.52        32
         1.0       0.57      0.68      0.62        34

    accuracy                           0.58        66
   macro avg       0.58      0.57      0.57        66
weighted avg       0.58      0.58      0.57        66



In [23]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = svm_model.predict(X_test_scaled)

In [24]:
# Evaluate the model's ability to predict the trading signal for the testing data
testing_report = classification_report(y_test, testing_signal_predictions)

# Display the report
print(testing_report)

              precision    recall  f1-score   support

        -1.0       0.46      0.99      0.63       302
         1.0       0.57      0.01      0.02       356

    accuracy                           0.46       658
   macro avg       0.52      0.50      0.32       658
weighted avg       0.52      0.46      0.30       658

