# Step 1: Import libraries and directories

In [20]:
# Import required libraries
import pandas as pd
import numpy as np
import hvplot.pandas
from pathlib import Path
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

# Step 2: Import the OHLC dataset into Panadas Dataframe

In [21]:
# Import the dataset into a Pandas Dataframe
tick_df = pd.read_csv(
    Path("../Resources/META.csv"), 
    index_col='Date', 
    infer_datetime_format=True, 
    parse_dates=True
)

# Review the DataFrame
tick_df.head()

  tick_df = pd.read_csv(


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-19,141.210007,144.910004,132.5,133.240005,133.240005,57404900
2018-12-20,130.699997,135.570007,130.0,133.399994,133.399994,40297900
2018-12-21,133.389999,134.899994,123.419998,124.949997,124.949997,56901500
2018-12-24,123.099998,129.740005,123.019997,124.059998,124.059998,22066000
2018-12-26,126.0,134.240005,125.889999,134.179993,134.179993,39723400


In [22]:
# Filter the date index and close columns
model_df = tick_df.loc[:, ["High", "Low", "Close"]]

# Drop all NaN values from the DataFrame
model_df = model_df.dropna()

# Review the DataFrame
display(model_df.head())
display(model_df.tail())

Unnamed: 0_level_0,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-12-19,144.910004,132.5,133.240005
2018-12-20,135.570007,130.0,133.399994
2018-12-21,134.899994,123.419998,124.949997
2018-12-24,129.740005,123.019997,124.059998
2018-12-26,134.240005,125.889999,134.179993


Unnamed: 0_level_0,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-12-12,334.470001,324.559998,334.220001
2023-12-13,338.369995,332.640015,334.73999
2023-12-14,334.700012,328.640015,333.170013
2023-12-15,338.660004,331.220001,334.920013
2023-12-18,347.559998,337.019989,344.619995


# Step 3: Generate the Stochastich Oscillator Technical Indicators

In [23]:
# def generate_stochastic_signals(df, high_col='High', low_col='Low', close_col='Close', k_period=14, d_period=3, overbought=80, oversold=20):
    # tick_df = generate_stochastic_signals(tick_df)
k_period=14
d_period=3
overbought=80
oversold=20

# Calculate %K and %D
model_df['%K'] = 100 * ((model_df['Close'] - model_df['Low'].rolling(window=k_period).min()) / (model_df['High'].rolling(window=k_period).max() - model_df['Low'].rolling(window=k_period).min()))
model_df['%D'] = model_df['%K'].rolling(window=d_period).mean()

# Generate entry/exit signals
model_df['Long Signal'] = (model_df['%K'] < oversold) & (model_df['%D'] < oversold)
model_df['Short Signal'] = (model_df['%K'] > overbought) & (model_df['%D'] > overbought)

In [24]:
model_df["Signals"] = np.where(model_df['Short Signal'], -1,       
np.where(model_df['Long Signal'], 1,0))

In [25]:
# Review the DataFrame
display(model_df.head())
display(model_df.tail())

Unnamed: 0_level_0,High,Low,Close,%K,%D,Long Signal,Short Signal,Signals
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-12-19,144.910004,132.5,133.240005,,,False,False,0
2018-12-20,135.570007,130.0,133.399994,,,False,False,0
2018-12-21,134.899994,123.419998,124.949997,,,False,False,0
2018-12-24,129.740005,123.019997,124.059998,,,False,False,0
2018-12-26,134.240005,125.889999,134.179993,,,False,False,0


Unnamed: 0_level_0,High,Low,Close,%K,%D,Long Signal,Short Signal,Signals
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-12-12,334.470001,324.559998,334.220001,70.266544,58.40735,False,False,0
2023-12-13,338.369995,332.640015,334.73999,74.751774,61.577069,False,False,0
2023-12-14,334.700012,328.640015,333.170013,74.352197,73.123505,False,False,0
2023-12-15,338.660004,331.220001,334.920013,81.021407,76.708459,False,False,0
2023-12-18,347.559998,337.019989,344.619995,91.327423,82.233676,False,True,-1


In [26]:
model_df['Signals'].value_counts()

Signals
 0    863
-1    265
 1    130
Name: count, dtype: int64

In [27]:
# Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
X = model_df[['%K', '%D']].shift().dropna()

# Review the DataFrame
X.head()

Unnamed: 0_level_0,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-14,93.01702,95.86809
2019-01-15,94.989352,95.233366
2019-01-16,93.021394,93.675922
2019-01-17,85.804698,91.271815
2019-01-18,89.240549,89.355547


In [28]:
# Create the target set selecting the Signal column and assiging it to y
y = model_df['Signals']

# Review the value counts
y.value_counts()

Signals
 0    863
-1    265
 1    130
Name: count, dtype: int64

In [29]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-01-14 00:00:00


In [30]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)

# Display the training end date
print(training_end)

2019-04-14 00:00:00


In [31]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-14,93.01702,95.86809
2019-01-15,94.989352,95.233366
2019-01-16,93.021394,93.675922
2019-01-17,85.804698,91.271815
2019-01-18,89.240549,89.355547


In [32]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_test.head()

Unnamed: 0_level_0,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-15,96.748473,93.001863
2019-04-16,95.049469,94.00826
2019-04-17,90.506668,94.101537
2019-04-18,87.696138,91.084092
2019-04-22,81.723589,86.642132


In [33]:
# Scale the features DataFrames

# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [34]:
# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC()
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_train_scaled, y_train)
 
# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred[:10]

array([-1, -1, -1, -1, -1, -1, -1, -1, -1,  0])

In [35]:
# Use a classification report to evaluate the model using the predictions and testing data
svm_testing_report = classification_report(y_test, svm_pred)

# Print the classification report
print(svm_testing_report)

              precision    recall  f1-score   support

          -1       0.64      0.87      0.74       244
           0       0.81      0.85      0.83       806
           1       0.00      0.00      0.00       129

    accuracy                           0.76      1179
   macro avg       0.48      0.57      0.52      1179
weighted avg       0.69      0.76      0.72      1179



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
