# Installing and Importing libraries

In [118]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Function to download stock data

In [119]:
def stock_download_data(ticker, start_date, end_date):
    """Fetch price history for ``ticker`` through ``yf.download``.

    Args:
        ticker: Symbol handed straight to ``yf.download``.
        start_date: Forwarded as the ``start`` keyword.
        end_date: Forwarded as the ``end`` keyword.

    Returns:
        Whatever ``yf.download`` returns for the request, unmodified.
    """
    return yf.download(ticker, start=start_date, end=end_date)

# Function to create Trading strategy given in the document

In [120]:
def create_strategy_formula(data):
    """Label every row by whether the following row closes higher.

    Args:
        data: Table-like object whose ``'Close'`` column supports ``shift``
            and element-wise comparison.

    Returns:
        NumPy array with one entry per row of ``data``: ``1`` where the next
        row's ``Close`` is strictly greater than the current row's, ``-1``
        otherwise.

    Note:
        The last row has no successor, so its shifted value is missing and
        the comparison evaluates to false; that row is always labelled ``-1``.
    """
    next_close = data['Close'].shift(-1)
    closes_higher_next = next_close > data['Close']
    return np.where(closes_higher_next, 1, -1)

# Function to preprocess the data and split into Training and Test sets

In [121]:
def Train_test_data_fun(features, labels, test_size = 0.2, random_state = 42, shuffle = True):
    """Split features and labels into training and test subsets.

    Thin wrapper around ``train_test_split``. The default arguments reproduce
    the previously hard-coded call (20% test share, seed 42, shuffled rows),
    so existing two-argument callers behave exactly as before.

    Args:
        features: Feature rows, passed to ``train_test_split`` unchanged.
        labels: Targets aligned row-for-row with ``features``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.
        shuffle: Forwarded to ``train_test_split`` as ``shuffle``. Pass
            ``False`` to keep the original row order, so the test subset is
            the trailing part of the data; useful when rows are time-ordered
            and a shuffled split would interleave later rows with training
            rows.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size = test_size,
        random_state = random_state,
        shuffle = shuffle,
    )
    return X_train, X_test, y_train, y_test

# Function to train and evaluate classifiers

In [122]:
def train_and_evaluate(classifier,X_train, X_test, y_train, y_test):
    """Fit ``classifier`` on the training split and score it on the test split.

    Args:
        classifier: Object exposing ``fit(X, y)`` and ``predict(X)``; it is
            fitted in place.
        X_train: Training features passed to ``fit``.
        X_test: Features passed to ``predict``.
        y_train: Training targets passed to ``fit``.
        y_test: Reference targets the predictions are compared against.

    Returns:
        Tuple ``(accuracy, predictions)`` where ``accuracy`` is
        ``accuracy_score(y_test, predictions)`` and ``predictions`` is the
        output of ``classifier.predict(X_test)``.
    """
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return accuracy_score(y_test, y_pred), y_pred

# Defining the stock data download parameters

In [123]:
# Download parameters: the symbol to fetch and the date window
# ("YYYY-MM-DD" strings) that are handed to stock_download_data.
ticker, start_date, end_date = "AAPL", "2015-01-01", "2023-12-31"

# Downloading Stock Data

In [124]:
# Fetch the price history for the configured symbol and date window.
data = stock_download_data(
    ticker=ticker,
    start_date=start_date,
    end_date=end_date,
)

[*********************100%%**********************]  1 of 1 completed


In [125]:
# Preview the first rows of the downloaded frame.
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,27.8475,27.860001,26.8375,27.3325,24.435268,212818400
2015-01-05,27.0725,27.1625,26.352501,26.5625,23.746897,257142000
2015-01-06,26.635,26.8575,26.157499,26.565001,23.749123,263188400
2015-01-07,26.799999,27.049999,26.674999,26.9375,24.082142,160423600
2015-01-08,27.307501,28.0375,27.174999,27.9725,25.007429,237458000


# Applying the strategy formula

In [126]:
# Build the +1 / -1 signal from the Close column (one entry per row of data).
strategy_formula = create_strategy_formula(data)

In [127]:
# Number of generated signal values.
strategy_formula.shape

(2264,)

# Assigning the features

In [128]:
# Columns used as model inputs; 'Adj Close' is deliberately not in this list.
feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
features = data[feature_columns].values

In [129]:
# Wrap the raw feature array in a DataFrame. No column names are supplied,
# so the columns are labelled by integer position (0-4) rather than by the
# original price/volume names.
features_df = pd.DataFrame(features)
features_df.head()

Unnamed: 0,0,1,2,3,4
0,27.8475,27.860001,26.8375,27.3325,212818400.0
1,27.0725,27.1625,26.352501,26.5625,257142000.0
2,26.635,26.8575,26.157499,26.565001,263188400.0
3,26.799999,27.049999,26.674999,26.9375,160423600.0
4,27.307501,28.0375,27.174999,27.9725,237458000.0


In [130]:
# (rows, columns) of the feature table.
features_df.shape

(2264, 5)

# Labels for ML Classifiers

In [131]:
# The strategy signal is used directly as the classification target
# (this is an alias of the same array, not a copy).
labels = strategy_formula

In [132]:
# Display the label array.
labels

array([-1,  1,  1, ...,  1, -1, -1])

In [133]:
# Number of labels; should match the row count of features_df.
labels.shape

(2264,)

# Checking for the null values

In [134]:
# Count NaN entries in the label array. The labels come from np.where with
# the integer outputs 1 / -1, so this count only covers the labels themselves,
# not the feature columns.
nan_mask = np.isnan(labels)
null_data = int(nan_mask.sum())
null_data

0

- There are no null values in the labels

# Splitting the data into Training and Test sets

In [135]:
# Split features and labels into train/test partitions; Train_test_data_fun
# holds out 20% of the rows using a fixed seed.
# NOTE(review): the helper does not disable shuffling, so test rows are
# presumably interleaved in time with training rows - confirm this is intended.
X_train, X_test, y_train, y_test = Train_test_data_fun(features_df,labels)

In [136]:
# (rows, columns) of the training features.
X_train.shape

(1811, 5)

In [137]:
# Number of training labels; should equal the training row count.
y_train.shape

(1811,)

In [138]:
# (rows, columns) of the held-out test features.
X_test.shape

(453, 5)

# Initializing the classifiers

In [139]:
# Seed both models so that re-running the notebook from a fresh kernel
# reproduces the same fitted models and accuracies (42 matches the seed
# used for the train/test split).
rf_classifier = RandomForestClassifier(random_state = 42)
xgb_classifier = XGBClassifier(random_state = 42)

In [140]:
# Remap the -1 class to 0 so both label vectors contain only 0 and 1;
# any value other than -1 is passed through unchanged.
# NOTE(review): presumably needed because XGBClassifier expects class ids
# starting at 0 - confirm against the installed xgboost version.
y_train_processed = np.where(y_train != -1, y_train, 0)
y_test_processed = np.where(y_test != -1, y_test, 0)

In [141]:
# Fit the random forest on the 0/1-encoded training labels and score it on
# the held-out split; keeps both the accuracy and the raw predictions.
rf_accuracy, rf_predictions = train_and_evaluate(rf_classifier, X_train, X_test, y_train_processed, y_test_processed)

In [142]:
# Fit the XGBClassifier on the same 0/1-encoded training labels and score it
# on the held-out split; keeps both the accuracy and the raw predictions.
xgb_accuracy, xgb_predictions = train_and_evaluate(xgb_classifier, X_train, X_test, y_train_processed, y_test_processed)

# Accuracy of the Model

In [143]:
# Report the test-set accuracy of each model (fraction of test rows whose
# predicted 0/1 class matches the label).
print(f'Random Forest Classifier (RF) Accuracy: {rf_accuracy}')
print(f'XGBoost Classifier (XGB) Accuracy: {xgb_accuracy}')

Random Forest Classifier (RF) Accuracy: 0.5099337748344371
XGBoost Classifier (XGB) Accuracy: 0.4900662251655629
