#Decision Tree Regression



##Objective: Construct a Decision Tree Regression to predict stock prices based on raw and enguneered features.

##Import Libraries

In [155]:
import numpy as np
import pandas as pd
import pandas_datareader.data as web

import yfinance as yf

import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [156]:
from IPython import display
display.set_matplotlib_formats('svg') # Improve matplotlib visual quality


`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`



##Import Dataset

In [157]:
# Financial Companies: ['BAC', 'WFC', 'JPM', 'GS', 'MS', 'UBS']
ticker = ['UBS']
period = 'max'

stock_data = yf.download(ticker, period = period)[['Adj Close', 'Volume']].copy()
stock_data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-05-16,15.969481,346200
2000-05-17,15.678802,174000
2000-05-18,15.798031,151800
2000-05-19,15.611794,63600
2000-05-22,15.552178,87600
...,...,...
2024-06-28,29.540001,1435600
2024-07-01,29.940001,1077000
2024-07-02,30.020000,1346500
2024-07-03,30.340000,1003100


##Data Preprocessing

###Feature Engineering

Features Engineered:


* **Lagged values:** stock prices/returns often exhibit autocorrelation, past returns can influence future returns.
*   **Moving Averages:** they identify trends or cycles & crossovers of a short Simple Moving Average and long Simple Moving Averages often signal buy/sell periods.
* **Exponential Moving Average:** similar to a moving average but higher weight is placed on recent prices/returns which makes the indicator more responsive to information.
* **Volatility:** the degree of variation in stock prices, which is regarded as the riskiness of an asset.
* **Volume Moving Average:** high volume often indicates significant price movement and strong investor interest.
* **Treasury Rate:** the interest rates on debt securities issued by the U.S. Treasury. The relationship between Treasury rates and stock prices are generally considered inversely related.
* **Federal Funds Rate:** rate ar which depository institutions lend reserve balances to other depository institutions overnight. The rate influences other interest rates throughout the economy, such as, mortgages, credit card, and business loans.


In [158]:


# Lagged value
for lag in [1, 5, 10, 15]:
  stock_data[f'Lagged {lag}'] = stock_data['Adj Close'].shift(lag)

# Moving Averages
for window in [5, 14, 30]:
  stock_data[f'MA_{window}'] = stock_data['Adj Close'].rolling(window).mean()

# Exponential Moving Averages
for span in [12, 26]:
  stock_data[f'EMA_{span}'] = stock_data['Adj Close'].ewm(span = span, adjust=False).mean()

# Volatility Averages
for vol in [5, 14, 30]:
  stock_data[f'Volatility_{vol}'] = stock_data['Adj Close'].rolling(vol).std()

# Volume Averages
for window in [5, 14, 30]:
  stock_data[f'Volume_{window}_MA'] = stock_data['Volume'].rolling(window).mean()

# Fetch interest rate data from FRED
start_date = stock_data.index.min().strftime('%Y-%m-%d')
end_date = stock_data.index.max().strftime('%Y-%m-%d')

interest_rate = web.DataReader('DGS10', 'fred', start_date, end_date)
interest_rate.columns = ['10Y_Treasury_Rate']
stock_data = stock_data.join(interest_rate, how='left')
stock_data['10Y_Treasury_Rate'].fillna(method='ffill', inplace=True)

# Fetch Federal Funds Rate data from FRED
fed_rate = web.DataReader('FEDFUNDS', 'fred', start_date, end_date)
fed_rate.columns = ['Fed_Rate']
stock_data = stock_data.join(fed_rate, how='left')
stock_data['Fed_Rate'].fillna(method='ffill', inplace=True)

In [159]:
stock_data

Unnamed: 0_level_0,Adj Close,Volume,Lagged 1,Lagged 5,Lagged 10,Lagged 15,MA_5,MA_14,MA_30,EMA_12,EMA_26,Volatility_5,Volatility_14,Volatility_30,Volume_5_MA,Volume_14_MA,Volume_30_MA,10Y_Treasury_Rate,Fed_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2000-05-16,15.969481,346200,,,,,,,,15.969481,15.969481,,,,,,,6.43,
2000-05-17,15.678802,174000,15.969481,,,,,,,15.924762,15.947950,,,,,,,6.48,
2000-05-18,15.798031,151800,15.678802,,,,,,,15.905265,15.936845,,,,,,,6.56,
2000-05-19,15.611794,63600,15.798031,,,,,,,15.860115,15.912767,,,,,,,6.51,
2000-05-22,15.552178,87600,15.611794,,,,15.722058,,,15.812740,15.886057,0.165740,,,164640.0,,,6.44,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-28,29.540001,1435600,29.459999,30.240000,30.420000,31.690001,29.722000,30.450000,30.675667,30.180100,30.299728,0.534060,0.708498,0.702207,2230660.0,2.157314e+06,1.848003e+06,4.36,5.33
2024-07-01,29.940001,1077000,29.540001,30.600000,30.400000,31.620001,29.590000,30.355714,30.673000,30.143162,30.273082,0.287403,0.679658,0.704929,2215040.0,2.153400e+06,1.823797e+06,4.48,5.33
2024-07-02,30.020000,1346500,29.940001,29.799999,30.990000,31.260000,29.634000,30.295714,30.662333,30.124214,30.254335,0.339677,0.668704,0.712522,1974080.0,2.118650e+06,1.832547e+06,4.43,5.33
2024-07-03,30.340000,1003100,30.020000,29.209999,31.180000,30.860001,29.860000,30.238571,30.670333,30.157412,30.260681,0.362216,0.623672,0.707319,1283540.0,2.065971e+06,1.823480e+06,4.36,5.33


In [160]:
# stock_data.drop(columns='Adj Close', inplace = True)
stock_data.dropna(inplace = True)

In [161]:
stock_data

Unnamed: 0_level_0,Adj Close,Volume,Lagged 1,Lagged 5,Lagged 10,Lagged 15,MA_5,MA_14,MA_30,EMA_12,EMA_26,Volatility_5,Volatility_14,Volatility_30,Volume_5_MA,Volume_14_MA,Volume_30_MA,10Y_Treasury_Rate,Fed_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2000-06-27,17.273506,46200,17.191597,17.780237,18.063414,17.303316,17.339108,17.661016,16.899192,17.423611,17.149374,0.159263,0.308754,0.867874,71160.0,8.117143e+04,1.154000e+05,6.10,6.53
2000-06-28,17.497065,19800,17.273506,17.608904,18.242258,17.482161,17.316740,17.652499,16.950111,17.434911,17.175129,0.113046,0.311713,0.856180,49440.0,7.581429e+04,1.045200e+05,6.11,6.53
2000-06-29,17.288412,28200,17.497065,17.333122,17.690815,17.616295,17.307798,17.623756,17.003765,17.412373,17.183520,0.113195,0.326128,0.823577,32880.0,7.470000e+04,9.966000e+04,6.04,6.53
2000-06-30,17.526873,82200,17.288412,17.288412,17.944176,17.690815,17.355490,17.606723,17.061393,17.429988,17.208954,0.147900,0.324387,0.796335,37200.0,7.821429e+04,9.734000e+04,6.03,6.53
2000-07-03,17.467257,66000,17.526873,17.191597,17.765333,17.765333,17.410622,17.564140,17.123242,17.435722,17.228087,0.120343,0.297871,0.750608,48480.0,8.027143e+04,9.742000e+04,6.00,6.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-28,29.540001,1435600,29.459999,30.240000,30.420000,31.690001,29.722000,30.450000,30.675667,30.180100,30.299728,0.534060,0.708498,0.702207,2230660.0,2.157314e+06,1.848003e+06,4.36,5.33
2024-07-01,29.940001,1077000,29.540001,30.600000,30.400000,31.620001,29.590000,30.355714,30.673000,30.143162,30.273082,0.287403,0.679658,0.704929,2215040.0,2.153400e+06,1.823797e+06,4.48,5.33
2024-07-02,30.020000,1346500,29.940001,29.799999,30.990000,31.260000,29.634000,30.295714,30.662333,30.124214,30.254335,0.339677,0.668704,0.712522,1974080.0,2.118650e+06,1.832547e+06,4.43,5.33
2024-07-03,30.340000,1003100,30.020000,29.209999,31.180000,30.860001,29.860000,30.238571,30.670333,30.157412,30.260681,0.362216,0.623672,0.707319,1283540.0,2.065971e+06,1.823480e+06,4.36,5.33


###Splitting the Dataset into Train & Test Sets

In [162]:
x = stock_data.drop(columns = 'Adj Close')
y = stock_data['Adj Close']

In [163]:
x.shape, y.shape

((6043, 18), (6043,))

In [164]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1, shuffle=False)

In [165]:
x_train.shape, y_train.shape

((5438, 18), (5438,))

In [166]:
x_test

Unnamed: 0_level_0,Volume,Lagged 1,Lagged 5,Lagged 10,Lagged 15,MA_5,MA_14,MA_30,EMA_12,EMA_26,Volatility_5,Volatility_14,Volatility_30,Volume_5_MA,Volume_14_MA,Volume_30_MA,10Y_Treasury_Rate,Fed_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2022-02-07,3481500,18.852991,17.245731,17.060987,18.123259,18.902873,17.885732,17.591816,18.225345,17.764400,0.094110,0.818745,0.763372,5394140.0,6.535886e+06,4.359580e+06,1.92,0.08
2022-02-08,3933600,19.037735,18.843756,17.190308,17.910805,19.021108,18.009114,17.683263,18.411435,17.888143,0.247549,0.914399,0.814417,4267240.0,6.623200e+06,4.455623e+06,1.96,0.08
2022-02-09,3769700,19.434933,18.963839,17.365814,17.707588,19.163360,18.150310,17.781485,18.605845,18.020510,0.376951,1.010310,0.871014,3919640.0,6.434200e+06,4.545137e+06,1.94,0.08
2022-02-10,4029300,19.675098,18.816044,17.014801,17.698349,19.313002,18.292166,17.878167,18.753292,18.134861,0.352310,1.061918,0.903058,3886900.0,6.162229e+06,4.635270e+06,2.03,0.08
2022-02-11,7004900,19.564251,18.852991,17.024038,17.578268,19.368425,18.439960,17.964072,18.811263,18.208583,0.275214,1.020567,0.895191,4443800.0,5.814307e+06,4.808817e+06,1.92,0.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-06-28,1435600,29.459999,30.240000,30.420000,31.690001,29.722000,30.450000,30.675667,30.180100,30.299728,0.534060,0.708498,0.702207,2230660.0,2.157314e+06,1.848003e+06,4.36,5.33
2024-07-01,1077000,29.540001,30.600000,30.400000,31.620001,29.590000,30.355714,30.673000,30.143162,30.273082,0.287403,0.679658,0.704929,2215040.0,2.153400e+06,1.823797e+06,4.48,5.33
2024-07-02,1346500,29.940001,29.799999,30.990000,31.260000,29.634000,30.295714,30.662333,30.124214,30.254335,0.339677,0.668704,0.712522,1974080.0,2.118650e+06,1.832547e+06,4.43,5.33
2024-07-03,1003100,30.020000,29.209999,31.180000,30.860001,29.860000,30.238571,30.670333,30.157412,30.260681,0.362216,0.623672,0.707319,1283540.0,2.065971e+06,1.823480e+06,4.36,5.33


##Train the Support Vector Regression Model

In [167]:
dt = DecisionTreeRegressor(random_state = 0)
dt.fit(x_train, y_train)

##Predict returns with model

In [168]:
pred = dt.predict(x_test)

In [169]:
pred.shape, y_test.shape

((605,), (605,))

In [170]:
data = {
        'True Value': y_test.values.flatten(),
        'predicted': pred.flatten(),
        }

test_results = pd.DataFrame(data, index = y_test.index)
train_values = pd.DataFrame({'Trained Value': y_train})

combined = pd.concat([train_values, test_results])
combined

Unnamed: 0_level_0,Trained Value,True Value,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-06-27,17.273506,,
2000-06-28,17.497065,,
2000-06-29,17.288412,,
2000-06-30,17.526873,,
2000-07-03,17.467257,,
...,...,...,...
2024-06-28,,29.540001,29.960222
2024-07-01,,29.940001,30.063953
2024-07-02,,30.020000,30.085417
2024-07-03,,30.340000,30.085417


##Visualize results

In [171]:
px.line(combined, title = f'{ticker[0]} True vs. Predicted Price')

In [172]:
rmse = np.sqrt(mean_squared_error(y_test, pred))
print('Root Mean Squared Error is:', rmse)

Root Mean Squared Error is: 0.6165958398141198
