In [3]:
# standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# neural network packages
import torch
import torch.utils
import torch.nn.functional as F
import torch.optim as optim
from torch import nn
torch.manual_seed(0)

# data processing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit, train_test_split

# to estimate the HAR model
from statsmodels.regression.linear_model import OLS
import statsmodels.api as sm

# own helping code for estimation, data processing etc.
from utils.estimating import model_estimator, EarlyStopper, kfolds_fit_and_evaluate_model, single_fit_and_evaluate_model, fit_and_evaluateHAR
from utils.preprocessing import PreProcessor, data_to_loaders
from utils.functions import get_ticker_daily_close, print_nicely, reset_model_weights
from utils.modelbuilder import ForwardNeuralNetwork

# ignore annoying warnings
# NOTE(review): simplefilter("ignore") without a category suppresses every
# warning emitted after this point, not only the noisy ones. Consider narrowing
# it to specific categories/modules so genuine problems stay visible.
import warnings
warnings.simplefilter("ignore")

### Process Data to Features, Targets and Divide into Training/Validation Data
- First, the features are computed (previous daily/weekly/monthly volatility).
- Then the data is separated into training and validation data.

In [4]:
# LOAD DATA
# Alternative source: a local CSV file with a "price" column.
# file_name = "msft_stock.csv"
# data = pd.read_csv("data/" + file_name)
# returns = data["price"].apply(np.log).diff()

# or through yahoo finance
prices = get_ticker_daily_close("MSFT")
returns = prices.apply(np.log).diff()
print(f"Total observations: {len(returns)}")

################################################################
# targets are just the squared returns
targets = returns ** 2

# look-back windows (in observations) for the weekly and monthly averages
weekly_window = 5
monthly_window = 21

# features for HAR (realized daily/weekly/monthly volatility).
# Every column is shifted by one observation so that row t only uses
# information available up to t-1. The built-in rolling mean replaces
# rolling(...).apply(np.mean), which calls a Python function per window.
features_har = np.column_stack([
    targets.shift(1).values,
    targets.rolling(weekly_window).mean().shift(1).values,
    targets.rolling(monthly_window).mean().shift(1).values,
])

# features and targets to numpy arrays; HAR and NN use the same input features
features = features_har.copy()
targets = targets.values.reshape(-1, 1)

# add the intercept column for the HAR regression (this does NOT drop NaN rows)
features_har = sm.add_constant(features_har)

# remove the first observations for which the monthly volatility is undefined:
# the first return is NaN (diff), the monthly window needs `monthly_window`
# valid values, and the features are lagged by one -> monthly_window + 1 rows
start_index = monthly_window + 1
features_har = features_har[start_index:]
features = features[start_index:]
targets = targets[start_index:]

# all three arrays must stay row-aligned after trimming
assert len(features_har) == len(features) == len(targets)

# final overview if features and targets
print(f"shape har featurs: {features_har.shape} of type {type(features_har)}")
print(f"shape nn features: {features.shape} of type {type(features)}")
print(f"shape targets: {targets.shape} of type {type(targets)}")

# split the data chronologically (shuffle=False) into the part used for cross
# validation / final estimation and the held-out part used for model comparison.
# One call splits all three arrays at the same row, so they cannot get out of sync.
train_size = .8 #.6
(
    features_har, features_har_validation,
    features, features_validation,
    targets, targets_validation,
) = train_test_split(features_har, features, targets, shuffle=False, train_size=train_size)

# # for model specification and final estimation
# features, features_har, targets
# # for model comparison
# features_validation, features_har_validation, targets_validation

Total observations: 9286
shape har featurs: (9264, 4) of type <class 'numpy.ndarray'>
shape nn features: (9264, 3) of type <class 'numpy.ndarray'>
shape targets: (9264, 1) of type <class 'numpy.ndarray'>


In [7]:
def get_avg_vol_daily(ticker):
    """Return the average squared daily log return for ``ticker``.

    The close prices come from ``get_ticker_daily_close`` (presumably a pandas
    Series of daily closes -- confirm against utils.functions). Log returns are
    the first difference of the log prices, and the result is ``np.mean`` of
    their squares.

    Parameters
    ----------
    ticker : str
        Ticker symbol passed through to ``get_ticker_daily_close``.

    Returns
    -------
    The value of ``np.mean`` over the squared log returns.
    """
    log_returns = get_ticker_daily_close(ticker).apply(np.log).diff()
    squared_returns = log_returns ** 2
    return np.mean(squared_returns)

In [21]:
# Print the average squared daily log return of each ticker as a percentage,
# one line per ticker with the symbol padded to five characters.
stocks = ["MU", "GOLD", "MSFT", "KO", "AAPL", "ASML"]
row_format = "{:5}: {:1.7%}"
for stock in stocks:
    avg_daily = get_avg_vol_daily(stock)
    print(row_format.format(stock, avg_daily))

MU   : 0.1472258%
GOLD : 0.0708661%
MSFT : 0.0458784%
KO   : 0.0213114%
AAPL : 0.0824258%
ASML : 0.0959555%
