In [8]:
# The code in this notebook is partially based on https://github.com/juliansester/nga/blob/main/Example%20SP500-Asian.ipynb
import torch
import torch.nn as nn
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from GAD_util import *

import pandas as pd
from scipy.optimize import minimize
import os

In [3]:
def compute_max_parameters(x,
                           iterations = 1000,
                           initial_guess= (0.1,0.1,0,0,0.75),
                           tolerance = 1e-15,
                           Delta = 1/250.,
                           method = 'COBYLA'):
    """Fit (a0, a1, b0, b1, gamma) to a series by maximum likelihood.

    Each increment ``x[i+1] - x[i]`` is scored with a Gaussian log-density
    whose mean is ``(b0 + b1*x[i]) * Delta`` and whose standard deviation is
    ``(a0 + a1*max(x[i], 0))**gamma * sqrt(Delta)``. The negative *mean*
    log-likelihood over all consecutive pairs is minimised with
    ``scipy.optimize.minimize``.

    Parameters
    ----------
    x : array-like
        Observed series; at least two observations are required.
    iterations : int
        Passed to the optimiser as ``maxiter``.
    initial_guess : sequence of 5 floats
        Starting point ``(a0, a1, b0, b1, gamma)``.
    tolerance : float
        Small constant added inside the log and the denominator so they do
        not become +/-inf; also used as the lower bound for a0, a1 and gamma.
    Delta : float
        Time step between consecutive observations.
    method : str
        Optimisation method name handed to ``minimize``.

    Returns
    -------
    tuple
        ``(a0, a1, b0, b1, gamma)`` with a0, a1 clipped to ``>= tolerance``
        and gamma clipped to ``[tolerance, 1.2]``.

    Raises
    ------
    ValueError
        If ``x`` contains fewer than two observations.
    """
    x = np.asarray(x, dtype=float)
    if x.ndim == 0 or x.shape[0] < 2:
        raise ValueError("x must contain at least two observations")

    x_0 = np.array(initial_guess) # Initial guess
    eps = tolerance # Tolerance to avoid that fractions and log-expressions become inf or -inf

    # Everything that does not depend on the parameters is computed once,
    # not on every objective evaluation.
    x_now = x[:-1]
    x_next = x[1:]
    level = np.maximum(x_now, 0)
    constant = np.sqrt(2*np.pi*Delta)

    # Define the (negative) log-likelihood function
    def log_likelihood(param):
        a_0, a_1, b_0, b_1, gamma = param
        scale = a_0 + a_1*level
        drift = (b_0 + b_1*x_now)*Delta
        # NOTE(review): eps is added after the power inside the log but
        # before the power in the denominator; kept exactly as in the
        # original formula so fitted values are not shifted.
        log_density = (-np.log(scale**gamma*constant + eps)
                       - (1/(2*Delta))*((x_next - x_now - drift)/(scale + eps)**gamma)**2)
        return -np.mean(log_density) # Mean instead of sum to have smaller values

    # Depending on the installed SciPy version, COBYLA may ignore `bounds`
    # (with a warning), which is why the result is clipped again below.
    a0,a1,b0,b1,gamma = minimize(log_likelihood,x_0,method=method,options={'maxiter': iterations,
                                                                          'rhobeg':0.01},
                bounds = [(eps,None),(eps,None),(None,None),(None,None),(eps,None)]).x
    a0 = np.max([a0,eps])
    a1 = np.max([a1,eps])
    gamma = np.min([np.max([gamma,eps]),1.2]) # artificial lower/upper bound
    return a0,a1,b0,b1,gamma



In [4]:
# --- Path discretisation -------------------------------------------------
# `sequence_length` and `dt` are handed to path_generator_GAD as
# `time_steps` and `dt` further down in this notebook.
dt = 1 / 250
sequence_length = 30
T = sequence_length * dt  # total horizon covered by one path

# --- Training hyper-parameters -------------------------------------------
# Not referenced by the cells visible in this notebook; presumably consumed
# by training code elsewhere (TODO confirm).
learning_rate = 5e-3
batch_size = 10_000
batch_num = 20
epoch_num = 300

In [5]:
# Provenance: the CSV read below was produced once with yfinance using the
# snippet kept here (commented out so the notebook runs without network access).
#
# import yfinance as yf
# tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'BRK-B']   # tickers of interest
# start_date = '2008-09-26'                              # date range
# end_date = '2021-09-30'
# data = yf.download(tickers, start=start_date, end=end_date)['Close']   # close prices
# data.to_csv('stocks_close_prices_2008_2021.csv')       # saved for future reference

# Load the stored close prices; later cells index the rows by position.
data = pd.read_csv(filepath_or_buffer="../Data/stocks_close_prices_2008_2021.csv")



In [9]:
# For every ticker: calibrate the model on rolling windows, derive a "fix"
# and a "robust" parameter set, simulate train/test/val paths for both and
# save them under ../Data/GAD_<ticker>/.
for company_name in ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'BRK-B']:
    print(f"Processing {company_name}...")
    S0 = data[company_name].values[2880]  # start value handed to both path generators

    # Fit the five parameters on 250-observation windows ending at rows
    # 279, 379, ..., 2879 and collect one estimate per window.
    estimates = {name: [] for name in ('a0', 'a1', 'b0', 'b1', 'gamma')}
    for i in tqdm(np.arange(279,2880,100)): # until 9 March 2020 (per the original note)
        x = np.array(data[company_name].iloc[(i-250):i])
        for name, value in zip(estimates, compute_max_parameters(x)):
            estimates[name].append(value)

    # "fix": degenerate interval [v, v] at the estimate from the last window.
    # "robust": [min, max] of the estimates over all windows.
    fix = {name: [values[-1], values[-1]] for name, values in estimates.items()}
    robust = {name: [min(values), max(values)] for name, values in estimates.items()}

    parameters = {'S0': [S0, S0]}
    parameters.update({f'{name}_fix': interval for name, interval in fix.items()})
    parameters.update({f'{name}_robust': interval for name, interval in robust.items()})
    for para_name, para_value in parameters.items():
        print(f"{para_name}\t{para_value[0]}\t{para_value[1]}")

    # Define the path generators (one per parameter set).
    generators = {
        'fix': path_generator_GAD(time_steps=sequence_length, S0=S0, dt=dt, **fix),
        'robust': path_generator_GAD(time_steps=sequence_length, S0=S0, dt=dt, **robust),
    }

    # Generate the paths and save them. The folder uses the same "../Data"
    # spelling as the CSV input and the real-test-data cell, so all files of
    # a ticker end up in one directory on case-sensitive filesystems too.
    folder_path = f'../Data/GAD_{company_name}'
    os.makedirs(folder_path, exist_ok=True)
    shapes = {}
    for kind, generator in generators.items():
        for split in ('train', 'test', 'val'):
            # Rescale so that every path starts at 10 instead of S0.
            paths = generator.generate(100000)/ S0 * 10
            torch.save(paths, f'{folder_path}/GAD_{kind}_{split}.pt')
            shapes[(kind, split)] = paths.shape  # keep only the shape, not the tensor

    # Sanity check of the generated shapes.
    for (kind, split), shape in shapes.items():
        print(f"{kind.capitalize()} {split} data shape:", shape)
    print(' ')

Processing AAPL...


100%|██████████| 27/27 [00:31<00:00,  1.16s/it]


S0	64.50927734375	64.50927734375
a0_fix	0.06747442236503542	0.06747442236503542
a1_fix	0.15426034261762736	0.15426034261762736
b0_fix	0.015307255745013263	0.015307255745013263
b1_fix	0.28604643732012475	0.28604643732012475
gamma_fix	1.2	1.2
a0_robust	1e-15	1.1713472722638891
a1_robust	0.15426034261762736	1.1705841357901787
b0_robust	-0.013761652967194185	0.24521661305958575
b1_robust	-0.1560054669357991	0.6714145740929368
gamma_robust	0.17763848824856357	1.2
Fix train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
 
Processing MSFT...


100%|██████████| 27/27 [00:35<00:00,  1.31s/it]


S0	143.90606689453125	143.90606689453125
a0_fix	0.07526008675726047	0.07526008675726047
a1_fix	0.11622482806788766	0.11622482806788766
b0_fix	0.02224813120798452	0.02224813120798452
b1_fix	0.2031023017354171	0.2031023017354171
gamma_fix	1.2	1.2
a0_robust	1e-15	0.34645124078795325
a1_robust	0.10596797748510872	1.352963788170841
b0_robust	-0.00571608112262144	0.060765064311854494
b1_robust	-0.06539868920556884	0.41485025843266077
gamma_robust	0.5000696075479992	1.2
Fix train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
 
Processing AMZN...


100%|██████████| 27/27 [00:37<00:00,  1.38s/it]


S0	90.03050231933594	90.03050231933594
a0_fix	0.10789576091790865	0.10789576091790865
a1_fix	0.21872295610179698	0.21872295610179698
b0_fix	0.010518479961781246	0.010518479961781246
b1_fix	0.12967312106132894	0.12967312106132894
gamma_fix	1.0091711476355227	1.0091711476355227
a0_robust	1e-15	0.8921314973414222
a1_robust	0.1754318552147132	1.5674380800341674
b0_robust	-0.013150728747562627	0.32954616445148277
b1_robust	-0.1190975571373096	0.8017276829174065
gamma_robust	0.37305828862615203	1.2
Fix train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
 
Processing GOOGL...

100%|██████████| 27/27 [00:41<00:00,  1.54s/it]


S0	60.50025177001953	60.50025177001953
a0_fix	0.1109145611153623	0.1109145611153623
a1_fix	0.32014512194417016	0.32014512194417016
b0_fix	0.0038548804839231574	0.0038548804839231574
b1_fix	0.10550705133490279	0.10550705133490279
gamma_fix	0.9158212852191936	0.9158212852191936
a0_robust	1e-15	0.7019487616321505
a1_robust	0.13615342288254978	1.7076161859621857
b0_robust	-0.012541790592237815	0.08386929876758409
b1_robust	-0.08067327636191873	0.4662031867550671
gamma_robust	0.42315388811131155	1.2
Fix train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
 
Processing BRK-B...

100%|██████████| 27/27 [00:40<00:00,  1.51s/it]


S0	193.1300048828125	193.1300048828125
a0_fix	0.11065465690051735	0.11065465690051735
a1_fix	0.2762809059595653	0.2762809059595653
b0_fix	0.013188557242343782	0.013188557242343782
b1_fix	0.03046389147642701	0.03046389147642701
gamma_fix	0.8777524832562036	0.8777524832562036
a0_robust	0.0959519171732222	0.1710159216027636
a1_robust	0.0887268410123356	1.0485521976368115
b0_robust	-0.0027169785611263334	0.06703235521521311
b1_robust	-0.026459572768673226	0.3505830370147668
gamma_robust	0.693507020789021	1.191342188801752
Fix train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Fix val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust train data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust test data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])
Robust val data shape: torch.Size([100000, 31]) tensor([10., 10., 10., 10., 10.])

In [10]:
# Build the real-market test set: 300 overlapping windows of 31 consecutive
# prices starting at rows 2880..3179, each rescaled to start at 10.
for company_name in ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'BRK-B']:
    print(f"Processing {company_name} for real test data...")
    real_data_tensor = torch.Tensor(data[company_name])

    # Collect all windows first and stack once, instead of re-allocating the
    # result with torch.cat on every iteration.
    windows = [
        real_data_tensor[start:start + 31] / real_data_tensor[start] * 10
        for start in range(2880, 3180)
    ]
    price_real_test = torch.stack(windows, dim=0)

    print(price_real_test.shape)
    # Create the target folder so this cell also works when it does not exist yet.
    output_dir = f'../Data/GAD_{company_name}'
    os.makedirs(output_dir, exist_ok=True)
    torch.save(price_real_test, f'{output_dir}/GAD_real_test.pt')
    

Processing AAPL for real test data...
torch.Size([300, 31])
Processing MSFT for real test data...
torch.Size([300, 31])
Processing AMZN for real test data...
torch.Size([300, 31])
Processing GOOGL for real test data...
torch.Size([300, 31])
Processing BRK-B for real test data...
torch.Size([300, 31])
