In [None]:
import os
import time
import datetime
import random
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torch.utils.tensorboard import SummaryWriter

from tqdm import tqdm

import importlib

from model import LSTM
from dataset import Sliding, SlideDataset, yh_get_company_dat
from myutils import Loss_Meter, Config, plot_results
from optimizers import Optimizer
from criterions import Criterion

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU plus all CUDA
    devices), then configures cuDNN for reproducible execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Restrict cuDNN to deterministic kernels and switch its autotuner off:
    # with benchmark mode enabled cuDNN may select a different algorithm from
    # one run to the next, which defeats the purpose of seeding.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Timestamp of this run (local time); it names the run's log directory.
now = time.strftime("%Y-%m-%d-%H_%M_%S")

config = Config(
	SEED=0,

	# For dataset
	WINDOW_SIZE=15,

	# For dataloader
	BATCH_SIZE=4,
	WORKERS=0,

	# For fixed number of steps, set Threshold = 0.
	THRESHOLD=0.0,

	EPOCHS=30,
	CHECKPOINT=10,
	# Always use forward slashes, whatever separator the OS joins with.
	LOG_DIR=os.path.join("logs", now).replace("\\", "/"),

	# Model
	INPUT_SIZE=1,
	HIDDEN_SIZE=7,
	NUM_LAYERS=1,
	DROP=0.0,

	OPTIMIZER='Adam',
	LOSS='MSELoss',

	# For Adam
	LR=1e-3,
	WEIGHT_DECAY=1e-5,

	# For SGD
	MOMENTUM=0.9,
	DAMPENING=0.0,
)

set_seed(config.SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# Get Data

In [None]:
# Fetch the data for the 'JPM' ticker through the project helper and display it.
# NOTE(review): presumably a pandas DataFrame (a 'Predictions' column is
# assigned to it further down) — confirm against dataset.yh_get_company_dat.
df = yh_get_company_dat(ticker='JPM')
df

# Dataloader

In [None]:
# Wrap the frame with a window of config.WINDOW_SIZE and pull out the three splits.
slided = Sliding(df, X_period=config.WINDOW_SIZE)
train_set, val_set, test_set = slided.get_Dataset()

# Only the training loader shuffles and uses the configured batch size.
train_loader = DataLoader(
	train_set,
	batch_size=config.BATCH_SIZE,
	num_workers=config.WORKERS,
	shuffle=True,
)

# Validation and test are read in order, in batches of 128.
val_loader, test_loader = (
	DataLoader(split, batch_size=128, num_workers=config.WORKERS, shuffle=False)
	for split in (val_set, test_set)
)

# Model

In [None]:
# Create the log root and this run's directory up front. exist_ok avoids the
# check-then-create race, and creating config.LOG_DIR here means the checkpoint
# path built below is writable without relying on any other component to have
# created the directory first.
os.makedirs("logs", exist_ok=True)
os.makedirs(config.LOG_DIR, exist_ok=True)

# Destination of the "best" checkpoint, normalised to forward slashes.
best_model_path = os.path.join(config.LOG_DIR, 'checkpoint_best.pth').replace('\\', '/')

model = LSTM(
	config,
	act_layer=nn.LeakyReLU,
	device=device,
)

# Move the model to the target device and cast its parameters to float64.
# Left as the cell's last expression so the notebook displays the model.
model.to(device, dtype=torch.float64)

# Optimizer, Loss, LR Scheduler, Gradient Scaler

In [None]:
# Optimizer and loss come from the project factories.
# NOTE(review): presumably selected by config.OPTIMIZER / config.LOSS — confirm
# in optimizers.py / criterions.py.
optimizer = Optimizer(config, model).get_Optim()
criterion = Criterion(config).get_Criterion()

# Halve the learning rate when the metric passed to scheduler.step() plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
	optimizer,
	factor=0.5,
	threshold=1e-3,
)

# Gradient scaler bound to the device type chosen above.
g_scaler = torch.amp.GradScaler(device=device.type)

# Loss Meters

In [None]:
# One Loss_Meter per split, created in train / val / test order.
train_meter, val_meter, test_meter = (
	Loss_Meter(loss_type=split_name)
	for split_name in ('train', 'val', 'test')
)

# Tensorboard Logging

In [None]:
# Event files for this run are written under config.LOG_DIR.
writer = SummaryWriter(log_dir=config.LOG_DIR)

# Custom-scalars layout: one "Loss Plot" section whose 'loss' chart overlays
# the 'train_loss' and 'val_loss' tags on a single multi-line plot.
layout = {
	"Loss Plot": {
		'loss': ["Multiline", ['train_loss', 'val_loss']],
	},
}
writer.add_custom_scalars(layout)

# Model Training

In [None]:
print(model)

# Training state. The 1e+05 values are "worse than anything real" sentinels.
epoch = 1
min_loss = 1e+05  # NOTE(review): never read in this cell; the best-model rule below uses loss_diff
val_loss = torch.tensor([1e+05])
train_loss = torch.tensor([1e+05])
loss_diff = 1e+05

while True:
	model.train()
	with tqdm(train_loader, desc='Epoch {}'.format(str(epoch).zfill(2))) as tepoch:
		last_batch = len(tepoch) - 1
		for i, (data, target) in enumerate(tepoch):

			data, target = data.to(device), target.to(device)

			if config.OPTIMIZER == 'LBFGS':
				# LBFGS re-evaluates the objective itself, so it needs a closure
				# that recomputes the loss and its gradients.
				def closure():
					optimizer.zero_grad()
					output = model(data)
					loss = criterion(output, target)
					loss.backward()
					train_meter.update(loss.detach().cpu().item())
					return loss
				optimizer.step(closure)
			else:
				optimizer.zero_grad()

				output = model(data)

				train_loss = criterion(output, target)
				train_meter.update(train_loss.detach().cpu().item())

				g_scaler.scale(train_loss).backward()

				# Gradients must be unscaled before clipping so the norm
				# threshold applies to their true magnitude.
				g_scaler.unscale_(optimizer)
				nn.utils.clip_grad_norm_(model.parameters(), 1.0)

				g_scaler.step(optimizer)
				g_scaler.update()

			# Validate on the first and on the last batch of every epoch. Written
			# as an explicit comparison (instead of i % (len - 1)) so that a
			# loader with a single batch does not divide by zero.
			if i == 0 or i == last_batch:

				model.eval()
				val_batch_losses = []
				with torch.no_grad():
					for val_data, val_target in val_loader:
						val_data, val_target = val_data.to(device), val_target.to(device)
						val_output = model(val_data)
						val_loss = criterion(val_output, val_target)
						val_batch_loss = val_loss.detach().cpu().item()
						val_meter.update(val_batch_loss)
						val_batch_losses.append(val_batch_loss)

				# "Best" here means the smallest gap between the train and the
				# validation meter means, not the lowest validation loss.
				# NOTE(review): the meters are never reset in this cell, so
				# mean() presumably spans all epochs so far — confirm in myutils.
				diff = abs(train_meter.mean() - val_meter.mean())
				if diff <= loss_diff:
					torch.save(model.state_dict(), best_model_path)
					# min_loss = val_meter.mean()
					loss_diff = diff

				# Periodic snapshot every config.CHECKPOINT epochs.
				if epoch % config.CHECKPOINT == 0:
					checkpoint_path = os.path.join(
						config.LOG_DIR,
						'checkpoint_{}.pth'.format(str(epoch).zfill(2))
					).replace('\\', '/')
					torch.save(model.state_dict(), checkpoint_path)

				model.train()

				# Drive the plateau scheduler with the mean loss of this whole
				# validation pass rather than only its final batch. With an
				# empty validation loader, fall back to the last known val_loss.
				if val_batch_losses:
					scheduler.step(sum(val_batch_losses) / len(val_batch_losses))
				else:
					scheduler.step(val_loss)

			tepoch.set_postfix(
				train_loss = "{:.10e}".format(train_meter.mean()),
				val_loss = "{:.5e}".format(val_meter.mean())
			)

	# Tags must match the names referenced by the custom-scalars layout.
	writer.add_scalar('train_loss', train_meter.mean(), epoch)
	writer.add_scalar('val_loss', val_meter.mean(), epoch)

	# Stop after config.EPOCHS epochs when no threshold is set (THRESHOLD == 0),
	# or as soon as the train/val gap falls to the threshold. ">=" instead of
	# "==" so a non-positive EPOCHS cannot cause an endless loop.
	epoch_budget_spent = (not config.THRESHOLD) and epoch >= config.EPOCHS
	if epoch_budget_spent or (loss_diff <= config.THRESHOLD):
		break

	epoch += 1
	torch.cuda.empty_cache()

# Model Testing

In [None]:
# Evaluates the model as it stands after the last training step. Uncomment the
# next line to evaluate the saved "best" checkpoint instead.
# model.load_state_dict(torch.load(best_model_path))
model.eval()

# Gradients are not needed for evaluation; the bar is labelled with the final epoch.
with torch.no_grad(), tqdm(test_loader, desc='Test {}'.format(str(epoch))) as tepoch:
	for test_data, test_target in tepoch:
		test_data = test_data.to(device)
		test_target = test_target.to(device)

		test_output = model(test_data)
		test_loss = criterion(test_output, test_target)
		test_meter.update(test_loss.detach().cpu().item())

		# Show the test meter's mean next to the progress bar.
		tepoch.set_postfix(test_loss="{:.5e}".format(test_meter.mean()))

# Performance

In [None]:
# Run the model over every split in train, val, test order and flatten the
# outputs into one 1-D array. eval() is set explicitly so this cell does not
# depend on the mode left behind by an earlier cell, and no_grad() avoids
# building an autograd graph over the full dataset.
model.eval()
with torch.no_grad():
	y_preds = np.concatenate([
		model(split.X.to(device)).detach().cpu().numpy().flatten()
		for split in (train_set, val_set, test_set)
	])

# Map the predictions back through the project's unscale helper.
y_preds = slided.unscale(config, y_preds)

# NOTE(review): this assignment requires len(y_preds) == len(df); if the
# windowing drops rows, the predictions have to be padded or df trimmed first.
# df.dropna(inplace=True)
# df['Predictions'] = np.pad(y_preds, (0, len(df) - len(y_preds)))
df['Predictions'] = y_preds
df

In [None]:
# Plot the frame, passing the size of each split to the project helper.
plot_results(df, len(train_set), len(val_set), len(test_set))