# Notebook for hyperparameter tuning of stochastic models

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../src')
# import some data generating model
from data_processing.post_processing import post_processing
from data_processing.config import post_processing_config
from models.DeepHedger import hedging
from config import data_generation_config
import wandb

## Define train and val data

NOTE: 
- If the train data needs preprocessing, you can do it here, but it is cleaner to let the model class do it, so that the input to the data-generating model is just the raw data.
- The preprocessing of the train data does not need to match the preprocessing of the validation data, since it is the post-processing of the generated data that makes it match the validation data.


In [None]:
WINDOW_LENGTH = 30  # number of consecutive closing prices in each window


def make_normalized_windows(prices, window=WINDOW_LENGTH):
    """Cut a price series into overlapping windows that each start at 1.

    Args:
        prices: One-dimensional sequence of prices (e.g. a pandas Series).
        window: Number of consecutive observations per window.

    Returns:
        A DataFrame with ``len(prices) - window + 1`` rows and ``window``
        columns; every row is one window divided by its own first value.

    Raises:
        ValueError: If ``prices`` holds fewer than ``window`` observations.
    """
    values = np.asarray(prices)
    n_windows = len(values) - window + 1
    if n_windows < 1:
        raise ValueError(
            f"need at least {window} prices to build a window, got {len(values)}"
        )
    windows = pd.DataFrame(
        np.array([values[start:start + window] for start in range(n_windows)])
    )
    # Row-wise division by the first column rescales each window to start at 1.
    return windows.div(windows.iloc[:, 0], axis=0)


train_data = pd.read_csv("../data/raw/spy_daily_closing_prices_train.csv", index_col=0)

val_data = pd.read_csv("../data/raw/spy_daily_closing_prices_val.csv", index_col=0)
val_data = val_data["Close"]
val_data.plot(use_index=True)  # plot the raw closing prices before windowing
val_data = make_normalized_windows(val_data)

test_data = pd.read_csv("../data/raw/spy_daily_closing_prices_test.csv", index_col=0)
test_data = make_normalized_windows(test_data["Close"])

## Wandb login

NOTE:
- You might want to change this, as this is my API key. (Pushing an API key to a public repo is a bit silly, but so be it.)

In [None]:
# Never commit an API key to the repository. Called without a `key`,
# wandb.login() uses the WANDB_API_KEY environment variable or credentials
# stored by an earlier `wandb login`, and otherwise prompts for a key.
# NOTE: the key that was previously hard-coded here is public and should be
# revoked in the W&B account settings.
wandb.login()

## Define the sweep config.

NOTE:
- This is just an example; replace the parameters with kappa, theta, and whatever else the stochastic model needs.

In [None]:
# Sweep definition handed to wandb.sweep: Bayesian search that minimizes the
# logged "loss" metric over the hyperparameter ranges listed below.
sweep_config = {
    "method": "bayes",
    "metric": {"name": "loss", "goal": "minimize"},
    "parameters": {
        # Continuous ranges, sampled uniformly between min and max.
        "noise_scale": {"distribution": "uniform", "min": 0.01, "max": 0.15},
        "learning_rate": {"distribution": "uniform", "min": 0.001, "max": 0.1},
        # Discrete set of candidate values.
        "hidden_size": {"values": [4, 8, 16, 32]},
    },
}

## Initialize the sweep

NOTE:
- Set a project name. I have called mine teacher_forcing_hyperopt, for instance, so you could call yours heston_hyperopt.
- When you run this, a sweep ID will be printed. If you want to cancel and later resume a sweep, comment out the `wandb.sweep` line, uncomment the `sweep_id` line, and set `sweep_id` to the ID that was printed the first time.

In [None]:
# W&B project that collects the sweep runs. It was previously used without
# ever being defined; the wandb.agent cell further down reuses it.
project_name = "heston_hyperopt"  # change to your own project name

# Registers the sweep and returns its id. To resume an existing sweep, comment
# out the wandb.sweep line and set sweep_id to that sweep's id instead.
sweep_id = wandb.sweep(sweep_config, project=project_name)
# sweep_id = ""

## Define the functions here

NOTE:
- You might need to make some changes here depending on how you train the model and generate the data.
- Important: `model.synth_data` has to be an (M x N) DataFrame.

In [None]:
def objective(config):
    """Generate synthetic data for one sweep trial and return its hedging loss.

    Args:
        config: Object exposing ``noise_scale``, ``learning_rate`` and
            ``hidden_size`` as attributes (``main`` passes ``wandb.config``).

    Returns:
        Whatever ``hedging`` returns for the post-processed synthetic data and
        the module-level ``val_data``.
    """
    hyperparameters = {
        name: getattr(config, name)
        for name in ("noise_scale", "learning_rate", "hidden_size")
    }

    # NOTE(review): `model` is not created in this function, so it has to exist
    # as a global. Build it from `hyperparameters` here, for example:
    # model = TeacherForcing(train_data=train_data, N=data_generation_config["N"], M=data_generation_config["M"], load_params=False, config=hyperparameters)
    model.generate_data()

    synthetic = post_processing(model.synth_data, **post_processing_config)

    print('Hedging')
    return hedging(synthetic, val_data)

def main():
    """Execute a single sweep trial and report its loss to W&B."""
    wandb.init()
    trial_loss = objective(wandb.config)
    # The same value is logged under both keys; "loss" is the metric name the
    # sweep configuration asks W&B to minimize.
    metrics = {"val_loss": trial_loss, "loss": trial_loss}
    wandb.log(metrics)

In [None]:
# Start an agent for the sweep: it calls main() once per trial, for at most
# 10 trials.
wandb.agent(
    sweep_id,
    function=main,
    count=10,
    project=project_name,
)

## Testing

NOTE:
- Set `model_name` to the name of the model, e.g. quant_gan.
- Also set the optimal hyperparameters.
- Remember to create the folders data/processed and data/performance in the repository root.

In [None]:
# Name of the model under test; it is interpolated into the output file names
# written by the cells below.
model_name = ""

# Best hyperparameters found by the sweep, to be passed to the model.
optimal_hyperparameters = dict(
    clip_value=0.04846214746770365,
    lr=0.0011239390884900244,
    num_epochs=15,
    nz=3,
    batch_size=64,
    seq_len=30,
)

In [None]:
# Repeat fit -> generate -> hedge five times so the losses can be averaged.
val_losses, test_losses = [], []

for _ in range(5):

    # NOTE(review): `model` must already exist when this loop runs; construct
    # it here, for example:
    # model = TeacherForcing(train_data=train_data, N=data_generation_config["N"], M=data_generation_config["M"], load_params=False, config=hyperparameters)
    model.fit_to_data()
    model.generate_data()

    # Exponentiate the generated data before post-processing.
    df = 1 * np.exp(model.synth_data)
    df_post_processed = post_processing(df, **post_processing_config)

    val_loss, test_loss = hedging(df_post_processed, val_data, test_data)
    print(f"Validation loss: {val_loss}, Test loss: {test_loss}")
    val_losses.append(val_loss)
    test_losses.append(test_loss)

print(f"Average validation loss: {np.mean(val_losses)}, Average test loss: {np.mean(test_losses)}")
# Only the synthetic data from the final run is written to disk.
df_post_processed.to_csv(f"../data/processed/{model_name}_synth_data.csv")



## Saving the mean and std of the losses as a LaTeX table

In [None]:
# Summarize the repeated runs as "mean +/- std" and save them as a LaTeX table.
val_mean = np.mean(val_losses)
val_std = np.std(val_losses)  # np.std default: population std (ddof=0)
test_mean = np.mean(test_losses)
test_std = np.std(test_losses)

# Doubled braces are literal braces in an f-string; doubled backslashes are
# literal backslashes, so "\\\\" renders as the LaTeX row terminator "\\".
latex_table = f"""
\\begin{{table}}[h]
\\centering
\\begin{{tabular}}{{lc}}
\\hline
Metric & Value \\\\
\\hline
Validation Loss & {val_mean:.6f} $\\pm$ {val_std:.6f} \\\\
Test Loss & {test_mean:.6f} $\\pm$ {test_std:.6f} \\\\
\\hline
\\end{{tabular}}
\\caption{{Validation and Test Loss Statistics}}
\\label{{tab:loss_stats}}
\\end{{table}}
"""

# Fixed path: the original '..data/performance/' lacked the slash after '..'
# and therefore pointed at a non-existent '..data' directory.
# The target directory must already exist; open() does not create it.
with open(f'../data/performance/{model_name}.tex', 'w', encoding='utf-8') as f:
    f.write(latex_table)
