## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append('../') 

import gymnasium as gym
from src.trading_env_final import TradingEnv

from stable_baselines3 import PPO, A2C
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
import optuna

## Create Env

In [None]:
# Load the indicator table and the options table the environment is built from.
df = pd.read_csv("../data/PNB_Indicators.csv")
df_option_data = pd.read_csv("../data/PNB_Options_Data.csv")
options_path = "../data/CSV"  # directory with the options data for each day

# Index range handed to the environment as `frame_bound`
# (dates as annotated by the original author).
start_index, end_index = 748, 2723  # 2015-01-01 .. 2022-12-30
frame_bound = (start_index, end_index)

# Remaining environment settings.
window_size = 10
lot_size = 1000
margin = 1_000_000_000
spread = 5  # number of strikes from ATM to consider as ITM and OTM

env = TradingEnv(
    df,
    df_option_data,
    window_size,
    frame_bound,
    margin,
    lot_size,
    spread,
    options_path,
)

In [None]:
# Second environment, built from the same data and settings as `env` but on a
# later index range; `objective` rolls the trained model out on it.
# NOTE: this rebinds the module-level `frame_bound` used when `env` was built.
options_path1 = "../data/CSV"
frame_bound = (2726, 3040)
spread = 5
eval_env = TradingEnv(df, df_option_data, window_size, frame_bound, margin, lot_size, spread, options_path1)

# The factory closes over `eval_env` rather than `new_env`: a closure looks its
# name up when called, so a factory that referenced `new_env` would return the
# vectorized wrapper itself if it were ever invoked after the rebinding below.
new_env = DummyVecEnv([lambda: eval_env])

In [None]:
# Reset the training environment once and keep the two returned values in
# `t` and `t1` (presumably observation and info, as in the Gymnasium reset
# API -- TODO confirm against TradingEnv).
t,t1 = env.reset()

In [None]:
# Run the Stable-Baselines3 environment checker on `env`, then reset it again
# (presumably to leave it in a freshly reset state after the checker has
# stepped through it -- TODO confirm).
check_env(env)
t,t1 = env.reset()

## HyperParameter Tuning

In [None]:
def objective(trial):
    """Train an A2C agent with sampled hyperparameters and score one rollout.

    The agent is trained on the module-level ``env`` and then rolled out for a
    single episode on the module-level ``new_env``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The negated ``total_profit`` taken from the info dict of the final
        evaluation step, so that minimizing the objective maximizes profit.
    """
    # Define the hyperparameters to tune
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    gamma = trial.suggest_float('gamma', 0.9, 0.999)
    gae_lambda = trial.suggest_float('gae_lambda', 0.8, 1.0)
    n_steps = trial.suggest_int('n_steps', 5, 20)
    entropy_coef = trial.suggest_float('entropy_coef', 0.01, 0.1)

    # Create the model with these hyperparameters
    model = A2C("MlpPolicy", env, verbose=0,
                learning_rate=learning_rate,
                gamma=gamma,
                gae_lambda=gae_lambda,
                n_steps=n_steps,
                ent_coef=entropy_coef)

    # Train the model
    model.learn(total_timesteps=50000)

    # Evaluate the model on one episode. Actions are chosen deterministically
    # so the score reflects the learned policy instead of action-sampling
    # noise, which would otherwise make trials incomparable.
    obs = new_env.reset()
    while True:
        action, _ = model.predict(obs, deterministic=True)
        obs, _, dones, infos = new_env.step(action)
        # Step results are indexed per environment; only index 0 is used here.
        if dones[0]:
            break

    return -infos[0]['total_profit']  # Since Optuna minimizes

In [None]:
# Keep the study in a local SQLite file under a fixed name.
study_name = "study_a2c"  # Give a unique name to your study.
storage_url = "sqlite:///history.db"

# Reuse the study of that name if the storage already holds one
# (load_if_exists=True); otherwise create it. Lower objective values are better.
study = optuna.create_study(
    direction="minimize",
    load_if_exists=True,
    storage=storage_url,
    study_name=study_name,
)

In [None]:
def early_stopping_callback(study, trial, threshold_no_improvement, min_trials):
    """Stop the study after a run of trials that are worse than the best value.

    Written for a *minimizing* study: a trial counts as "no improvement" when
    its value is strictly greater than ``study.best_value``. The length of the
    current run is kept in the study's ``no_improvement_count`` user attribute.

    Args:
        study: Study being optimized. Must expose ``trials``, ``best_value``,
            ``user_attrs``, ``set_user_attr`` and ``stop``.
        trial: The trial that just finished; only ``trial.value`` is read.
        threshold_no_improvement: Run length at which ``study.stop()`` is
            called.
        min_trials: Nothing is counted while the study holds fewer trials
            than this.
    """
    print("Inside early stopping callback")

    # Stop the study if there is no improvement over a certain number of trials
    if len(study.trials) < min_trials:
        return

    current_value = trial.value
    if current_value is None:
        # A trial without a value (e.g. one that did not complete) cannot be
        # compared with the best value; leave the counter untouched instead of
        # failing on `None > float`.
        return

    try:
        best_value = study.best_value
    except ValueError:
        # Raised when the study has no completed trial to take a best from.
        return

    if current_value > best_value:
        no_improvement_count = study.user_attrs.get('no_improvement_count', 0) + 1
    else:
        no_improvement_count = 0
    study.set_user_attr('no_improvement_count', no_improvement_count)

    if no_improvement_count >= threshold_no_improvement:
        study.stop()

# Configure the number of trials without improvement after which to stop
threshold_no_improvement = 10  # Number of trials without improvement
min_trials = 10  # Minimum number of trials before considering stopping

In [None]:
# Run up to 100 trials. After each trial the callback is invoked with the
# study and the finished trial and may stop the study early.
study.optimize(
    objective,
    n_trials=100,
    callbacks=[
        lambda s, t: early_stopping_callback(s, t, threshold_no_improvement, min_trials)
    ],
)

print("Study finished with best value:", study.best_value)

In [None]:
# Report the best trial found by the study: its objective value followed by
# one "name: value" line per tuned hyperparameter.
print("Best trial:")
trial = study.best_trial
print(" Value: ", trial.value)
print(" Params: ")
for param_name, param_value in trial.params.items():
    print(f"{param_name}: {param_value}")
