In [None]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm 
from empyrical import sharpe_ratio, sortino_ratio
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.evaluation import evaluate_policy
import talib
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
import itertools
import warnings
import optuna
import torch

In [None]:
def resample_data(data):
    """Attach 5-, 15- and 30-minute OHLCV bars to a frame of finer-grained bars.

    Parameters:
    data (pd.DataFrame): Must contain a 'time' column parseable by
        ``pd.to_datetime`` plus 'Open', 'High', 'Low', 'Close' and 'Volume'.
        The input frame is not modified.

    Returns:
    pd.DataFrame: Copy of ``data`` with 'time' as a regular datetime column and
        ``Open_/High_/Low_/Close_/Volume_`` columns appended for each of the
        '5min', '15min' and '30min' intervals. Remaining gaps are forward-filled.
    """
    # How each base column is aggregated into a coarser bar (also fixes column order).
    ohlcv_rules = {'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'}

    df = data.copy()
    df['time'] = pd.to_datetime(df['time'])
    df = df.set_index('time')

    # 'min' is the minute offset alias; the older 'T' spelling is deprecated in recent pandas.
    # NOTE(review): bars are labelled with their start time (the resample default for
    # minute frequencies), so the row at the start of a bar already carries that bar's
    # High/Low/Close/Volume, which include later minutes - confirm this look-ahead is acceptable.
    for label in ['5min', '15min', '30min']:
        resampled = df.resample(label).agg(ohlcv_rules)
        for column in ohlcv_rules:
            # Index alignment leaves NaN on every row that is not a bar start.
            df[f'{column}_{label}'] = resampled[column]

    # Carry the last known value forward (bar values onto the rows inside each bar,
    # and any other missing cell onto the rows after it).
    df = df.ffill()

    return df.reset_index()

In [None]:
def calculate_amihud_illiquidity(close, volume):
    """
    Amihud illiquidity: absolute one-period return per unit of traded volume.

    Parameters:
    close (pd.Series): Close prices, one value per period.
    volume (pd.Series): Traded volume, aligned with ``close``.

    Returns:
    pd.Series: ``|pct_change(close)| / volume``. The first element is NaN
        because it has no preceding close to compare against.
    """
    absolute_return = close.pct_change().abs()
    return absolute_return.div(volume)

In [None]:
def fetch_data(data):
    """Build the indicator feature frame used by the trading environment.

    Parameters:
    data (pd.DataFrame): Frame with a 'time' column, base 'Open', 'High', 'Low',
        'Close', 'Volume' columns and the multi-timeframe columns
        'Volume_5min', 'Volume_15min', 'High_30min', 'Low_30min', 'Close_30min'
        and 'Volume_30min'. The input frame is not modified.

    Returns:
    pd.DataFrame: Indexed by 'time' (datetime), containing 'price', 'Volume' and
        the selected indicators. Rows holding NaN or +/-inf in any kept column
        are dropped.
    """
    stock_data = data.copy()
    High = stock_data['High']
    Low = stock_data['Low']
    Open = stock_data['Open']
    Close = stock_data['Close']
    Volume = stock_data['Volume']
    Volume5 = stock_data['Volume_5min']
    Volume15 = stock_data['Volume_15min']
    High30 = stock_data['High_30min']
    Low30 = stock_data['Low_30min']
    Close30 = stock_data['Close_30min']
    Volume30 = stock_data['Volume_30min']

    # Reference price: plain average of the bar's open, close, high and low.
    stock_data['price'] = (Open + Close + High + Low) / 4

    # Relative Strength Index
    stock_data['RSI'] = talib.RSI(Close, timeperiod=14)

    # Chaikin Oscillator: built from exponential moving averages of the A/D line (trend factor)
    stock_data['ADOSC'] = talib.ADOSC(High, Low, Close, Volume, fastperiod=3, slowperiod=10)
    # Average Directional Index, a component of the DMI family (trend factor)
    stock_data['ADX'] = talib.ADX(High, Low, Close, timeperiod=14)
    # Absolute Price Oscillator
    stock_data['APO'] = talib.APO(Close, fastperiod=12, slowperiod=26)
    # Balance of Power
    stock_data['BOP'] = talib.BOP(Open, High, Low, Close)
    # Parabolic SAR
    stock_data['SAR'] = talib.SAR(High, Low, acceleration=0.02, maximum=0.2)
    # Williams %R
    stock_data['WILLR'] = talib.WILLR(High, Low, Close, timeperiod=14)
    # Plus Directional Movement
    stock_data['PLUS_DM'] = talib.PLUS_DM(High, Low, timeperiod=14)

    # Stochastic oscillator on the 30-minute bars; high, low and close must all come
    # from the same timeframe (the low was previously taken from the 5-minute bars).
    stock_data['SLOWK_30min'], stock_data['SLOWD_30min'] = talib.STOCH(High30, Low30, Close30)
    stock_data['amihud'] = calculate_amihud_illiquidity(Close, Volume)
    stock_data['amihud_30min'] = calculate_amihud_illiquidity(Close30, Volume30)
    # Rate of change of volume over 6 periods, on the 5- and 15-minute volume series
    stock_data['VROCP6_5min'] = talib.ROCP(Volume5, timeperiod=6)
    stock_data['VROCP6_15min'] = talib.ROCP(Volume15, timeperiod=6)

    # Only these columns are handed on ('SLOWD_30min' is computed but not kept).
    cols_to_keep = ['time', 'price', 'Volume', 'amihud', 'SLOWK_30min', 'VROCP6_5min', 'VROCP6_15min',
                    'amihud_30min', 'PLUS_DM', 'WILLR', 'RSI', 'APO', 'BOP', 'ADX', 'ADOSC', 'SAR']

    # Ratios such as Amihud divide by volume, so a zero-volume bar yields +/-inf.
    # Treat infinities as missing and drop them together with the NaN rows.
    # Working on the new frame returned by replace()/dropna() avoids writing into a
    # slice of stock_data and keeps the non-numeric 'time' column out of np.isinf.
    features = stock_data[cols_to_keep].replace([np.inf, -np.inf], np.nan).dropna()
    features['time'] = pd.to_datetime(features['time'])

    return features.set_index('time')

In [None]:
class CryptoTradingEnv(gym.Env):
    """Single-asset trading environment driven by a pre-computed feature frame.

    Actions are ``MultiDiscrete([3, 10])``: the first component selects buy (0),
    sell (1) or hold (2); the second, divided by 10, is the fraction of the cash
    balance (buy) or of the current holding (sell) to trade. Each trade is capped
    at 25% of the current bar's volume.

    Observations have shape ``(20, lookback_window_size + 1)``: 15 min-max scaled
    market features followed by 5 scaled account-history rows (balance, units
    bought, cost, units sold, sales).

    The reward is the Sortino ratio (``empyrical.sortino_ratio``) of the most
    recent net-worth returns; NaN is mapped to 0 and +inf to 10.

    Parameters:
    df (pd.DataFrame): Feature frame; must contain 'price', 'Volume' and the
        indicator columns listed in ``_next_observation``. Rows with NaN are dropped.
    lookback_window_size (int): Number of past steps shown alongside the current one.
    initial_balance (float): Starting cash.
    commission (float): Proportional fee applied to every buy and sell.
    serial (bool): If True, walk the whole frame in order; otherwise sample a
        random window for every session.
    render_mode (str): Stored on the instance; ``render`` itself switches on its
        ``mode`` argument.
    max_trading_session (int | None): Exclusive upper bound on the length of a
        random session when ``serial`` is False. ``None`` allows sessions up to
        the longest window the data supports.
    """
    metadata = {'render.modes': ['human', 'system', 'none']}

    def __init__(self, df, lookback_window_size=10, initial_balance=25000, commission=0.0005, serial=True,
                 render_mode='none', max_trading_session=None):
        super(CryptoTradingEnv, self).__init__()
        self.df = df.dropna()
        self.render_mode = render_mode
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial
        self.max_trading_session = max_trading_session
        self.action_space = spaces.MultiDiscrete([3, 10])
        # 20 rows = 15 market features + 5 account-history rows.
        self.observation_space = spaces.Box(low=0, high=1, shape=(20, lookback_window_size + 1), dtype=np.float16)
        self.scaler = MinMaxScaler()
        # Net worth after every step; feeds the rolling Sortino reward.
        self.balance_list = [self.initial_balance]

    def _get_current_price(self):
        """Return the 'price' value of the bar the agent is currently acting on."""
        price = self.df['price'].values[self.frame_start + self.current_step]
        return price

    def get_max_volume(self):
        """Return the largest tradable quantity this step: 25% of the current bar's volume."""
        return 0.25 * self.df['Volume'].values[self.frame_start + self.current_step]

    def _next_observation(self):
        """Assemble the scaled (20, lookback_window_size + 1) observation for the current step."""
        end = self.current_step + self.lookback_window_size + 1
        # The scaler is fitted only on rows up to `end`, i.e. on data seen so far in this session.
        scaled_df = self.active_df.values[:end].astype('float64')
        scaled_df = self.scaler.fit_transform(scaled_df)
        scaled_df = pd.DataFrame(scaled_df, columns=self.df.columns)

        cols_to_keep = ['price', 'Volume', 'amihud', 'SLOWK_30min', 'VROCP6_5min', 'VROCP6_15min', 'amihud_30min',
                        'PLUS_DM', 'WILLR', 'RSI', 'APO', 'BOP', 'ADX', 'ADOSC', 'SAR']

        obs = np.array([scaled_df[col].values[self.current_step:end] for col in cols_to_keep])

        # account_history is (5, steps); MinMaxScaler works column-wise, so every time
        # step is scaled across the five account rows.
        scaled_history = self.scaler.fit_transform(self.account_history)

        obs = np.append(
            obs, scaled_history[:, -(self.lookback_window_size + 1):], axis=0)

        return obs

    def _reset_session(self):
        """Start a new pass over the data: the full frame in serial mode, a random window otherwise."""
        self.current_step = 0
        self.reward_len = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            # Longest session that still leaves room for the lookback window and a random start.
            longest = len(self.df) - self.lookback_window_size - 1
            if self.max_trading_session is None:
                limit = longest
            else:
                limit = min(self.max_trading_session, longest)
            if limit < 2:
                raise ValueError(
                    "Not enough data (or max_trading_session too small) to sample a random trading session")
            self.steps_left = np.random.randint(1, limit)
            self.frame_start = np.random.randint(
                self.lookback_window_size, len(self.df) - self.steps_left)

        self.active_df = self.df[self.frame_start - self.lookback_window_size:
                                 self.frame_start + self.steps_left]

    def reset(self, **kwargs):
        """Reset the account and the data window, and return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.btc_held = 0
        # Start the reward history afresh so returns from a previous episode do not leak in.
        self.balance_list = [self.initial_balance]
        self._reset_session()

        # Rows: balance, units bought, cost, units sold, sales - one column per step.
        self.account_history = np.repeat([
            [self.balance],
            [0],
            [0],
            [0],
            [0]
        ], self.lookback_window_size + 1, axis=1)
        self.trades = []

        return self._next_observation()

    def _take_action(self, action, current_price):
        """Execute one buy/sell/hold decision at ``current_price`` and record the outcome."""
        action_type = action[0]
        amount = action[1] / 10
        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0
        max_volume = self.get_max_volume()

        if action_type < 1:
            # Buy with a fraction of the cash balance, capped by the volume limit.
            btc_bought = min((self.balance / current_price) * amount, max_volume)
            cost = btc_bought * current_price * (1 + self.commission)

            self.btc_held += btc_bought
            self.balance -= cost

        elif action_type < 2:
            # Sell a fraction of the holding, capped by the volume limit.
            btc_sold = min(self.btc_held * amount, max_volume)
            sales = btc_sold * current_price * (1 - self.commission)

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            self.trades.append({'step': self.frame_start + self.current_step,
                                'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': sales if btc_sold > 0 else cost,
                                'type': "sell" if btc_sold > 0 else "buy"})

        self.net_worth = self.balance + self.btc_held * current_price
        self.balance_list.append(self.net_worth)
        self.account_history = np.append(self.account_history, [
            [self.balance],
            [btc_bought],
            [cost],
            [btc_sold],
            [sales]
        ], axis=1)

    def step(self, action):
        """Apply ``action``, advance one bar and return ``(obs, reward, done, info)``.

        ``done`` is True only when net worth is no longer positive; running out
        of data starts a new session instead of ending the episode.
        """
        current_price = self._get_current_price()

        self._take_action(action, current_price)

        self.steps_left -= 1
        self.current_step += 1
        # Captured before a possible wrap-around zeroes current_step: a window of 0
        # would turn the slice below into [-0:], i.e. the entire balance history.
        steps_in_session = self.current_step

        if self.steps_left == 0:
            self._reset_session()

        obs = self._next_observation()

        # Reward: Sortino ratio over (at most) the 11 most recent net-worth values.
        window = min(11, steps_in_session)
        balances = np.array(self.balance_list)[-window:]
        returns = np.diff(balances) / balances[:-1]
        sortino = sortino_ratio(returns)
        if np.isnan(sortino):
            sortino = 0
        if sortino == np.inf:
            sortino = 10
        reward = sortino
        self.reward_len += 1

        done = self.net_worth <= 0

        return obs, reward, done, {}

    def render(self, mode='human', **kwargs):
        """Print the current price, the latest cost/sales figures and net worth when ``mode == 'system'``."""
        if mode == 'system':
            # The newest account_history column always belongs to the latest step; indexing
            # by dataframe position breaks after a wrap-around or in non-serial mode.
            print('Price: ' + str(self._get_current_price()))
            print(
                'Bought: ' + str(self.account_history[2][-1]))
            print(
                'Sold: ' + str(self.account_history[4][-1]))
            print('Net worth: ' + str(self.net_worth))

In [None]:
def optimize_agent(trial):
    """Optuna objective: train a RecurrentPPO agent and score it on a held-out tail.

    The last ``len(testing_data)`` rows of the global ``training_data`` are held
    out as a validation window; the agent is trained on the rest.

    Parameters:
    trial (optuna.Trial): Supplies the sampled hyperparameters.

    Returns:
    float: Mean per-step reward collected on the validation window.
    """
    data = training_data.copy()
    train_size = len(data) - len(testing_data)
    train = data[:train_size]
    evaluate = data[train_size:]

    # Create the training environment
    train_env = CryptoTradingEnv(train, initial_balance=14483.7497, commission=0.0005, render_mode='system')

    # Define hyperparameters using the trial object
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    n_steps = trial.suggest_categorical("n_steps", [128, 256, 512, 1024, 2048])
    gamma = trial.suggest_float("gamma", 0.9, 0.9999, log=True)
    gae_lambda = trial.suggest_float("gae_lambda", 0.8, 0.99)
    clip_range = trial.suggest_float("clip_range", 0.1, 0.3)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256])

    # Setup the model
    model = RecurrentPPO("MlpLstmPolicy", train_env, verbose=0,
                         learning_rate=learning_rate,
                         n_steps=n_steps,
                         gamma=gamma,
                         gae_lambda=gae_lambda,
                         clip_range=clip_range,
                         batch_size=batch_size,
                         tensorboard_log="./tensorboard/",
                         device='cuda')

    # Train the model
    model.learn(total_timesteps=10000)

    eval_env = CryptoTradingEnv(evaluate, initial_balance=14483.7497, commission=0.0005, render_mode='system')
    obs = eval_env.reset()

    # A recurrent policy only keeps its memory if the LSTM state returned by predict()
    # is passed back in on the next call; episode_start tells it when to reset that state.
    lstm_states = None
    episode_starts = np.ones((1,), dtype=bool)

    rewards_list = []
    # One pass over the validation window; stepping further would make the environment
    # wrap around and replay its first bars.
    for _ in range(eval_env.steps_left):
        action, lstm_states = model.predict(obs, state=lstm_states, episode_start=episode_starts)
        obs, reward, done, info = eval_env.step(action)
        rewards_list.append(reward)
        episode_starts = np.array([done])
        if done:
            # Net worth is no longer positive; nothing meaningful left to evaluate.
            break

    return np.mean(rewards_list)



In [None]:
# Load the 1-minute LTC/USD bars and derive the multi-timeframe feature frame.
# A forward slash keeps the relative path valid on Windows, Linux and macOS alike.
ltc_data = pd.read_csv('csv/LTCUSD_1m_ltc_usd_60.csv')
resample_ltc = resample_data(ltc_data)
ltc_feature = fetch_data(resample_ltc)

# Define your date ranges
start_date = '2021-09-14 00:00:00'
end_date = '2023-04-19 00:00:00'

# Split the data into training and testing sets.
# Label slicing includes both end points, so a row stamped exactly end_date lands in
# the training slice. The test set is everything from end_date on that is not already
# in the training slice; unlike dropping the first row unconditionally, this does not
# discard a valid test row when no bar is stamped exactly end_date.
training_data = ltc_feature[start_date:end_date]
held_out = ltc_feature[end_date:]
testing_data = held_out[~held_out.index.isin(training_data.index)]

In [None]:
# Search the PPO hyperparameter space, keeping the trial with the highest objective value
study = optuna.create_study(direction="maximize")
study.optimize(optimize_agent, n_trials=100)  # n_trials trades search quality against runtime

# Report the winning configuration
best_trial = study.best_trial
print("Best hyperparameters: ", best_trial.params)

In [None]:
# Keep the winning hyperparameters (a name -> value dict) for the final training run
best_params=study.best_trial.params

In [None]:
# Fit the final agent on the full training window using the tuned hyperparameters
train_env = CryptoTradingEnv(training_data, initial_balance=14483.7497, commission=0.0005, render_mode='system')

# Pick exactly the tuned settings out of best_params (a missing key raises KeyError)
tuned_names = ('learning_rate', 'n_steps', 'batch_size', 'gamma', 'gae_lambda', 'clip_range')
tuned_kwargs = {name: best_params[name] for name in tuned_names}

model = RecurrentPPO(
    "MlpLstmPolicy",
    train_env,
    verbose=1,
    tensorboard_log="./tensorboard/",
    device='cuda',
    policy_kwargs=dict(net_arch=[64, 64], activation_fn=torch.nn.ReLU),
    **tuned_kwargs
)

# Train for as many timesteps as there are rows in the environment's (NaN-free) frame
model.learn(total_timesteps=len(train_env.df))

In [None]:
# Persist the trained agent, then reload it so evaluation runs against the saved artefact
model_path = "RecurrentPPO_LTC"
model.save(model_path)
loaded_model = RecurrentPPO.load(model_path)

In [None]:
# Roll the saved agent through the test set, recording the action and net worth at every step
test_env = CryptoTradingEnv(testing_data, initial_balance=14483.7497, commission=0.0005, render_mode='system')
obs = test_env.reset()
net_worth_list = []
action_list = []

# A recurrent policy only keeps its memory if the LSTM state returned by predict()
# is passed back in on the next call; episode_start tells it when to reset that state.
lstm_states = None
episode_starts = np.ones((1,), dtype=bool)

# NOTE(review): in serial mode the environment has len(df) - lookback_window_size - 1 steps
# before it wraps to the start of its data, so the final lookback_window_size + 1 iterations
# of this loop replay the first bars. The loop length is kept at len(testing_data) because
# the results frame built later is indexed by testing_data.index.
for _ in tqdm(range(len(testing_data)), desc="Testing"):
    action, lstm_states = loaded_model.predict(obs, state=lstm_states, episode_start=episode_starts)
    action_list.append(action)
    obs, rewards, done, info = test_env.step(action)
    episode_starts = np.array([done])
    test_env.render(mode="system")
    net_worth_list.append(test_env.net_worth)

In [None]:
# testing_data is expected to carry a DatetimeIndex
dates = testing_data.index

# Calendar span of the test set, counting both the first and the last day
first_stamp, last_stamp = dates.min(), dates.max()
num_days = (last_stamp - first_stamp).days + 1

print(f"Number of days in the index: {num_days}")

In [None]:
def calculate_financial_metrics(net_worth_list, num_days, initial_value, action_list, periods_per_year=252):
    """Summarise a backtest as annualised performance metrics and plot its equity curve.

    Parameters:
    net_worth_list (sequence of float): Net worth after every step of the backtest.
    num_days (int): Number of days the backtest spans; used to convert per-step
        figures into per-day figures.
    initial_value (float): Starting capital, the base for total return.
    action_list (sequence): One action per step; ``action[0]`` of 0 or 1 is
        counted as a buy or sell for the turnover rate.
    periods_per_year (int): Days per year used for annualisation. Defaults to
        252; pass 365 for markets that trade every day.

    Returns:
    pd.DataFrame: Two columns, 'Metric' and 'Value' (formatted strings), covering
        annualised total return, annualised daily return, annualised sigma,
        Sharpe ratio, max drawdown and turnover rate.
    """
    net_worth = np.asarray(net_worth_list, dtype=float)

    # Per-step simple returns
    returns = np.diff(net_worth) / net_worth[:-1]

    # Drawdown relative to the running peak, and its worst value
    rolling_max = np.maximum.accumulate(net_worth)
    drawdown = net_worth / rolling_max - 1.0
    max_drawdown = np.min(drawdown)

    # Share of steps on which the agent chose a buy (0) or sell (1) action
    buys_and_sells = sum(1 for action in action_list if action[0] in [0, 1])
    turnover_rate = buys_and_sells / len(net_worth)

    # Total return over the whole run, scaled linearly to one year
    total_return = (net_worth[-1] - initial_value) / initial_value
    annualized_total_return = total_return * periods_per_year / num_days

    # Mean per-step return -> per-day (steps per day = len / num_days) -> per-year
    daily_return = returns.mean() * len(net_worth) / num_days
    annualized_daily_return = periods_per_year * daily_return

    # Per-step volatility scaled by sqrt(steps per year)
    annualized_sigma = returns.std() * np.sqrt(periods_per_year * len(net_worth) / num_days)

    # Sharpe ratio with a zero risk-free rate; undefined when there is no volatility.
    # Named risk_adjusted_return so the imported empyrical sharpe_ratio is not shadowed.
    if annualized_sigma > 0:
        risk_adjusted_return = annualized_daily_return / annualized_sigma
    else:
        risk_adjusted_return = np.nan

    # The x-axis of both panels is the step index of net_worth_list, not days
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(net_worth)
    plt.title("Net Worth Over Time")
    plt.xlabel("Time (Steps)")
    plt.ylabel("Net Worth")

    plt.subplot(1, 2, 2)
    plt.plot(drawdown)
    plt.title("Drawdown Over Time")
    plt.xlabel("Time (Steps)")
    plt.ylabel("Drawdown")
    plt.tight_layout()
    plt.show()

    # Metrics table; percentages are formatted with 4 decimals
    metrics = pd.DataFrame({
        'Metric': ['Annualized Total Return', 'Annualized Daily Return', 'Annualized Sigma', 'Sharpe Ratio', 'Max Drawdown', 'Turnover Rate'],
        'Value': [f"{round(annualized_total_return * 100, 4)}%", f"{round(annualized_daily_return * 100, 4)}%",
                  f"{round(annualized_sigma * 100, 4)}%", f"{round(risk_adjusted_return, 4)}", f"{round(max_drawdown * 100, 4)}%", f"{round(turnover_rate * 100, 4)}%"]
    })

    return metrics

In [None]:
# Starting capital; the same figure the test environment was created with
initial_value = 14483.7497
metrics = calculate_financial_metrics(
    net_worth_list=net_worth_list,
    num_days=num_days,
    initial_value=initial_value,
    action_list=action_list
)
print(metrics)

In [None]:
# Action type chosen at every test step (first component of each recorded action)
actions = [chosen[0] for chosen in action_list]

# Pair each test timestamp with the resulting net worth and the action type
columns = {'Net Worth': net_worth_list, 'Action': actions}
df = pd.DataFrame(columns, index=testing_data.index)


In [None]:
# Write the per-step results to disk; the timestamp index is stored under the 'Date' header
output_path = 'net_worth_actions_LTC.csv'
df.to_csv(output_path, index_label='Date')