### Import libraries

In [1]:
import sys  
import pandas as pd
import numpy as np
import time
from gym_trading_env.environments import TradingEnv
import gymnasium as gym
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np

### Load dataset

In [2]:
# Load the dataset, name the index 'date', and tidy the frame in one chain.
# NOTE: pickle files can execute arbitrary code when loaded; only read trusted files.
df = (
    pd.read_pickle("data/bybit-ETHUSDT-1h.pkl")
    .rename_axis('date', axis='index')
    .drop(['date_close'], axis=1)
    .sort_index()        # order rows by the index
    .dropna()            # discard rows containing any missing value
    .drop_duplicates()   # discard rows whose values repeat an earlier row
)
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-12-31 18:00:00,1200.92,1201.1,1199.35,1199.78,326.15632
2022-12-31 19:00:00,1199.78,1205.7,1199.42,1202.76,887.63063
2022-12-31 20:00:00,1202.76,1203.86,1202.24,1202.44,513.74465
2022-12-31 21:00:00,1202.44,1203.19,1199.85,1200.24,649.63971
2022-12-31 22:00:00,1200.24,1201.34,1193.08,1196.32,1587.73093


### Static Feature Initialization

In [3]:
# Build the feature columns in a single `assign` call.
# WARNING: the column names need to contain the keyword 'feature'!
df = df.assign(
    # Relative change of the close from the previous row.
    feature_close=df["close"].pct_change(),
    # Open / high / low expressed relative to the close of the same row.
    feature_open=df["open"] / df["close"] - 1,
    feature_high=df["high"] / df["close"] - 1,
    feature_low=df["low"] / df["close"] - 1,
    # Volume scaled by its maximum over a rolling window of 7*24 rows.
    feature_volume=df["volume"] / df["volume"].rolling(7 * 24).max(),
).dropna()  # the rolling window and pct_change leave NaNs in the leading rows

In [4]:
# Preview the first rows of the frame, now including the feature_* columns.
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,feature_close,feature_open,feature_high,feature_low,feature_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-07 17:00:00,1262.48,1263.41,1262.04,1262.67,124.39588,0.00015,-0.00015,0.000586,-0.000499,0.011585
2023-01-07 18:00:00,1262.67,1264.31,1262.66,1263.95,121.37665,0.001014,-0.001013,0.000285,-0.001021,0.011304
2023-01-07 19:00:00,1263.95,1265.62,1263.95,1265.1,431.70447,0.00091,-0.000909,0.000411,-0.000909,0.040205
2023-01-07 20:00:00,1265.1,1265.42,1262.91,1263.91,228.01033,-0.000941,0.000942,0.001195,-0.000791,0.021235
2023-01-07 21:00:00,1263.91,1266.91,1262.63,1264.02,545.12295,8.7e-05,-8.7e-05,0.002286,-0.0011,0.050768


### Reward function

In [5]:
# Reward computed from the environment's history object.
def reward_function(history):
    """Return the log-return of the portfolio valuation over the latest step.

    Parameters
    ----------
    history : indexable
        Object supporting ``history["portfolio_valuation", i]`` lookups, where
        ``-1`` is the latest entry and ``-2`` the one before it.

    Returns
    -------
    float
        ``log(p_t / p_{t-1})`` with ``p`` the portfolio valuation.
    """
    latest_value = history["portfolio_valuation", -1]
    previous_value = history["portfolio_valuation", -2]
    return np.log(latest_value / previous_value)

### Render each step

In [6]:
def render_matplotlib(data_list, candle_width=0.02):
    """Draw a candlestick chart of the recorded steps with a summary box.

    Parameters
    ----------
    data_list : list of dict
        One record per step. Each record must provide the keys ``date``,
        ``data_open``, ``data_high``, ``data_low``, ``data_close``,
        ``data_volume``, ``portfolio_valuation``, ``reward`` and ``position``.
    candle_width : float, optional
        Width of each candle body in Matplotlib date units (days).
        Defaults to 0.02.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and then closed. Nothing is
        drawn when ``data_list`` is empty.
    """
    # Nothing recorded yet: there is no candle and no "last row" to summarise.
    if len(data_list) == 0:
        return

    # Build a DataFrame from the step records.
    df = pd.DataFrame(data_list)

    # Matplotlib needs dates as floats to place them on the x axis.
    date_num = mdates.date2num(df['date'].astype('M8[s]').tolist())

    open_price = df['data_open'].to_numpy(dtype=float)
    high_price = df['data_high'].to_numpy(dtype=float)
    low_price = df['data_low'].to_numpy(dtype=float)
    close_price = df['data_close'].to_numpy(dtype=float)

    fig, ax = plt.subplots(figsize=(10, 6))

    # Wicks: one vertical segment per candle from low to high.
    ax.vlines(date_num, low_price, high_price, colors='black')

    # Bodies: span min(open, close) .. max(open, close). The height must be
    # non-negative; a signed (close - open) height combined with
    # bottom=min(open, close) would draw bearish candles *below* the close.
    body_bottom = np.minimum(open_price, close_price)
    body_height = np.abs(close_price - open_price)
    body_colors = np.where(close_price > open_price, 'green', 'red').tolist()
    ax.bar(date_num, body_height, width=candle_width, bottom=body_bottom,
           color=body_colors)

    # Format the x axis as timestamps.
    ax.xaxis_date()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))

    # Summary of the latest step (reward is summed over all recorded steps).
    last_row = df.iloc[-1]
    info_text = f"Portfolio: {last_row['portfolio_valuation']:.2f}\nReward: {df['reward'].sum():.5f}\nPosition: {last_row['position']}\nVolume: {last_row['data_volume']}"

    # Boxed text anchored to the top-right corner of the figure.
    props = dict(boxstyle='round', facecolor='lightgrey', alpha=0.5)
    ax.text(0.95, 0.95, info_text, fontsize=14, ha='right', va='top', transform=fig.transFigure,
            bbox=props, color='blue')

    # Labels and grid.
    ax.set_ylabel('Price')
    ax.set_xlabel('Date')
    ax.grid(True)

    plt.title("Candlestick Chart - Gym Trading Environment", fontsize=14)

    # Rotate the x tick labels so the timestamps stay readable.
    plt.xticks(rotation=45)

    # Show the chart, then release the figure so repeated per-step calls
    # do not accumulate open figures.
    plt.tight_layout()
    plt.show()
    plt.close(fig)


### Environment Initialization

In [7]:
# Keyword arguments for the trading environment, gathered in one place.
env_kwargs = dict(
    name="ETHUSDT",
    df=df,
    windows=3,
    positions=[-1, 0, 1],                # from -1 (= SHORT) to +1 (= LONG)
    initial_position=0,                  # initial position
    trading_fees=0.01 / 100,             # 0.01% per stock buy / sell
    borrow_interest_rate=0.0003 / 100,   # per timestep (= 1h here)
    reward_function=reward_function,
    portfolio_initial_value=1000,        # in FIAT (here, USD)
    max_episode_duration=500,
)
env = gym.make("TradingEnv", **env_kwargs)

In [8]:
# Look up the environment's `add_metric` hook once, then register both metrics.
add_metric = env.get_wrapper_attr('add_metric')

# Number of steps at which the position differs from the previous step.
add_metric('Position Changes', lambda history: np.sum(np.diff(history['position']) != 0))
# Number of recorded positions in the episode.
add_metric('Episode Lenght', lambda history: len(history['position']))

In [9]:
# Start a fresh episode and begin collecting the per-step info dicts.
done = truncated = False
observation, info = env.reset()
info_list = [info]  # seeded with the info returned by reset

In [10]:
# Inspect the info dict returned by env.reset().
info

{'idx': 1072,
 'step': 0,
 'date': numpy.datetime64('2023-02-21T09:00:00.000000000'),
 'position_index': 1,
 'position': 0,
 'real_position': 0,
 'data_close': 1684.54,
 'data_high': 1687.33,
 'data_open': 1676.02,
 'data_low': 1671.0,
 'data_volume': 4911.5555,
 'portfolio_valuation': 1000.0,
 'portfolio_distribution_asset': 0,
 'portfolio_distribution_fiat': 1000.0,
 'portfolio_distribution_borrowed_asset': 0,
 'portfolio_distribution_borrowed_fiat': 0,
 'portfolio_distribution_interest_asset': 0,
 'portfolio_distribution_interest_fiat': 0,
 'reward': 0}

In [11]:
# Inspect the initial observation returned by env.reset().
observation

array([[-0.00217261,  0.00217735,  0.0022419 , -0.00204823,  0.0535113 ,
         0.        ,  0.        ],
       [-0.01636823,  0.01664061,  0.02325748, -0.00230904,  0.28061602,
         0.        ,  0.        ],
       [ 0.00508347, -0.00505776,  0.00165624, -0.0080378 ,  0.25020847,
         0.        ,  0.        ]], dtype=float32)

### Running

In [12]:
# Roll out one episode with uniformly sampled actions, recording every info dict.
while not (done or truncated):
    observation, reward, done, truncated, info = env.step(env.action_space.sample())
    info_list.append(info)
    # Optional: call render_matplotlib(info_list) here to redraw the chart after each step.

Market Return : -0.57%   |   Portfolio Return : -0.18%   |   Position Changes : 336   |   Episode Lenght : 500   |   


### Save the episode for the web renderer

In [13]:
# Look up the environment's `save_for_render` attribute through the wrapper stack and call it.
# NOTE(review): presumably this writes the episode history to disk for the web renderer — confirm.
env.get_wrapper_attr('save_for_render')()

### Run the web renderer

In [14]:
import pandas as pd
from gym_trading_env.renderer import Renderer


def _close_sma(window):
    """Return a function computing the `window`-row rolling mean of the "close" column."""
    return lambda df: df["close"].rolling(window).mean()


# Renderer pointed at the "render_logs" directory.
renderer = Renderer(render_logs_dir="render_logs")

# Add custom lines (simple moving averages of the close).
renderer.add_line(name="sma10", function=_close_sma(10), line_options={"width": 1, "color": "purple"})
renderer.add_line(name="sma20", function=_close_sma(20), line_options={"width": 1, "color": "blue"})

# Start the renderer; per the recorded output it serves a Flask app until interrupted.
renderer.run()

 * Serving Flask app 'gym_trading_env.renderer'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [14/Oct/2024 16:52:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:52:13] "GET /update_data/ETHUSDT_2024-10-14_16-52-05.pkl HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:52:13] "GET /metrics HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:43] "GET /update_data/BTCUSDT_2024-10-13_23-00-55.pkl HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:43] "GET /metrics HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:45] "GET /update_data/BTCUSDT_2024-10-14_16-51-06.pkl HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:45] "GET /metrics HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:48] "GET /update_data/ETHUSDT_2024-10-14_16-52-05.pkl HTTP/1.1" 200 -
127.0.0.1 - - [14/Oct/2024 16:54:48] "GET /metrics HTTP/1.1" 200 -
