In [1]:
import pandas as pd
import numpy as np
from DataPreprocessing import normalization, build_state_action
from DDPG_agent import DDPG
import matplotlib.pyplot as plt
import os

In [2]:
#####################  hyper parameters  ####################
N_FEATURES = 6
A_LOW = 0
A_HIGH = 1
LR_A = 0.001
LR_C = 0.003
N_ACTOR_HIDDEN = 30
N_CRITIC_HIDDEN = 30
MAX_EPISODES = 300

GAMMA = 0.9                # discount factor
TAU = 0.1                 # soft update coefficient
MEMORY_CAPACITY = 100000    # memory size
BATCH_SIZE = 128            # batch size for gradient descent
#############################################################

In [3]:
# Load data
data_dir = os.path.join(os.getcwd(), 'V6.csv')
data = pd.read_csv(data_dir, encoding='gbk')
data = data.iloc[:, 0]


In [4]:
##take minified sample
data=data[:1238]

In [5]:
#Build state matrix and best action
state,action = build_state_action(data,N_FEATURES,1)

In [6]:
len(state),len(action)

(1232, 1232)

In [7]:
#Data split
SPLIT_RATE = 0.75
split_index = round(len(state) * SPLIT_RATE)
train_s, train_a = state[:split_index], action[:split_index]
test_s, test_a = state[split_index:], action[split_index:]

In [8]:
#Normalization
train_s_scaled,test_s_scaled = normalization(train_s,test_s)
A,B = train_a.max(),train_a.min()
train_a_scaled,test_a_scaled = (train_a-B)/(A-B),(test_a-B)/(A-B)


In [None]:
# Training
ddpg = DDPG(N_FEATURES, A_LOW, A_HIGH, LR_A, LR_C, N_ACTOR_HIDDEN, N_CRITIC_HIDDEN, GAMMA, TAU, MEMORY_CAPACITY, BATCH_SIZE)
for episode in range(MAX_EPISODES):
    ep_reward = 0
    for index in range(len(train_s_scaled) - 1):  # Iterate over the entire dataset sequentially
        s = train_s_scaled[index]
        a = ddpg.choose_action(s)
        r = -abs(a - train_a_scaled[index])  # Compute reward
        ep_reward += r
        s_ = train_s_scaled[index + 1]  # Next state is the immediate next in the dataset
        
        ddpg.store_transition(s, a, r, s_)
        ddpg.learn()
    
    print(f'Episode {episode}: {ep_reward}')




Episode 0: [nan]
Episode 1: [nan]
Episode 2: [nan]
Episode 3: [nan]
Episode 4: [nan]
Episode 5: [nan]
Episode 6: [nan]
Episode 7: [nan]


In [None]:
# Testing
pred = [ddpg.choose_action(state) for state in test_s_scaled]
pred = np.array(pred).flatten() * (A - B) + B  # Rescale predictions
actual = test_a

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Convert pred and actual to numpy arrays if they aren't already
pred = np.array(pred)
actual = np.array(actual)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual, pred))
print(f"RMSE: {rmse}")

# Calculate MAE
mae = mean_absolute_error(actual, pred)
print(f"MAE: {mae}")


In [None]:
plt.scatter(pred, actual, marker='.')
plt.xlabel('Predicted Value')
plt.ylabel('Actual Value')
plt.show()