In [15]:
import yfinance as yf
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Tải dữ liệu từ Yahoo Finance
data = yf.download("BTC-USD", start="2024-08-01", end="2024-10-31")
data['Return'] = data['Adj Close'].pct_change().fillna(0)  # Tính giá trị thay đổi phần trăm

# Định nghĩa lớp Môi trường
class StockEnv:
    def __init__(self, data):
        self.data = data
        self.index = 0
        self.done = False

    def reset(self):
        self.index = 0
        self.done = False
        # Trả về state chỉ bao gồm 'Return'
        return np.array([self.data['Return'].iloc[self.index]])

    def step(self, action):
        self.index += 1
        if self.index >= len(self.data):
            self.done = True
            return np.array([self.data['Return'].iloc[-1]]), 0, self.done, {}

        # Tính phần thưởng: ví dụ reward = Return * action (mua hoặc bán)
        reward = self.data['Return'].iloc[self.index] * action
        return np.array([self.data['Return'].iloc[self.index]]), reward, self.done, {}

# Định nghĩa lớp Actor-Critic
class ActorCritic(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(ActorCritic, self).__init__()
        self.actor = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        action_probs = self.actor(state)
        value = self.critic(state)
        return action_probs, value

# Hàm chọn hành động
def choose_action(state, model):
    action_probs, _ = model(state)
    action_probs = action_probs.squeeze()  # Bỏ chiều không cần thiết
    action = torch.multinomial(action_probs, 1).item()  # Chọn hành động dựa trên xác suất
    return action, action_probs[action]

# Hàm huấn luyện Actor-Critic
def train_actor_critic(env, model, optimizer, gamma=0.99, max_episodes=500):
    for episode in range(max_episodes):
        state = env.reset()
        state = torch.FloatTensor(state)
        log_probs = []
        values = []
        rewards = []
        done = False
        total_reward = 0

        while not done:
            state_tensor = state.unsqueeze(0)  # Đảm bảo kích thước (1, input_dim)
            action, log_prob = choose_action(state_tensor, model)
            next_state, reward, done, _ = env.step(action)
            next_state = torch.FloatTensor(next_state)

            _, value = model(state_tensor)
            log_probs.append(log_prob)
            values.append(value)
            rewards.append(torch.tensor([reward], dtype=torch.float))

            state = next_state
            total_reward += reward

        # Tính toán lợi thế và cập nhật mạng neural
        Qval, _ = model(torch.FloatTensor(state).unsqueeze(0))
        Qval = Qval.detach()
        values.append(Qval)

        policy_loss = []
        value_loss = []
        returns = Qval

        for i in reversed(range(len(rewards))):
            returns = rewards[i] + gamma * returns
            advantage = returns - values[i]
            value_loss.append(advantage ** 2)
            policy_loss.append(-log_probs[i] * advantage.detach())

        optimizer.zero_grad()
        loss = torch.stack(policy_loss).sum() + torch.stack(value_loss).sum()
        loss.backward()
        optimizer.step()

        print(f"Episode {episode}, Total reward: {total_reward}")

# Thiết lập mô hình và huấn luyện
env = StockEnv(data)
input_dim = 1  # Chỉ có "Return" làm đặc trưng
output_dim = 2  # Hành động: Mua hoặc Bán
model = ActorCritic(input_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Huấn luyện mô hình
train_actor_critic(env, model, optimizer, max_episodes=100)

[*********************100%***********************]  1 of 1 completed


Episode 0, Total reward: 0.2614649617100072
Episode 1, Total reward: 0.3284122264163689
Episode 2, Total reward: -0.1256319999264941
Episode 3, Total reward: 0.35099352456314914
Episode 4, Total reward: 0.21547219246200955
Episode 5, Total reward: 0.1892072311632581
Episode 6, Total reward: 0.025805980499173264
Episode 7, Total reward: -0.138587146077091
Episode 8, Total reward: 0.007563122681953693
Episode 9, Total reward: -0.0854295008447038
Episode 10, Total reward: 0.02443880795863873
Episode 11, Total reward: -0.1744398855136351
Episode 12, Total reward: 0.07591909435455058
Episode 13, Total reward: -0.007163471777001051
Episode 14, Total reward: -0.06713934105946406
Episode 15, Total reward: -0.11777299469670877
Episode 16, Total reward: 0.05846383191047633
Episode 17, Total reward: 0.10797042673686819
Episode 18, Total reward: 0.18466081126668787
Episode 19, Total reward: 0.13475110501292387
Episode 20, Total reward: 0.03570760949357121
Episode 21, Total reward: 0.19668418820062

In [16]:
def make_decision(model, current_state):
    model.eval()  # Đặt mô hình ở chế độ đánh giá
    state_tensor = torch.FloatTensor(current_state).unsqueeze(0)
    action_probs, _ = model(state_tensor)
    action_probs = action_probs.squeeze()
    action = torch.argmax(action_probs).item()  # Chọn hành động có xác suất cao nhất
    action_text = "Buy" if action == 1 else "Sell"
    print(f"Recommended Action: {action_text} (Probability: {action_probs[action]:.2f})")
    return action

In [17]:
current_state = np.array([data['Return'].iloc[-1]])
make_decision(model, current_state)

Recommended Action: Buy (Probability: 0.68)


1

In [18]:
data

Price,Adj Close,Close,High,Low,Open,Volume,Return
Ticker,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,Unnamed: 7_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2024-08-01 00:00:00+00:00,65357.500000,65357.500000,65593.242188,62248.941406,64625.839844,40975554494,0.000000
2024-08-02 00:00:00+00:00,61415.066406,61415.066406,65523.222656,61184.894531,65353.500000,43060875727,-0.060321
2024-08-03 00:00:00+00:00,60680.093750,60680.093750,62148.371094,59836.527344,61414.808594,31753030589,-0.011967
2024-08-04 00:00:00+00:00,58116.976562,58116.976562,61062.988281,57210.804688,60676.093750,31758917219,-0.042240
2024-08-05 00:00:00+00:00,53991.457031,53991.457031,58268.828125,49121.238281,58110.296875,108991085584,-0.070986
...,...,...,...,...,...,...,...
2024-10-26 00:00:00+00:00,67014.695312,67014.695312,67317.921875,66360.593750,66628.734375,19588098156,0.005586
2024-10-27 00:00:00+00:00,67929.296875,67929.296875,68221.312500,66847.226562,67023.476562,16721307878,0.013648
2024-10-28 00:00:00+00:00,69907.757812,69907.757812,70212.265625,67535.132812,67922.671875,38799856657,0.029125
2024-10-29 00:00:00+00:00,72720.492188,72720.492188,73577.210938,69729.914062,69910.046875,58541874402,0.040235
