In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
from scipy.stats import norm

In [3]:
if not os.path.exists('image_4'):
    os.makedirs('image_4')

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
device

device(type='cuda')

In [6]:
# # 检查 PyTorch 版本
# print("PyTorch version:", torch.__version__)
# 
# # 检查 CUDA 可用性
# print("CUDA available:", torch.cuda.is_available())
# 
# # 创建一个简单的张量并进行基本运算
# a = torch.tensor([1.0, 2.0, 3.0])
# b = torch.tensor([4.0, 5.0, 6.0])
# print("a + b =", a + b)
# 
# # 测试其他函数是否正常工作
# import numpy as np
# 
# def test_function():
#     print("This is a test function.")
# 
# test_function()
# 
# print("NumPy version:", np.__version__)
# print("NumPy array:", np.array([1, 2, 3]))

# EXP4

In [7]:
# test_data = np.random.rand(10, 1000)
# 
# # Plotting means_load
# plt.figure(figsize=(12, 6))
# plt.plot(test_data, marker='o', linestyle='-', color='b', label='means_load')
# plt.title('Random Means for Load')
# plt.xlabel('Node')
# plt.ylabel('Mean Load')
# plt.legend()
# plt.grid(True)
# 
# # Adjust layout
# plt.tight_layout()
# plt.show()

In [8]:
# Function to generate random means
def generate_means(N, mean_load, var_load):
    print('Mean Load:', mean_load, '; Variance Load:', var_load)
    means_load = np.random.normal(loc=mean_load, scale=np.sqrt(var_load), size=N)
    
    # # Plotting means_load
    # plt.figure(figsize=(12, 6))
    # plt.plot(means_load, marker='o', linestyle='-', color='b', label='means_load')
    # plt.title('Random Means for Load')
    # plt.xlabel('Node')
    # plt.ylabel('Mean Load')
    # plt.legend()
    # plt.grid(True)
    # 
    # # Adjust layout
    # plt.tight_layout()
    # plt.show()

    return means_load

In [9]:
# Function to generate IID data
def generate_iid_data(N, T, means_load):
    loads = np.array([np.random.normal(loc=means_load[i], scale=1, size=T) for i in range(N)])

    return loads, np.mean(loads, axis=1)

In [10]:
# Function to generate AR(1) data
def generate_ar1_data(N, T, means_load, theta):
    """
    Generate AR(1) data for multiple nodes.
    
    Parameters:
        N (int): Number of nodes.
        T (int): Number of time steps.
        means_load (array-like): Mean load for each node.
        theta (float): AR(1) process parameter.
    
    Returns:
        load (ndarray): Generated AR(1) load data.
    """
    # print('Theta:', theta)
    loads = np.zeros((N, T))

    def generate_ar1(theta, T, mean_node):
        """
        Generate a single AR(1) time series.
        
        Parameters:
            theta (float): AR(1) process parameter.
            n (int): Number of time steps.
            mean_node (float): Mean load for the node.
        
        Returns:
            ar1 (ndarray): Generated AR(1) time series.
        """
        ar1 = np.zeros(T)
        ar1[0] = mean_node
        for t in range(1, T):
            ar1[t] = theta * ar1[t - 1] + (1 - theta) * mean_node + np.random.normal(0, 1)
        return ar1

    for i in range(N):
        loads[i] = generate_ar1(theta, T, means_load[i])

    return loads, np.mean(loads, axis=1)

In [11]:
def calculate_reward_0(load):
    # Simple reward function
    return 1 / (1 + load_iid)

In [12]:
# 线性反转缩放 Linear inverse scaling
def calculate_reward(load, alpha=1):
    # Linear inverse scaling reward function.
    return (np.max(load) - load) / (np.max(load) - np.min(load)) * alpha

In [13]:
N = 10
T = 1000
mean_load = 50
var_load = 10
k_values = [1, 2, 5]
thetas = [0.1, 0.5, 0.9]

In [14]:
%matplotlib inline

In [15]:
means_load = generate_means(N, mean_load, var_load)

Mean Load: 50 ; Variance Load: 10


In [16]:
# Generate IID data
load_iid, load_iid_means = generate_iid_data(N, T, means_load)
load_reward_iid = calculate_reward(load_iid, 10)
# load_reward_iid_means = calculate_reward(load_iid_means, 10)

In [17]:
# # Generate AR(1) data
# # load_ar1, load_ar1_means = generate_ar1_data(N, T, means_load, theta)
# load_ar1_thetas = []
# load_ar1_means_thetas = []
# load_reward_ar1_thetas = []
# load_reward_ar1_means_thetas = []
# for theta in thetas:
#     load_ar1_theta, load_ar1_means_theta = generate_ar1_data(N, T, means_load, theta)
# 
#     load_reward_ar1_theta = calculate_reward(load_ar1_theta, 10)
#     # load_reward_ar1_means_theta = calculate_reward(load_ar1_means_theta, 10)
# 
#     load_ar1_thetas.append(load_ar1_theta)
#     load_ar1_means_thetas.append(load_ar1_means_theta)
#     load_reward_ar1_thetas.append(load_reward_ar1_theta)
#     # load_reward_ar1_means_thetas.append(load_reward_ar1_means_theta)

In [18]:
class LSTMExpert(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMExpert, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        c0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [19]:
# def prepare_data(rewards, time_steps):
#     X, y = [], []
#     for t in range(len(rewards) - time_steps):
#         X.append(rewards[t:t + time_steps])
#         y.append(rewards[t + time_steps])
#     return np.array(X), np.array(y)

In [20]:
def prepare_data(rewards, time_steps):
    X, y = [], []
    for t in range(rewards.shape[1] - time_steps):
        X.append(rewards[:, t:t + time_steps].T)
        y.append(rewards[:, t + time_steps])
    return np.array(X), np.array(y)

In [21]:
# def train_lstm_models(rewards, num_experts, time_steps, input_size, hidden_size, output_size, epochs=10):
#     models = [LSTMExpert(input_size, hidden_size, output_size).to(device) for _ in range(num_experts)]
#     optimizers = [optim.Adam(model.parameters(), lr=0.01) for model in models]
#     criterion = nn.MSELoss()
# 
#     X, y = prepare_data(rewards, time_steps)
#     X = torch.tensor(X, dtype=torch.float32).to(device)
#     y = torch.tensor(y, dtype=torch.float32).to(device)
# 
#     for model, optimizer in zip(models, optimizers):
#         for epoch in range(epochs):
#             model.train()
#             optimizer.zero_grad()
#             output = model(X)
#             loss = criterion(output, y)
#             loss.backward()
#             optimizer.step()
# 
#     return models

In [22]:
def train_lstm_models(rewards, num_experts, time_steps, input_size, hidden_size, output_size, epochs=10):
    models = [LSTMExpert(input_size, hidden_size, output_size).to(device) for _ in range(num_experts)]
    optimizers = [optim.Adam(model.parameters(), lr=0.01) for model in models]
    criterion = nn.MSELoss()

    X, y = prepare_data(rewards, time_steps)
    X = torch.tensor(X, dtype=torch.float32).to(device)
    y = torch.tensor(y, dtype=torch.float32).to(device)

    for model, optimizer in zip(models, optimizers):
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            output = model(X)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

    return models


In [23]:
# def exp4_with_lstm(rewards, lstm_models, gamma, alpha, time_steps):
#     N, T = rewards.shape
#     K = len(lstm_models)
# 
#     # 初始化权重
#     weights = np.ones(K)
# 
#     all_regrets = []
#     all_actions = []
#     all_expert_weights = []
# 
#     optimal_reward = np.max(np.mean(rewards, axis=1))
# 
#     # 准备时间序列数据
#     history = np.zeros((time_steps, N))
# 
#     for t in range(T):
#         # 更新历史数据
#         if t >= time_steps:
#             history = np.roll(history, -1, axis=0)
#             history[-1, :] = rewards[:, t]
# 
#         if t < time_steps:
#             expert_advice = np.random.rand(K, N)
#         else:
#             # 使用LSTM模型预测奖励
#             history_tensor = torch.tensor(history[np.newaxis, :, :], dtype=torch.float32).to(device)
#             expert_advice = np.array([model(history_tensor).cpu().detach().numpy()[0] for model in lstm_models])
# 
#         # 计算专家的权重分布
#         expert_weights = weights / np.sum(weights)
# 
#         # 计算每个臂的选择概率
#         probabilities = np.dot(expert_weights, expert_advice)
#         probabilities = (1 - gamma) * probabilities + gamma / N
# 
#         # 选择一个臂
#         action = np.random.choice(N, p=probabilities)
#         reward = rewards[action, t]
# 
#         # 更新权重
#         estimated_reward = reward / probabilities[action]
#         for k in range(K):
#             weights[k] *= np.exp(alpha * expert_advice[k, action] * estimated_reward / N)
# 
#         # 记录
#         all_actions.append(action)
#         all_regrets.append(optimal_reward - reward)
#         all_expert_weights.append(expert_weights.copy())
# 
#     return np.array(all_actions), np.array(all_regrets), np.array(all_expert_weights)

In [24]:
def exp4_with_lstm(rewards, lstm_models, gamma, alpha, time_steps):
    N, T = rewards.shape
    K = len(lstm_models)

    weights = np.ones(K)

    all_regrets = []
    all_actions = []
    all_expert_weights = []

    optimal_reward = np.max(np.mean(rewards, axis=1))

    history = np.zeros((time_steps, N))

    for t in range(T):
        if t >= time_steps:
            history = np.roll(history, -1, axis=0)
            history[-1, :] = rewards[:, t]

        if t < time_steps:
            expert_advice = np.random.rand(K, N)
        else:
            history_tensor = torch.tensor(history[np.newaxis, :, :], dtype=torch.float32).to(device)
            expert_advice = np.array([model(history_tensor).cpu().detach().numpy()[0] for model in lstm_models])

        expert_weights = weights / np.sum(weights)

        probabilities = np.dot(expert_weights, expert_advice)
        probabilities = (1 - gamma) * probabilities + gamma / N

        # 归一化概率以确保其和为1
        probabilities /= np.sum(probabilities)

        action = np.random.choice(N, p=probabilities)
        reward = rewards[action, t]

        estimated_reward = reward / probabilities[action]
        for k in range(K):
            weights[k] *= np.exp(alpha * expert_advice[k, action] * estimated_reward / N)

        all_actions.append(action)
        all_regrets.append(optimal_reward - reward)
        all_expert_weights.append(expert_weights.copy())

    return np.array(all_actions), np.array(all_regrets), np.array(all_expert_weights)


In [25]:
# 示例数据
N = 10  # 臂的数量
T = 1000  # 时间步数
K = 5  # 专家的数量
time_steps = 10  # LSTM 模型的时间步

rewards = load_reward_iid

# 训练LSTM模型
input_size = N
hidden_size = 50
output_size = N
lstm_models = train_lstm_models(rewards, K, time_steps, input_size, hidden_size, output_size)

# 设置参数
gamma = 0.1
alpha = 0.1

# 运行EXP4算法
actions, regrets, expert_weights = exp4_with_lstm(rewards, lstm_models, gamma, alpha, time_steps)

# 绘制结果
plt.figure(figsize=(12, 6))
plt.plot(np.cumsum(regrets))
plt.title('Cumulative Regrets of EXP4 with LSTM')
plt.xlabel('Time Steps')
plt.ylabel('Cumulative Regret')
plt.grid(True)
plt.show()


  weights[k] *= np.exp(alpha * expert_advice[k, action] * estimated_reward / N)
  expert_weights = weights / np.sum(weights)


ValueError: probabilities contain NaN

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class LSTMExpert(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMExpert, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        c0 = torch.zeros(1, x.size(0), hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

def prepare_data(rewards, time_steps):
    X, y = [], []
    for t in range(rewards.shape[1] - time_steps):
        X.append(rewards[:, t:t + time_steps].T)
        y.append(rewards[:, t + time_steps])
    return np.array(X), np.array(y)


def train_lstm_models(rewards, num_experts, time_steps, input_size, hidden_size, output_size, epochs=10):
    models = [LSTMExpert(input_size, hidden_size, output_size).to(device) for _ in range(num_experts)]
    optimizers = [optim.Adam(model.parameters(), lr=0.01) for model in models]
    criterion = nn.MSELoss()

    X, y = prepare_data(rewards, time_steps)
    X = torch.tensor(X, dtype=torch.float32).to(device)
    y = torch.tensor(y, dtype=torch.float32).to(device)

    for model, optimizer in zip(models, optimizers):
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            output = model(X)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

    return models

In [None]:
def train_lstm_models(rewards, num_experts, time_steps, input_size, hidden_size, output_size, epochs=10):
    models = [LSTMExpert(input_size, hidden_size, output_size).to(device) for _ in range(num_experts)]
    optimizers = [optim.Adam(model.parameters(), lr=0.01) for model in models]
    criterion = nn.MSELoss()

    X, y = prepare_data(rewards, time_steps)
    X = torch.tensor(X, dtype=torch.float32).to(device)
    y = torch.tensor(y, dtype=torch.float32).to(device)

    for model_idx, (model, optimizer) in enumerate(zip(models, optimizers)):
        print(f"Training model {model_idx + 1}/{num_experts}")
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            output = model(X)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            print(f"Model {model_idx + 1}, Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}")

    return models


In [None]:
def calculate_top_k_accuracy(actions, rewards_means, k):
    optimal_actions = np.argsort(rewards_means)[-k:]
    return np.mean(np.isin(actions, optimal_actions))

In [None]:

def exp4_with_lstm(rewards, lstm_models, gamma, alpha, time_steps):
    N, T = rewards.shape
    K = len(lstm_models)

    log_weights = np.zeros(K)  # 在对数空间初始化权重

    all_regrets = []
    all_actions = []
    all_expert_weights = []

    optimal_reward = np.max(np.mean(rewards, axis=1))

    history = np.zeros((time_steps, N))

    for t in range(T):
        if t >= time_steps:
            history = np.roll(history, -1, axis=0)
            history[-1, :] = rewards[:, t]

        if t < time_steps:
            expert_advice = np.random.rand(K, N)
        else:
            history_tensor = torch.tensor(history[np.newaxis, :, :], dtype=torch.float32).to(device)
            expert_advice = np.array([model(history_tensor).cpu().detach().numpy()[0] for model in lstm_models])

        expert_weights = np.exp(log_weights - np.max(log_weights))  # 防止数值溢出
        expert_weights /= np.sum(expert_weights)
        expert_weights = np.nan_to_num(expert_weights)

        probabilities = np.dot(expert_weights, expert_advice)
        probabilities = (1 - gamma) * probabilities + gamma / N
        probabilities = np.nan_to_num(probabilities)

        probabilities /= np.sum(probabilities)

        action = np.random.choice(N, p=probabilities)
        reward = rewards[action, t]

        estimated_reward = reward / probabilities[action]
        for k in range(K):
            log_weights[k] += alpha * expert_advice[k, action] * estimated_reward / N  # 更新在对数空间

        all_actions.append(action)
        all_regrets.append(optimal_reward - reward)
        all_expert_weights.append(expert_weights.copy())

    accuracies = {k: calculate_top_k_accuracy(actions, rewards_means, k) for k in k_values}
    
    return np.array(all_actions), np.array(all_regrets), np.array(all_expert_weights)

In [None]:

# 示例数据
N = 10  # 臂的数量
T = 1000  # 时间步数
K = 5  # 专家的数量
time_steps = 10  # LSTM 模型的时间步

rewards = load_reward_iid

# 训练LSTM模型
input_size = N
hidden_size = 50
output_size = N
lstm_models = train_lstm_models(rewards, K, time_steps, input_size, hidden_size, output_size)

# 设置参数
gamma = 0.1
alpha = 0.1

# 运行EXP4算法
actions, regrets, expert_weights = exp4_with_lstm(rewards, lstm_models, gamma, alpha, time_steps)





In [ ]:
actions

In [None]:
regrets

In [ ]:
expert_weights

In [ ]:
# 绘制结果
plt.figure(figsize=(12, 6))
plt.plot(np.cumsum(regrets))
plt.title('Cumulative Regrets of EXP4 with LSTM')
plt.xlabel('Time Steps')
plt.ylabel('Cumulative Regret')
plt.grid(True)
plt.show()