In [1]:
import numpy as np
import sklearn.preprocessing
import torch
from sklearn.model_selection import train_test_split
import torch
from matplotlib import pyplot as plt 
import random
import torch.nn as nn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader, TensorDataset, Subset, SubsetRandomSampler
import os
import json
import torchvision 
import torchvision.transforms as transforms
from torch.utils.data import random_split
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from sklearn.model_selection import KFold
import pandas as pd
from time import time
# from numpy.lib.stride_tricks import as_strided
from torchvision.datasets import ImageFolder
from torch import as_strided


# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [22]:
# Load the PEMS04 archive and pull out its 'data' array.
PEMS04 = np.load('./高速公路车流量数据集/PEMS04.npz')
dataset_PEMS04 = PEMS04['data']
print(dataset_PEMS04.shape) # last axis holds 3 features: traffic flow, congestion level (occupancy), speed

(16992, 307, 3)


# 数据处理

In [23]:
# Set ServerApp.iopub_data_rate_limit to 10000000 through the %config magic.
# NOTE(review): this is a Jupyter server option — confirm that setting it from inside the kernel takes effect.
%config ServerApp.iopub_data_rate_limit = 10000000

In [24]:
# Chronological split boundaries: 60% train / 20% validation / 20% test.
train_proportion, val_proportion, test_proportion = 0.6, 0.2, 0.2
train_end = int(train_proportion * len(dataset_PEMS04))
val_end = int((train_proportion + val_proportion) * len(dataset_PEMS04))

# `scaler` holds the normalised copy of the data (same shape as the raw array).
scaler = np.zeros(dataset_PEMS04.shape)
print(scaler.shape)
v_scaler = MinMaxScaler()  # speed normalisation
o_scaler = MinMaxScaler()  # congestion level (occupancy) normalisation
f_scaler = MinMaxScaler()  # traffic flow normalisation

# Fit each scaler on the training rows only, then apply it to the whole series.
# Fitting on the full array would leak validation/test min/max into training.
# Values in the validation/test parts may therefore fall slightly outside [0, 1].
for feature_idx, feature_scaler in enumerate((f_scaler, o_scaler, v_scaler)):
    feature_scaler.fit(dataset_PEMS04[:train_end, :, feature_idx])
    scaler[:, :, feature_idx] = feature_scaler.transform(dataset_PEMS04[:, :, feature_idx])

# Split into training, validation and test sets along the time axis.
train_dataset = scaler[:train_end, :, :]
val_dataset = scaler[train_end:val_end, :, :]
test_dataset = scaler[val_end:, :, :]

(16992, 307, 3)


# 采样

In [29]:
class TrafficDataset(Dataset):
    """Sliding-window dataset over a (T, N, F) time series.

    Sample ``i`` is the pair ``(x, y)`` where ``x`` is the window
    ``dataset[i : i + windows_size]`` and ``y`` is the ``predict_len`` steps
    that immediately follow it.

    Args:
        dataset: array indexable as ``dataset[a:b, :, :]`` with a 3-element
            ``shape`` (time steps, nodes, features).
        windows_size: number of input time steps per sample.
        predict_len: number of future time steps used as the target.
    """

    def __init__(self, dataset, windows_size, predict_len=1):
        self.dataset = dataset
        self.windows_size = windows_size
        self.predict_len = predict_len
        self.T, self.N, self.F = self.dataset.shape  # time steps, nodes, features

    def __len__(self):
        # Clamp at zero: a series shorter than window + horizon has no samples,
        # and a negative value would make len() raise ValueError.
        return max(0, self.T - self.windows_size - self.predict_len + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float32 tensors of shape (windows_size, N, F) and (predict_len, N, F).

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Support Python-style negative indexing (not in place: idx may be a tensor).
            idx = idx + n_samples
        if idx < 0 or idx >= n_samples:
            # Without this check slicing silently returns truncated/empty windows
            # and plain iteration over the dataset never terminates.
            raise IndexError(f"index {idx} is out of range for {n_samples} samples")

        window_end = idx + self.windows_size
        # Input sequence: (windows_size, N, F)
        x = self.dataset[idx:window_end, :, :]
        # Target sequence: (predict_len, N, F), the steps right after the window
        y = self.dataset[window_end:window_end + self.predict_len, :, :]

        # Keep the (steps, N, F) layout; any reshaping is left to the model.
        x = torch.as_tensor(x, dtype=torch.float32)
        y = torch.as_tensor(y, dtype=torch.float32)

        return x, y

# 构建数据集与 DataLoader

In [32]:
windows_size = 12  # input window: 12 steps x 5 minutes = 60 minutes of history
predict_len = 1    # forecast horizon: 1 step = the next 5 minutes

train_ds = TrafficDataset(train_dataset, windows_size, predict_len)
val_ds = TrafficDataset(val_dataset, windows_size, predict_len)
test_ds = TrafficDataset(test_dataset, windows_size, predict_len)

# Shuffle the training windows so each mini-batch is not a run of consecutive,
# heavily overlapping windows. Validation and test loaders keep chronological order.
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [33]:
from sklearn.metrics import accuracy_score

@torch.no_grad()
def evaluating(model, dataloader, loss_fct):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: callable mapping a batch of inputs to logits.
        dataloader: iterable yielding ``(datas, labels)`` batches.
        loss_fct: callable ``loss_fct(logits, labels)`` returning a scalar tensor.

    Returns:
        tuple: ``(mean batch loss, accuracy)`` where accuracy compares the
        argmax over the last logits axis with the labels.

    NOTE(review): the argmax/accuracy step assumes a classification model with
    one integer label per sample. TrafficDataset in this notebook yields float
    targets, so a regression metric is probably needed there. Confirm against the caller.
    """
    # Run in inference mode so dropout / batch-norm do not behave as in training,
    # and restore the caller's mode afterwards so training is unaffected.
    was_training = isinstance(model, torch.nn.Module) and model.training
    if was_training:
        model.eval()

    loss_list = []
    pred_list = []
    label_list = []
    try:
        for datas, labels in dataloader:
            datas = datas.to(device)
            labels = labels.to(device)
            # Forward pass
            logits = model(datas)
            loss = loss_fct(logits, labels)         # loss on this evaluation batch
            loss_list.append(loss.item())

            preds = logits.argmax(axis=-1)    # predicted class per sample
            pred_list.extend(preds.cpu().numpy().tolist())
            label_list.extend(labels.cpu().numpy().tolist())
    finally:
        if was_training:
            model.train()

    acc = accuracy_score(label_list, pred_list)
    return np.mean(loss_list), acc


In [34]:
class EarlyStopCallback:
    """Track a higher-is-better metric and signal when it has stopped improving.

    Call the instance once per evaluation with the latest metric value. A value
    counts as an improvement only if it exceeds the best value seen so far by at
    least ``min_delta``; otherwise an internal counter is incremented.

    Args:
        patience: number of consecutive non-improving calls before
            ``early_stop`` becomes True.
        min_delta: minimum increase over the best metric that counts as an
            improvement.
    """

    def __init__(self, patience=5, min_delta=0.01):
        self.patience = patience
        self.min_delta = min_delta
        # Start from -inf so the first reported value always becomes the best.
        # A fixed floor such as -1 would reject metrics below it (e.g. negated losses).
        self.best_metric = float("-inf")
        self.counter = 0

    def __call__(self, metric):
        """Record ``metric``; reset the counter on improvement, else increment it."""
        if metric >= self.best_metric + self.min_delta:
            # update best metric
            self.best_metric = metric
            # reset counter
            self.counter = 0
        else:
            self.counter += 1

    @property
    def early_stop(self):
        """True once ``patience`` consecutive calls have failed to improve."""
        return self.counter >= self.patience


In [35]:
class MyRnn(nn.Module):
    """Hand-written single-layer RNN with a tanh state and a ReLU read-out.

    At every time step ``t``::

        h_t = tanh(x_t @ w_h + h_{t-1} @ u_h + b_h)
        y_t = relu(h_t @ w_y + b_y)

    Args:
        input_size: size of the last axis of each time-step input.
        hidden_size: size of the hidden state.
        output_size: size of each per-step output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(MyRnn, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        # Input-to-hidden, hidden-to-hidden weights and hidden bias.
        self.w_h = nn.Parameter(torch.randn(input_size, self.hidden_size))
        self.u_h = nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
        self.b_h = nn.Parameter(torch.zeros(self.hidden_size))

        # Hidden-to-output weight and bias.
        self.w_y = nn.Parameter(torch.randn(self.hidden_size, self.output_size))
        self.b_y = nn.Parameter(torch.zeros(self.output_size))

        self.tanh = nn.Tanh()
        self.relu = nn.ReLU()

        self.init_weights()

    def init_weights(self):
        """Re-initialise every weight matrix with Xavier-uniform; 1-D biases are left as zeros."""
        for param in self.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)

    def forward(self, x):
        """Run the recurrence over axis 1 of ``x``.

        Args:
            x: tensor indexed as ``x[:, i, :]`` per step, i.e. laid out as
                (batch, seq_len, input_size).

        Returns:
            tuple: ``(h, y_list)`` where ``h`` is the final hidden state of shape
            (batch, hidden_size) and ``y_list`` is a Python list with one
            (batch, output_size) output tensor per time step.
        """
        batch_size = x.size(0)
        seq_len = x.size(1)
        # Allocate the initial state next to the recurrent weights, not on a
        # notebook-global device, so the module works after .to(...) / .double().
        h = torch.zeros(
            batch_size, self.hidden_size,
            device=self.u_h.device, dtype=self.u_h.dtype,
        )
        y_list = []
        for i in range(seq_len):
            h = self.tanh(torch.matmul(x[:, i, :], self.w_h) + torch.matmul(h, self.u_h) + self.b_h)
            y = self.relu(torch.matmul(h, self.w_y) + self.b_y)
            y_list.append(y)

        return h, y_list