# 使用自编码器提取特征


## 1. 读取筛选后的数据


In [1]:
import pandas as pd
import torch

# Weather features fed to the autoencoder. The order of this list defines the
# column order of `data` / `data_tensor`.
COLUMN_INDEX = [
    "Temp (°C)",
    "Dew Point Temp (°C)",
    "Rel Hum (%)",
    "Wind Spd (km/h)",
    "Stn Press (kPa)",
]

# `usecols` makes pandas parse only the selected columns instead of loading the
# whole file and throwing the rest away. `usecols` keeps the file's own column
# order, so the frame is re-indexed with COLUMN_INDEX to preserve the order
# declared above.
data = pd.read_csv("Data/DataSet.csv", usecols=COLUMN_INDEX)[COLUMN_INDEX].to_numpy()

data_tensor = torch.tensor(data, dtype=torch.float32)

# Fail fast here: a single NaN/inf row would otherwise surface much later as a
# NaN training loss with no hint about where it came from.
assert torch.isfinite(data_tensor).all(), (
    "Data/DataSet.csv contains NaN or infinite values in the selected columns"
)

# NOTE(review): the features are on very different scales (in the rows shown
# below, pressure is ~100 while temperature is ~1-4) and are passed on unscaled.
# Consider standardizing; left as-is so recorded results stay comparable.
data_tensor

tensor([[  1.5000,  -3.6000,  69.0000,  39.0000,  99.8100],
        [  1.5000,  -3.9000,  67.0000,  35.0000, 100.0100],
        [  1.0000,  -4.3000,  68.0000,  32.0000, 100.1400],
        ...,
        [  4.0000,   3.6000,  97.0000,  32.0000,  99.1500],
        [  4.0000,   3.6000,  97.0000,  30.0000,  98.8000],
        [  4.0000,   3.7000,  98.0000,  15.0000,  98.5700]])

## 2. 模型训练


划分训练集和测试集


In [2]:
from Network import *

# Seed PyTorch's global RNG before splitting so that a fresh
# "Restart & Run All" reproduces the same results.
torch.manual_seed(42)

# The same tensor is passed twice to Split2Loaders.
# NOTE(review): presumably the two arguments are (inputs, targets), which for
# an autoencoder are identical; confirm against Network.Split2Loaders.
split_options = dict(BATCHSIZE=32, RATIO=0.7, SHUFFLE=True)
train_loader, test_loader = Split2Loaders(data_tensor, data_tensor, **split_options)

定义Auto Encoder结构


In [3]:
from torchinfo import summary

# Derive the input width from the data rather than hard-coding 5, so the model
# and the summary's dummy input shape follow any change to the selected columns.
n_features = data_tensor.shape[1]

MyAE = MyAutoencoder(
    input_size=n_features,
    hidden_size_1=6,
    hidden_size_2=3,  # the recorded summary shows the encoder output as [1, 3]
    dropout_prob=0.05,
)
# NOTE(review): presumably applies Xavier initialization to the model's
# weights; the helper is defined in the Network module, so confirm there.
INIT_WEIGHTS_XAVIER(MyAE)

# Summarise layers and parameter counts for a single sample of shape
# (1, n_features). Kept as the last expression so the table is displayed.
summary(MyAE, (1, n_features))

Layer (type:depth-idx)                   Output Shape              Param #
MyAutoencoder                            [1, 5]                    --
├─Sequential: 1-1                        [1, 3]                    --
│    └─Linear: 2-1                       [1, 6]                    36
│    └─ReLU: 2-2                         [1, 6]                    --
│    └─BatchNorm1d: 2-3                  [1, 6]                    12
│    └─Dropout: 2-4                      [1, 6]                    --
│    └─Linear: 2-5                       [1, 3]                    21
│    └─ReLU: 2-6                         [1, 3]                    --
│    └─BatchNorm1d: 2-7                  [1, 3]                    6
│    └─Dropout: 2-8                      [1, 3]                    --
├─Sequential: 1-2                        [1, 5]                    --
│    └─Linear: 2-9                       [1, 6]                    24
│    └─ReLU: 2-10                        [1, 6]                    --
│    └─BatchNorm

训练模型


In [4]:
# Adam with a small L2 penalty (weight_decay) on the autoencoder's parameters.
optimizer = torch.optim.Adam(MyAE.parameters(), lr=0.01, weight_decay=1e-5)

# The loss is built via the explicitly imported `torch` module. The original
# used a bare `nn`, which is not imported anywhere in this notebook and only
# resolved because `from Network import *` happened to leak it.
#
# DEVICE=0: the recorded output reports "Training on cuda:0".
# NOTE(review): how the helper behaves without a GPU is not visible here.
#
# The result is bound to a name so later cells can use it without relying on
# `_` / `Out[n]`. The recorded output shows a pair of per-epoch loss lists.
# NOTE(review): presumably (train_losses, test_losses); confirm in Network.
loss_history = TRAIN_WITH_PROGRESS_BAR(
    MyAE,
    NUM_EPOCHS=5,
    OPTIMIZER=optimizer,
    TRAIN_LOADER=train_loader,
    TEST_LOADER=test_loader,
    LOSS_TYPE=torch.nn.MSELoss(),
    DEVICE=0,
)
loss_history

PyTorch Version: 2.1.0.dev20230621+cu117
Training on cuda:0


Epoch 1/5: 100%|██████████| 384/384 [00:00<00:00, 492.14batch/s, loss=81.6]   
Epoch 2/5: 100%|██████████| 384/384 [00:00<00:00, 515.28batch/s, loss=91.5]
Epoch 3/5: 100%|██████████| 384/384 [00:00<00:00, 502.21batch/s, loss=138] 
Epoch 4/5: 100%|██████████| 384/384 [00:00<00:00, 510.79batch/s, loss=59.8]
Epoch 5/5: 100%|██████████| 384/384 [00:00<00:00, 527.34batch/s, loss=49.6]





([tensor(1567.7404),
  tensor(74.3750),
  tensor(68.6824),
  tensor(67.6160),
  tensor(67.7022)],
 [tensor(53.4542),
  tensor(21.8621),
  tensor(23.7604),
  tensor(23.8114),
  tensor(23.3905)])