# 使用自编码器提取特征


## 1. 读取筛选后的数据


In [1]:
import pandas as pd
import torch

# Columns kept as the autoencoder's input features (order defines the
# feature order of every row in `data` / `data_tensor`).
COLUMN_INDEX = [
    "Temp (°C)",
    "Dew Point Temp (°C)",
    "Rel Hum (%)",
    "Wind Spd (km/h)",
    "Stn Press (kPa)",
]

# Read the CSV, keep only the selected columns and convert to a NumPy array.
# NOTE(review): the training log further down reports loss=nan in every epoch;
# presumably these columns contain missing values and/or need scaling —
# confirm with `data_tensor.isnan().any()` before training.
data = (
    pd.read_csv("Data/DataSet.csv")
    .loc[:, COLUMN_INDEX]
    .to_numpy()
)

# float32 copy of the feature matrix, one row per CSV record.
data_tensor = torch.tensor(data, dtype=torch.float32)
data_tensor

tensor([[  1.5000,  -3.6000,  69.0000,  39.0000,  99.8100],
        [  1.5000,  -3.9000,  67.0000,  35.0000, 100.0100],
        [  1.0000,  -4.3000,  68.0000,  32.0000, 100.1400],
        ...,
        [  4.0000,   3.6000,  97.0000,  32.0000,  99.1500],
        [  4.0000,   3.6000,  97.0000,  30.0000,  98.8000],
        [  4.0000,   3.7000,  98.0000,  15.0000,  98.5700]])

## 2. 模型训练


划分测试集和训练集


In [2]:
from torch.utils.data import TensorDataset,DataLoader, random_split

# Batch size shared by both loaders, and the seed that makes the random
# train/test split reproducible across "Restart Kernel -> Run All".
BATCHSIZE = 32
SPLIT_SEED = 42

# 80 % of the rows go to training, the remainder to testing.
num_samples = data_tensor.shape[0]
train_size = int(0.8 * num_samples)
test_size = num_samples - train_size

# An autoencoder reconstructs its own input, so the tensor is used both as
# the input and as the target of every sample.
train_dataset, test_dataset = random_split(
    TensorDataset(data_tensor, data_tensor),
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Use the BATCHSIZE constant instead of repeating the literal, so changing
# it in one place affects both loaders.
train_loader = DataLoader(train_dataset, batch_size=BATCHSIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCHSIZE, shuffle=False)


定义Auto Encoder结构


In [7]:
from Network import *
from torchinfo import summary

# Build the autoencoder: 5 input features, hidden widths 8 and 4,
# dropout probability 0.2 (MyAutoencoder comes from the Network module).
MyAE = MyAutoencoder(
    input_size=5,
    hidden_size_1=8,
    hidden_size_2=4,
    dropout_prob=0.2,
)

# Show the layer-by-layer summary for a single sample of 5 features.
summary(MyAE, (1, 5))

Layer (type:depth-idx)                   Output Shape              Param #
MyAutoencoder                            [1, 5]                    --
├─Sequential: 1-1                        [1, 4]                    --
│    └─Linear: 2-1                       [1, 8]                    48
│    └─ReLU: 2-2                         [1, 8]                    --
│    └─Dropout: 2-3                      [1, 8]                    --
│    └─Linear: 2-4                       [1, 4]                    36
│    └─ReLU: 2-5                         [1, 4]                    --
│    └─Dropout: 2-6                      [1, 4]                    --
├─Sequential: 1-2                        [1, 5]                    --
│    └─Linear: 2-7                       [1, 8]                    40
│    └─ReLU: 2-8                         [1, 8]                    --
│    └─Dropout: 2-9                      [1, 8]                    --
│    └─Linear: 2-10                      [1, 5]                    45
Total params: 1

定义训练函数


In [8]:
from tqdm import tqdm
from torch import nn
from Network import *

def trainer(MODEL,NUM_EPOCHS,OPTIMIZER,TRAIN_LOADER,TEST_LOADER=None,LOSS_TYPE=nn.MSELoss(),DEVICE=0):
    """Train ``MODEL`` for ``NUM_EPOCHS`` epochs and report progress with tqdm.

    Args:
        MODEL: ``torch.nn.Module`` to optimise; it is moved to the selected
            device in place.
        NUM_EPOCHS: Number of full passes over ``TRAIN_LOADER``.
        OPTIMIZER: Optimizer already bound to ``MODEL``'s parameters.
        TRAIN_LOADER: Iterable yielding ``(input, target)`` batches used for
            the gradient updates.
        TEST_LOADER: Optional iterable yielding ``(input, target)`` batches.
            When given, the model is evaluated on it after every epoch and
            the mean of the per-batch losses is printed.
        LOSS_TYPE: Callable ``loss(output, target)`` returning a scalar tensor.
        DEVICE: Passed to ``GET_DEVICE`` (imported from ``Network``) to pick
            the device — presumably a CUDA device index; verify in Network.

    Returns:
        None. ``MODEL`` is updated in place. If ``TEST_LOADER`` is given the
        model is left in eval mode after the last epoch, otherwise in train
        mode.
    """
    print("PyTorch Version:",torch.__version__)
    device=GET_DEVICE(DEVICE)
    print("Training on",device)
    print(
        "====================================Start training===================================="
    )
    MODEL.to(device)
    for epoch in range(NUM_EPOCHS):
        # Make sure training-only layers (e.g. Dropout) are active, also
        # after an evaluation pass of the previous epoch switched them off.
        MODEL.train()
        # Iterate the loader passed by the caller, not a notebook global.
        with tqdm(
            TRAIN_LOADER, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch"
        ) as t:
            for x, y in t:
                # Forward pass
                x, y = x.to(device), y.to(device)
                output = MODEL(x)
                loss = LOSS_TYPE(output, y)

                # Backward pass and parameter update
                OPTIMIZER.zero_grad()
                loss.backward()
                OPTIMIZER.step()
                t.set_postfix(loss=loss.item())

        if TEST_LOADER is not None:
            # Evaluation pass: no dropout, no gradient tracking.
            MODEL.eval()
            batch_losses = []
            with torch.no_grad():
                for x, y in TEST_LOADER:
                    x, y = x.to(device), y.to(device)
                    batch_losses.append(LOSS_TYPE(MODEL(x), y).item())
            if batch_losses:  # skip the report for an empty loader
                mean_loss = sum(batch_losses) / len(batch_losses)
                print(f"Epoch {epoch+1}/{NUM_EPOCHS} test loss: {mean_loss:.6f}")
    print(
        "====================================Finish training====================================\n"
    )


训练模型


In [9]:
# Train the autoencoder: Adam with lr=0.001 and L2 weight decay 0.001,
# 15 epochs of MSE reconstruction loss on device 0.
adam_settings = {"lr": 0.001, "weight_decay": 0.001}
optimizer = torch.optim.Adam(MyAE.parameters(), **adam_settings)

trainer(
    MODEL=MyAE,
    NUM_EPOCHS=15,
    OPTIMIZER=optimizer,
    TRAIN_LOADER=train_loader,
    LOSS_TYPE=nn.MSELoss(),
    DEVICE=0,
)

PyTorch Version: 1.11.0
Training on cuda:0


Epoch 1/15: 100%|██████████| 438/438 [00:02<00:00, 167.78batch/s, loss=nan]   
Epoch 2/15: 100%|██████████| 438/438 [00:02<00:00, 201.80batch/s, loss=nan]
Epoch 3/15: 100%|██████████| 438/438 [00:02<00:00, 204.64batch/s, loss=nan]
Epoch 4/15: 100%|██████████| 438/438 [00:01<00:00, 221.08batch/s, loss=nan]
Epoch 5/15: 100%|██████████| 438/438 [00:02<00:00, 215.39batch/s, loss=nan]
Epoch 6/15: 100%|██████████| 438/438 [00:02<00:00, 212.08batch/s, loss=nan]
Epoch 7/15: 100%|██████████| 438/438 [00:02<00:00, 218.54batch/s, loss=nan]
Epoch 8/15: 100%|██████████| 438/438 [00:02<00:00, 213.15batch/s, loss=nan]
Epoch 9/15: 100%|██████████| 438/438 [00:02<00:00, 209.69batch/s, loss=nan]
Epoch 10/15: 100%|██████████| 438/438 [00:02<00:00, 203.79batch/s, loss=nan]
Epoch 11/15: 100%|██████████| 438/438 [00:02<00:00, 210.94batch/s, loss=nan]
Epoch 12/15: 100%|██████████| 438/438 [00:02<00:00, 207.80batch/s, loss=nan]
Epoch 13/15: 100%|██████████| 438/438 [00:02<00:00, 209.44batch/s, loss=nan]
Epoch





