# 使用自编码器提取特征


## 1. 读取筛选后的数据


In [2]:
import pandas as pd
import torch

# Feature columns to keep from the CSV; the order of this list is the
# column order of the resulting array/tensor.
COLUMN_INDEX = [
    "Temp (°C)",
    "Dew Point Temp (°C)",
    "Rel Hum (%)",
    "Wind Spd (km/h)",
    "Stn Press (kPa)",
]

# Read the dataset, select only the columns above and convert to a NumPy array.
data = (
    pd.read_csv("Data/DataSet.csv")
    [COLUMN_INDEX]
    .to_numpy()
)

# float32 copy of the selected columns as a tensor.
data_tensor = torch.tensor(
    data,
    dtype=torch.float32,
)
# Bare last expression so the notebook displays the tensor.
data_tensor

tensor([[  1.5000,  -3.6000,  69.0000,  39.0000,  99.8100],
        [  1.5000,  -3.9000,  67.0000,  35.0000, 100.0100],
        [  1.0000,  -4.3000,  68.0000,  32.0000, 100.1400],
        ...,
        [  4.0000,   3.6000,  97.0000,  32.0000,  99.1500],
        [  4.0000,   3.6000,  97.0000,  30.0000,  98.8000],
        [  4.0000,   3.7000,  98.0000,  15.0000,  98.5700]])

## 2. 模型训练


划分测试集和训练集


In [3]:
from torch.utils.data import TensorDataset,DataLoader, random_split

# 80/20 train/test split of the samples. Input and target are the same
# tensor because the autoencoder is trained to reconstruct its input.
train_size = int(0.8 * data_tensor.shape[0])
test_size = data_tensor.shape[0] - train_size

# Fixed seed so the held-out set stays the same across kernel restarts /
# re-runs of this cell (otherwise "test" samples change on every run).
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    TensorDataset(data_tensor, data_tensor),
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Single source of truth for the batch size used by both loaders.
BATCHSIZE = 32

# Shuffle only the training data; keep evaluation order stable.
train_loader = DataLoader(train_dataset, batch_size=BATCHSIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCHSIZE, shuffle=False)


定义Auto Encoder结构


In [4]:
from Network import *
from torchinfo import summary

# Instantiate the autoencoder: 5 input features, hidden widths 4 and 2,
# dropout probability 0.05.
MyAE = MyAutoencoder(
    input_size=5,
    hidden_size_1=4,
    hidden_size_2=2,
    dropout_prob=0.05,
)

# Apply init_weights to every submodule (init_weights is defined in Network,
# not shown in this notebook).
MyAE.apply(init_weights)

# Print the layer-by-layer summary for a single sample with 5 features.
summary(MyAE, (1, 5))


Layer (type:depth-idx)                   Output Shape              Param #
MyAutoencoder                            [1, 5]                    --
├─Sequential: 1-1                        [1, 2]                    --
│    └─Linear: 2-1                       [1, 4]                    24
│    └─ReLU: 2-2                         [1, 4]                    --
│    └─BatchNorm1d: 2-3                  [1, 4]                    8
│    └─Dropout: 2-4                      [1, 4]                    --
│    └─Linear: 2-5                       [1, 2]                    10
│    └─ReLU: 2-6                         [1, 2]                    --
│    └─BatchNorm1d: 2-7                  [1, 2]                    4
│    └─Dropout: 2-8                      [1, 2]                    --
├─Sequential: 1-2                        [1, 5]                    --
│    └─Linear: 2-9                       [1, 4]                    12
│    └─ReLU: 2-10                        [1, 4]                    --
│    └─BatchNorm1

定义训练函数


In [5]:
from tqdm import tqdm
from torch import nn
from torch.nn.utils import clip_grad_norm_
from Network import *

def trainer(MODEL,NUM_EPOCHS,OPTIMIZER,TRAIN_LOADER,TEST_LOADER=None,LOSS_TYPE=nn.MSELoss(),DEVICE=0,GRAD_MAX=5):
    """Train MODEL on TRAIN_LOADER for NUM_EPOCHS epochs.

    Args:
        MODEL: the module to train; it is moved to the selected device.
        NUM_EPOCHS: number of full passes over TRAIN_LOADER.
        OPTIMIZER: optimizer already bound to MODEL's parameters.
        TRAIN_LOADER: iterable yielding (input, target) batches.
        TEST_LOADER: accepted for interface compatibility; currently unused.
        LOSS_TYPE: callable computing the loss from (output, target).
        DEVICE: passed to GET_DEVICE (from Network) to pick the device.
        GRAD_MAX: maximum gradient norm used for gradient clipping.

    Returns:
        None. Progress is reported through tqdm bars and print output.
    """
    print("PyTorch Version:",torch.__version__)
    device=GET_DEVICE(DEVICE)
    print("Training on",device)
    print(
        "====================================Start training===================================="
    )
    MODEL.to(device)
    # Make sure dropout / batch-norm layers are in training mode, matching
    # what trainer_temp does.
    MODEL.train()
    for epoch in range(NUM_EPOCHS):
        # Iterate the loader passed in by the caller, not the notebook-global
        # `train_loader`, so the TRAIN_LOADER argument is actually honoured.
        with tqdm(
            TRAIN_LOADER, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch"
        ) as t:
            for x, y in t:
                # Forward pass
                x, y = x.to(device), y.to(device)
                output = MODEL(x)
                loss = LOSS_TYPE(output, y)

                # Backward pass
                OPTIMIZER.zero_grad()
                loss.backward()

                # Gradient clipping
                clip_grad_norm_(MODEL.parameters(),GRAD_MAX)

                OPTIMIZER.step()
                # Show the current batch loss on the progress bar.
                t.set_postfix(loss=loss.item())
    print(
        "====================================Finish training====================================\n"
    )


In [5]:
def trainer_temp(MODEL,NUM_EPOCHS,OPTIMIZER,TRAIN_LOADER,TEST_LOADER=None,LOSS_TYPE=nn.MSELoss(),DEVICE=0,GRAD_MAX=5):
    """Train MODEL and record the average loss of every epoch.

    Args:
        MODEL: the module to train; it is moved to the selected device.
        NUM_EPOCHS: number of full passes over TRAIN_LOADER.
        OPTIMIZER: optimizer already bound to MODEL's parameters.
        TRAIN_LOADER: iterable yielding (input, target) training batches.
        TEST_LOADER: optional iterable yielding (input, target) batches; when
            given, the model is evaluated on it after every epoch.
        LOSS_TYPE: callable computing the loss from (output, target).
        DEVICE: passed to GET_DEVICE (from Network) to pick the device.
        GRAD_MAX: maximum gradient norm used for gradient clipping.

    Returns:
        (train_losses, test_losses): two lists of floats with one entry per
        epoch. Each entry is the mean of the per-batch losses of that epoch.
        test_losses is empty when TEST_LOADER is None. When TEST_LOADER is
        given, MODEL is left in eval mode after the last epoch.
    """
    print("PyTorch Version:",torch.__version__)
    device=GET_DEVICE(DEVICE)
    print("Training on",device)
    print(
        "====================================Start training===================================="
    )
    # Move the model to the selected device
    MODEL.to(device)

    # Per-epoch training and test loss history
    train_losses = []
    test_losses = []

    for epoch in range(NUM_EPOCHS):
        # Switch to train mode (evaluation below may have switched to eval)
        MODEL.train()
        # Plain Python accumulators: loss.item() already returns a float, so
        # no tensor is needed (the previous torch.tensor(0,0) raised TypeError).
        train_loss_sum = 0.0
        train_batches = 0

        # Gradient descent on the training-set loss. Use the loader passed in
        # by the caller rather than the notebook-global `train_loader`.
        with tqdm(
            TRAIN_LOADER, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", unit="batch"
        ) as t:
            for x, y in t:
                # Forward pass
                x, y = x.to(device), y.to(device)
                output = MODEL(x)
                loss = LOSS_TYPE(output, y)

                # Backward pass
                OPTIMIZER.zero_grad()
                loss.backward()

                # Gradient clipping
                clip_grad_norm_(MODEL.parameters(),GRAD_MAX)

                OPTIMIZER.step()
                batch_loss = loss.item()
                t.set_postfix(loss=batch_loss)
                train_loss_sum += batch_loss
                train_batches += 1

        # Mean of the per-batch losses; NaN if the loader yielded no batches.
        train_losses.append(
            train_loss_sum / train_batches if train_batches else float("nan")
        )

        # Evaluate on the test set without tracking gradients.
        if TEST_LOADER is not None:
            MODEL.eval()
            test_loss_sum = 0.0
            test_batches = 0
            with torch.no_grad():
                for x, y in TEST_LOADER:
                    x, y = x.to(device), y.to(device)
                    test_loss_sum += LOSS_TYPE(MODEL(x), y).item()
                    test_batches += 1
            test_losses.append(
                test_loss_sum / test_batches if test_batches else float("nan")
            )
    print(
        "====================================Finish training====================================\n"
    )
    return train_losses, test_losses

训练模型


In [6]:
# Train the autoencoder: Adam optimizer over all model parameters with a
# small weight decay, 20 epochs, MSE reconstruction loss.
optimizer = torch.optim.Adam(
    params=MyAE.parameters(),
    lr=0.01,
    weight_decay=1e-5,
)
trainer(
    MODEL=MyAE,
    NUM_EPOCHS=20,
    OPTIMIZER=optimizer,
    TRAIN_LOADER=train_loader,
    LOSS_TYPE=nn.MSELoss(),
    DEVICE=0,
)

PyTorch Version: 1.11.0
Training on cuda:0


Epoch 1/20: 100%|██████████| 438/438 [00:02<00:00, 167.97batch/s, loss=115]    
Epoch 2/20: 100%|██████████| 438/438 [00:02<00:00, 167.46batch/s, loss=83.4]
Epoch 3/20: 100%|██████████| 438/438 [00:03<00:00, 143.67batch/s, loss=52.2]
Epoch 4/20: 100%|██████████| 438/438 [00:02<00:00, 149.49batch/s, loss=99.7]
Epoch 5/20: 100%|██████████| 438/438 [00:02<00:00, 151.19batch/s, loss=98]  
Epoch 6/20: 100%|██████████| 438/438 [00:02<00:00, 156.52batch/s, loss=93.6]
Epoch 7/20: 100%|██████████| 438/438 [00:02<00:00, 156.86batch/s, loss=119] 
Epoch 8/20: 100%|██████████| 438/438 [00:02<00:00, 157.61batch/s, loss=59.8]
Epoch 9/20: 100%|██████████| 438/438 [00:02<00:00, 153.38batch/s, loss=47.4]
Epoch 10/20: 100%|██████████| 438/438 [00:02<00:00, 155.06batch/s, loss=78.8]
Epoch 11/20: 100%|██████████| 438/438 [00:02<00:00, 154.28batch/s, loss=84]  
Epoch 12/20: 100%|██████████| 438/438 [00:02<00:00, 154.89batch/s, loss=86.1]
Epoch 13/20: 100%|██████████| 438/438 [00:02<00:00, 155.99batch/s, los





