In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets,transforms
import pandas as pd
import numpy as np
# torch.__version__=1.13.1

1. 数据预处理

In [32]:
path="D:/SRP/raw_data.xlsx"
raw_data=pd.read_excel(path,header=1)
col=len(raw_data.columns)
density=raw_data['DENSITY']
components=raw_data.iloc[:,1:(col-1)]

In [33]:
# device configuration
device=torch.device('cuda' if torch.cuda.is_available() else "cpu")

In [34]:
components=components.astype(np.float32)
X_train=torch.from_numpy(components.values)
# 数据归一化后效果可能更好
length=len(X_train[0])

2. 网络定义

In [35]:
class AutoencoderSRP(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder=nn.Sequential(
        nn.Linear(33,16),nn.ReLU(),
        nn.Linear(16,8),nn.ReLU(),
        nn.Linear(8,4))
        self.decoder=nn.Sequential(
        nn.Linear(4,8),nn.ReLU(),
        nn.Linear(8,16),nn.ReLU(),
        nn.Linear(16,33),nn.ReLU())
    
    def forward(self,x):
        encoded=self.encoder(x)
        decoded=self.decoder(encoded)
        return decoded

3. 参数定义

In [36]:
model=AutoencoderSRP().to(device)
criterion=nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters(),lr=1e-3,weight_decay=1e-5)

4. 训练权重

In [43]:
nums_epoch=10
n_samples=len(X_train)
for epoch in range(nums_epoch):
    for i,data in enumerate(X_train):
        data=data.reshape(-1,33).to(device)
        recall=model(data)
        loss=criterion(recall,data)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(i+1)%6000==0:
            print(f'Epoch:{epoch+1}/{nums_epoch},Step {i+1}/{n_samples},Loss:{loss.item():.4f}')

Epoch:1/10,Step 6000/64481,Loss:89.5685
Epoch:1/10,Step 12000/64481,Loss:140.5355
Epoch:1/10,Step 18000/64481,Loss:148.4608
Epoch:1/10,Step 24000/64481,Loss:124.7971
Epoch:1/10,Step 30000/64481,Loss:108.8388
Epoch:1/10,Step 36000/64481,Loss:88.5151
Epoch:1/10,Step 42000/64481,Loss:162.4242
Epoch:1/10,Step 48000/64481,Loss:99.8938
Epoch:1/10,Step 54000/64481,Loss:105.0303
Epoch:1/10,Step 60000/64481,Loss:148.5455
Epoch:2/10,Step 6000/64481,Loss:89.5989
Epoch:2/10,Step 12000/64481,Loss:140.5354
Epoch:2/10,Step 18000/64481,Loss:148.4610
Epoch:2/10,Step 24000/64481,Loss:124.8198
Epoch:2/10,Step 30000/64481,Loss:108.8388
Epoch:2/10,Step 36000/64481,Loss:88.5784
Epoch:2/10,Step 42000/64481,Loss:162.4545
Epoch:2/10,Step 48000/64481,Loss:99.8891
Epoch:2/10,Step 54000/64481,Loss:105.1515
Epoch:2/10,Step 60000/64481,Loss:148.6363
Epoch:3/10,Step 6000/64481,Loss:89.5685
Epoch:3/10,Step 12000/64481,Loss:140.5355
Epoch:3/10,Step 18000/64481,Loss:148.4913
Epoch:3/10,Step 24000/64481,Loss:124.8268
Ep

5. 保存权重

In [45]:
path='./weights.pth'
torch.save(model.state_dict(), path)

6. encoder 提取特征

In [46]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder=nn.Sequential(
        nn.Linear(33,16),nn.ReLU(),
        nn.Linear(16,8),nn.ReLU(),
        nn.Linear(8,4))
    
    def forward(self,x):
        return self.encoder(x)

In [47]:
net = Encoder()
pretext_model = torch.load('weights.pth')
model2_dict = net.state_dict()
state_dict = {k:v for k,v in pretext_model.items() if k in model2_dict.keys()}
model2_dict.update(state_dict)
net.load_state_dict(model2_dict)

<All keys matched successfully>

7. 保存特征

In [51]:
result=[]
for data in X_train:
    data=data.reshape(-1,33)
    refine=net(data)
    result.append(refine.detach().numpy()[0])

array=np.array(result)
# print(array)
vector=pd.DataFrame(array)
writer=pd.ExcelWriter('./result.xlsx')
vector.to_excel(writer, 'sheet_1', float_format='%.4f', header=False, index=True)
writer.save()

[[ 198.20093    38.223866 -109.93348    76.85621 ]
 [ 200.18657   -49.39833  -487.2141     50.556984]
 [-166.1096    -31.705608 -285.3175   -130.50688 ]
 ...
 [-205.89456  -159.81316  -480.51596   -99.856964]
 [ -92.20791   -27.366047  -23.9021     85.7311  ]
 [   7.82654   -50.218586  -68.74241   105.18215 ]]


  writer.save()


### 匹配权重之后可以对四维数组做新的分类或者回归任务