#### 导入神经网络中间层特征

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# 定义数据集类
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.labels[index]


ModuleNotFoundError: No module named 'torch'

In [284]:
# 在训练之前设置随机种子
import random
import numpy as np
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [285]:
# 定义神经网络模型
# 利用神经网络提取中间层特征
# 但是有一个问题是为什么要用这样一个三个线形层的神经网络：
# 前馈神经网络：一个输入层，两个隐藏层和一个输出层

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2048, 30)
        self.fc2 = nn.Linear(30, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        intermediate_features = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(intermediate_features))  # 提取中间层特征
        x = self.fc3(x)
        return x, intermediate_features


In [286]:
# 从Excel读取数据集
data_df = pd.read_excel('Pb1.xlsx', sheet_name='Sheet1')
data = data_df.iloc[:, :2048].values
label_df = pd.read_excel('Pb1.xlsx', sheet_name="Sheet2")
labels = label_df.iloc[:, 0].values

# 数据预处理：标准化
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data)

# 假设data是包含特征数据的数组，labels是包含对应标签的数组
data_train, data_test, labels_train, labels_test = train_test_split(data_normalized, labels, test_size=0.2)

data_train_tensor = torch.tensor(data_train, dtype=torch.float32).clone().detach()
labels_train_tensor = torch.tensor(labels_train, dtype=torch.float32).clone().detach()
data_test_tensor = torch.tensor(data_test, dtype=torch.float32).clone().detach()
labels_test_tensor = torch.tensor(labels_test, dtype=torch.float32).clone().detach()

batch_size = 57

# 准备数据
train_dataset = CustomDataset(data_train, labels_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [296]:
# 存储最后一次训练的隐藏层特征
last_hidden_features = None

# 设置超参数

learning_rate = 0.0001
batch_size = 57
num_epochs = 1000

# 初始化模型和损失函数
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练神经网络模型
total_step = len(train_dataloader)
for epoch in range(num_epochs):
    for inputs, targets in train_dataloader:
        outputs, intermediate_features = model(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))  # 将目标值展开为列向量进行比较
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        # 计算训练集上的预测精度差
        train_outputs,_ = model(data_train_tensor)
        train_rmse = mean_squared_error(labels_train_tensor, train_outputs.detach().numpy(), squared=False)
        train_r2 = r2_score(labels_train_tensor, train_outputs.detach().numpy())
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Train RMSE: {train_rmse}, Train R^2: {train_r2}")
        # 提取最后一次训练的隐藏层特征
    last_hidden_features = intermediate_features.detach().numpy().astype(np.float64)

Epoch [1/1000], Loss: 192188.34375, Train RMSE: 438.38104248046875, Train R^2: -0.13306500665677823
Epoch [2/1000], Loss: 192177.9375, Train RMSE: 438.3682556152344, Train R^2: -0.1329988753569864
Epoch [3/1000], Loss: 192166.734375, Train RMSE: 438.35528564453125, Train R^2: -0.13293192236604168
Epoch [4/1000], Loss: 192155.359375, Train RMSE: 438.343017578125, Train R^2: -0.13286836453062567
Epoch [5/1000], Loss: 192144.59375, Train RMSE: 438.33331298828125, Train R^2: -0.13281833996428238
Epoch [6/1000], Loss: 192136.09375, Train RMSE: 438.3243713378906, Train R^2: -0.13277219492256664
Epoch [7/1000], Loss: 192128.265625, Train RMSE: 438.31640625, Train R^2: -0.1327308662655593
Epoch [8/1000], Loss: 192121.25, Train RMSE: 438.30841064453125, Train R^2: -0.1326896120409209
Epoch [9/1000], Loss: 192114.25, Train RMSE: 438.30059814453125, Train R^2: -0.1326492472037093
Epoch [10/1000], Loss: 192107.4375, Train RMSE: 438.2926940917969, Train R^2: -0.13260835258166148
Epoch [11/1000], Lo

In [297]:
# 直接通过神经网路进行预测
model_path = "model_full_spectrum.pth"
# 将模型的参数保存到文件中
torch.save(model.state_dict(), model_path)
model1 = Net()
model1.load_state_dict(torch.load(model_path))

test_inputs = data_test_tensor
test_outputs,_ = model1(test_inputs)
test_outputs = test_outputs.detach().numpy()
test_outputs = np.squeeze(test_outputs)
labels_test = np.squeeze(labels_test)

print(test_outputs)
print(labels_test)
test_rmse = mean_squared_error(labels_test, test_outputs, squared=False)
test_r2 = r2_score(labels_test, test_outputs)
print("Test RMSE:", test_rmse)
print("Test R2:",test_r2)

if (test_r2>=0.88):
    good_model_full_spectrum_path = "good_model_full_spectrum.pth"+str(test_r2)
    torch.save(model.state_dict(),good_model_full_spectrum_path)


[ 5.5541353  44.06235     0.5568623  74.26954     0.40456483 43.76542
  0.18217534 16.04346     0.5568623  62.292477   85.68736    52.884235  ]
[35.7 31.  21.  24.7 17.  21.  28.  22.  31.  79.  58.  22. ]
Test RMSE: 26.589151540746386
Test R2: -1.3348397137471841


In [289]:
from sklearn.decomposition import PCA

pca = PCA(n_components=4)
data_normalized_pca = pca.fit_transform(data_normalized)
print(data_normalized_pca.shape)

class Net_PCA(nn.Module):
    def __init__(self):
        super(Net_PCA, self).__init__()
        self.fc1 = nn.Linear(4, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        intermediate_features = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(intermediate_features))  # 提取中间层特征
        x = self.fc3(x)
        return x, intermediate_features


# 假设data是包含特征数据的数组，labels是包含对应标签的数组
data_train_pca, data_test_pca, labels_train_pca, labels_test_pca = train_test_split(data_normalized_pca, labels, test_size=0.2, random_state=42)

data_train_tensor_pca = torch.tensor(data_train_pca, dtype=torch.float32).clone().detach()
labels_train_tensor_pca = torch.tensor(labels_train_pca, dtype=torch.float32).clone().detach()
data_test_tensor_pca = torch.tensor(data_test_pca, dtype=torch.float32).clone().detach()
labels_test_tensor_pca = torch.tensor(labels_test_pca, dtype=torch.float32).clone().detach()

# 准备数据
train_dataset_pca = CustomDataset(data_train_pca, labels_train_pca)
train_dataloader_pca = DataLoader(train_dataset_pca, batch_size=batch_size, shuffle=True)

# 存储最后一次训练的隐藏层特征
last_hidden_features = None

# 设置超参数

learning_rate = 0.001
batch_size = 57
num_epochs = 500

# 初始化模型和损失函数
model_pca = Net_PCA()
criterion = nn.MSELoss()
optimizer = optim.Adam(model_pca.parameters(), lr=learning_rate)

# 训练神经网络模型
total_step = len(train_dataloader_pca)
for epoch in range(num_epochs):
    for inputs, targets in train_dataloader_pca:
        outputs, intermediate_features = model_pca(inputs)
        loss = criterion(outputs, targets.unsqueeze(1))  # 将目标值展开为列向量进行比较
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        # 计算训练集上的预测精度差
        train_outputs,_ = model_pca(data_train_tensor_pca)
        train_rmse = mean_squared_error(labels_train_tensor_pca, train_outputs.detach().numpy(), squared=False)
        train_r2 = r2_score(labels_train_tensor_pca, train_outputs.detach().numpy())
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Train RMSE: {train_rmse}, Train R^2: {train_r2}")
        # 提取最后一次训练的隐藏层特征
    last_hidden_features = intermediate_features.detach().numpy().astype(np.float64)


(56, 4)
Epoch [1/500], Loss: 192589.359375, Train RMSE: 438.65338134765625, Train R^2: -0.13447333777314663
Epoch [2/500], Loss: 192416.796875, Train RMSE: 438.45751953125, Train R^2: -0.13346041940218467
Epoch [3/500], Loss: 192245.0, Train RMSE: 438.2488098144531, Train R^2: -0.13238164672152397
Epoch [4/500], Loss: 192062.015625, Train RMSE: 438.0294189453125, Train R^2: -0.13124793569067061
Epoch [5/500], Loss: 191869.75, Train RMSE: 437.8118896484375, Train R^2: -0.13012478575520592
Epoch [6/500], Loss: 191679.25, Train RMSE: 437.59649658203125, Train R^2: -0.12901306390038703
Epoch [7/500], Loss: 191490.6875, Train RMSE: 437.3786926269531, Train R^2: -0.12788954484856507
Epoch [8/500], Loss: 191300.140625, Train RMSE: 437.15740966796875, Train R^2: -0.12674847643689469
Epoch [9/500], Loss: 191106.609375, Train RMSE: 436.931640625, Train R^2: -0.12558510586720129


Epoch [10/500], Loss: 190909.265625, Train RMSE: 436.7046813964844, Train R^2: -0.12441606433770946
Epoch [11/500], Loss: 190711.0, Train RMSE: 436.4761047363281, Train R^2: -0.12323923312175444
Epoch [12/500], Loss: 190511.390625, Train RMSE: 436.2442626953125, Train R^2: -0.12204616007346569
Epoch [13/500], Loss: 190309.03125, Train RMSE: 436.0046081542969, Train R^2: -0.1208138181079137
Epoch [14/500], Loss: 190100.015625, Train RMSE: 435.75860595703125, Train R^2: -0.11954939843617685
Epoch [15/500], Loss: 189885.5625, Train RMSE: 435.50537109375, Train R^2: -0.11824840308266471
Epoch [16/500], Loss: 189664.90625, Train RMSE: 435.2492370605469, Train R^2: -0.11693346676955585
Epoch [17/500], Loss: 189441.890625, Train RMSE: 434.99346923828125, Train R^2: -0.1156212803759269
Epoch [18/500], Loss: 189219.328125, Train RMSE: 434.74530029296875, Train R^2: -0.11434857737992754
Epoch [19/500], Loss: 189003.453125, Train RMSE: 434.4920654296875, Train R^2: -0.1130507055792922
Epoch [20/5

In [290]:
# 直接通过神经网路进行预测
model_path_pca = "model_pca.pth"
# 将模型的参数保存到文件中
torch.save(model_pca.state_dict(), model_path_pca)
model2 = Net_PCA()
model2.load_state_dict(torch.load(model_path_pca))

test_inputs = data_test_tensor_pca
test_outputs,_ = model2(test_inputs)
test_outputs = test_outputs.detach().numpy()
test_outputs = np.squeeze(test_outputs)
labels_test = np.squeeze(labels_test_pca)

test_rmse = mean_squared_error(labels_test, test_outputs, squared=False)
test_r2 = r2_score(labels_test_pca, test_outputs)
print("Test RMSE:", test_rmse)
print("Test R2:",test_r2)

Test RMSE: 30.724145442702717
Test R2: -2.1175091576354363
