In [1]:
import h5py
import pandas as pd
import numpy as np
import os
import re
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import math
import scipy.io as sio
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as pl
from sklearn import metrics
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary

In [2]:
# 设置文件夹路径和样本名称
folder_path = 'D:/data/waveforms2/'

# 创建字典来存储不同 SNR 值下的样本数据和属性信息
all_samples_by_snr = {
    '50.00': {'white_Data': [], 'white_Signal': [], 'attributes': []},
    '100.00': {'white_Data': [], 'white_Signal': [], 'attributes': []},
    '200.00': {'white_Data': [], 'white_Signal': [], 'attributes': []}
}

# 正则表达式匹配 SNR 值
snr_pattern = re.compile(r'_SNR(\d+\.\d+)\.h5')

# 遍历文件夹中的每个文件
for filename in os.listdir(folder_path):
    # 确保只处理 .h5 文件
    if filename.endswith('.h5'):
        file_path = os.path.join(folder_path, filename)
        
        # 提取 SNR 值
        match = snr_pattern.search(filename)
        if match:
            snr_value = match.group(1)  # 获取 SNR 值（如 '50.00'）
            
            # 检查是否在预期的 SNR 值中
            if snr_value in all_samples_by_snr:
                try:
                    with h5py.File(file_path, 'r') as file:
                        # 检查文件中是否包含 'data' 数据集
                        if 'Data' in file:
                            Data = file['Data']
                            
                            # 读取样本数据
                            if isinstance(Data, h5py.Dataset):
                                sample_1 = Data[0].flatten()  # 将样本扁平化为一维数组
                                sample_2 = Data[1].flatten()  # 将样本扁平化为一维数组
                            elif isinstance(Data, h5py.Group):
                                # 如果 'data' 是组
                                sample_1 = Data['white_Data'][:]
                                sample_2 = Data['white_signal'][:]
                            else:
                                print(f"Unexpected data type in {filename}")
                                continue
                            
                            # 读取属性信息
                            attributes = {attr: Data.attrs[attr] for attr in Data.attrs}
                            
                            # 将样本数据和属性信息添加到对应的 SNR 类别中
                            all_samples_by_snr[snr_value]['white_Data'].append(sample_1)
                            all_samples_by_snr[snr_value]['white_Signal'].append(sample_2)
                            all_samples_by_snr[snr_value]['attributes'].append(attributes)
                        else:
                            print(f"'Data' dataset not found in {filename}")
                
                # 捕获并报告文件无法读取的错误
                except (OSError, KeyError) as e:
                    print(f"Error reading file {filename}: {e}")

# 将每个 SNR 类别的样本列表拼接成完整的数据集
combined_datasets = {}
for snr_value, samples in all_samples_by_snr.items():
    combined_datasets[snr_value] = {
        'white_Data': np.array(samples['white_Data']),
        'white_Signal': np.array(samples['white_Signal']),
        'attributes': samples['attributes']  # 保留属性信息
    }
    print(f"SNR {snr_value} - white_Data shape:", combined_datasets[snr_value]['white_Data'].shape)
    print(f"SNR {snr_value} - white_Signal shape:", combined_datasets[snr_value]['white_Signal'].shape)
    print(f"SNR {snr_value} - Attributes:", combined_datasets[snr_value]['attributes'][0])  # 显示第一个文件的属性信息


SNR 50.00 - white_Data shape: (18000, 6184)
SNR 50.00 - white_Signal shape: (18000, 6184)
SNR 50.00 - Attributes: {'dl_true': 1.0910208776455039e+19, 'eta_true': 0.2410471954822854, 'iota_true': 1.2747004358482887, 'mc_true': 201.3973973282873, 'phic_true': 5.603175015853413, 'phis_true': 2.661901610522322, 'psi_true': 2.749441536415439, 'snr': 50, 'tc_true': 81014.19034346812, 'thetas_true': 1.48090896530732}
SNR 100.00 - white_Data shape: (18000, 6184)
SNR 100.00 - white_Signal shape: (18000, 6184)
SNR 100.00 - Attributes: {'dl_true': 1.0910208776455039e+19, 'eta_true': 0.2345313550836741, 'iota_true': 2.0045559545930582, 'mc_true': 117.15661887775255, 'phic_true': 3.562486464513828, 'phis_true': 0.7690836855688769, 'psi_true': 0.17080533179531923, 'snr': 100, 'tc_true': 72421.7463165898, 'thetas_true': 0.9723749157083255}
SNR 200.00 - white_Data shape: (17999, 6184)
SNR 200.00 - white_Signal shape: (17999, 6184)
SNR 200.00 - Attributes: {'dl_true': 1.0910208776455039e+19, 'eta_true'

In [3]:
if '50.00' in combined_datasets:
    target_values = []

    # 遍历每个样本的属性，提取 mc 的值
    for attributes in combined_datasets['50.00']['attributes']:
        target_value = attributes.get('mc_true')  # 获取 mc 的值
#         target_value = attributes.get('tc_true')  # 获取 tc 的值
        if target_value is not None:
            target_values.append(target_value)

    # 创建 shape 为 (样本数, 1) 的数组
    target_SNR50 = np.array(target_values).reshape(-1, 1)

    print("target_SNR50 shape:", target_SNR50.shape)
    print("target_SNR50 values:", target_SNR50)

if '100.00' in combined_datasets:
    target_values = []

    # 遍历每个样本的属性，提取 mc 的值
    for attributes in combined_datasets['100.00']['attributes']:
        target_value = attributes.get('mc_true')  # 获取 mc 的值
#         target_value = attributes.get('tc_true')  # 获取 tc 的值
        if target_value is not None:
            target_values.append(target_value)

    # 创建 shape 为 (样本数, 1) 的数组
    target_SNR100 = np.array(target_values).reshape(-1, 1)

    print("target_SNR100 shape:", target_SNR100.shape)
    print("target_SNR100 values:", target_SNR100)
    
if '200.00' in combined_datasets:
    target_values = []

    # 遍历每个样本的属性，提取 mc 的值
    for attributes in combined_datasets['200.00']['attributes']:
        target_value = attributes.get('mc_true')  # 获取 mc 的值
#         target_value = attributes.get('tc_true')  # 获取 tc 的值
        if target_value is not None:
            target_values.append(target_value)

    # 创建 shape 为 (样本数, 1) 的数组
    target_SNR200 = np.array(target_values).reshape(-1, 1)

    print("target_SNR200 shape:", target_SNR200.shape)
    print("target_SNR200 values:", target_SNR200)

target_SNR50 shape: (18000, 1)
target_SNR50 values: [[201.39739733]
 [117.15661888]
 [117.15661888]
 ...
 [201.39739733]
 [201.39739733]
 [201.39739733]]
target_SNR100 shape: (18000, 1)
target_SNR100 values: [[117.15661888]
 [117.15661888]
 [117.15661888]
 ...
 [201.39739733]
 [201.39739733]
 [201.39739733]]
target_SNR200 shape: (17999, 1)
target_SNR200 values: [[117.15661888]
 [117.15661888]
 [117.15661888]
 ...
 [201.39739733]
 [201.39739733]
 [201.39739733]]


In [4]:
data_SNR50 = combined_datasets['50.00']['white_Data']
signal_SNR50 = combined_datasets['50.00']['white_Signal']

data_SNR100 = combined_datasets['100.00']['white_Data']
signal_SNR100 = combined_datasets['100.00']['white_Signal']

data_SNR200 = combined_datasets['200.00']['white_Data']
signal_SNR200 = combined_datasets['200.00']['white_Signal']

print("data_SNR50 shape:", data_SNR50.shape)
print("data_SNR50 values:", data_SNR50)

data_SNR50 shape: (18000, 6184)
data_SNR50 values: [[ 0.03137989  0.25731843 -0.27357243 ...  0.50245534 -0.05588465
  -0.02275486]
 [-0.36055721  0.02079585 -0.11857392 ...  0.43280806 -0.05776062
  -0.22265224]
 [-0.09969713 -0.13879934 -0.04909878 ...  0.16727563  0.5150171
   0.26231936]
 ...
 [-0.35501129 -0.4650888  -0.30697662 ... -0.15451082 -0.25009854
   0.17222949]
 [ 0.10760243 -0.03824991  0.0096525  ... -0.51311919 -0.32216384
  -0.18480158]
 [ 0.28727159 -0.29340144  0.04059797 ... -0.15664948 -0.5164065
   0.03016353]]


In [5]:
data = signal_SNR200  #  选择数据
label = target_SNR200  #  选择标签

#  划分数据集
x_train, x_valtest, y_train, y_valtest = train_test_split(data, label, test_size=0.2, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_valtest, y_valtest, test_size=0.5, random_state=42)

print("训练集:", x_train.shape, y_train.shape)
print("验证集:", x_val.shape, y_val.shape)
print("测试集:", x_test.shape, y_test.shape)

训练集: (14399, 6184) (14399, 1)
验证集: (1800, 6184) (1800, 1)
测试集: (1800, 6184) (1800, 1)


In [6]:
#  标准化
std = StandardScaler().fit(x_train)
x_train = std.transform(x_train)
x_val = std.transform(x_val)
x_test = std.transform(x_test)

x_train

array([[ 0.19784915,  0.19734082,  0.19678974, ...,  1.54332901,
         1.5391046 ,  1.5467802 ],
       [ 0.45710036,  0.45412562,  0.45111574, ...,  1.53885862,
         1.54794727,  1.5196064 ],
       [ 0.35817274,  0.34568083,  0.33321574, ...,  0.47958742,
         0.49146276,  0.45847318],
       ...,
       [ 0.20237902,  0.20298519,  0.20355837, ...,  0.96407423,
         0.94234758,  1.00204403],
       [ 0.45846104,  0.45885631,  0.45921656, ...,  0.43965176,
         0.43246186,  0.45640482],
       [-0.65425841, -0.65784239, -0.66136536, ..., -0.40504931,
        -0.37743084, -0.44672226]])

In [7]:
label_std = StandardScaler().fit(y_train)
y_train = label_std.transform(y_train)
y_val = label_std.transform(y_val)
y_test = label_std.transform(y_test)

y_train = y_train.reshape(-1)
y_val = y_val.reshape(-1)
y_test = y_test.reshape(-1)

y_train

array([ 2.59461128, -0.35259493, -1.33779135, ...,  2.54983522,
       -0.75734024, -0.86193809])

In [8]:
x_train = torch.unsqueeze(torch.from_numpy(x_train), dim=1).to(torch.float32)
x_val = torch.unsqueeze(torch.from_numpy(x_val), dim=1).to(torch.float32)
x_test = torch.unsqueeze(torch.from_numpy(x_test), dim=1).to(torch.float32)

y_train = torch.from_numpy(y_train).to(torch.float32)
y_val = torch.from_numpy(y_val).to(torch.float32)
y_test = torch.from_numpy(y_test).to(torch.float32)

x_train.shape, x_val.shape, x_test.shape, y_train.shape, y_val.shape, y_test.shape

(torch.Size([14399, 1, 6184]),
 torch.Size([1800, 1, 6184]),
 torch.Size([1800, 1, 6184]),
 torch.Size([14399]),
 torch.Size([1800]),
 torch.Size([1800]))

In [9]:
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)
test_dataset = TensorDataset(x_test, y_test)

batch_size = 1
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [10]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=7, stride=stride, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(self.expansion * out_channels)
            )

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(residual)
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, block, layers):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv1d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512 * block.expansion, 1)  # 输出一个值

    def make_layer(self, block, out_channels, blocks, stride=1):
        layers = []
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)

        return out


def ResNetModel():
    return ResNet(BasicBlock, [2, 2, 2, 2])

In [11]:
class WaveUNet(nn.Module):
    def __init__(self, in_channels=1, out_channels=2, base_channels=64):
        super(WaveUNet, self).__init__()

        self.encoder1 = self.conv_block(in_channels, base_channels)
        self.encoder2 = self.conv_block(base_channels, base_channels * 2)
        self.encoder3 = self.conv_block(base_channels * 2, base_channels * 4)
        self.encoder4 = self.conv_block(base_channels * 4, base_channels * 8)
        self.encoder5 = self.conv_block(base_channels * 8, base_channels * 16)
        self.encoder6 = self.conv_block(base_channels * 16, base_channels * 32)
        self.encoder7 = self.conv_block(base_channels * 32, base_channels * 64)
        self.encoder8 = self.conv_block(base_channels * 64, base_channels * 128)

        self.decoder8 = self.conv_block(base_channels * 128, base_channels * 64)
        self.decoder7 = self.conv_block(base_channels * 64, base_channels * 32)
        self.decoder6 = self.conv_block(base_channels * 32, base_channels * 16)
        self.decoder5 = self.conv_block(base_channels * 16, base_channels * 8)
        self.decoder4 = self.conv_block(base_channels * 8, base_channels * 4)
        self.decoder3 = self.conv_block(base_channels * 4, base_channels * 2)
        self.decoder2 = self.conv_block(base_channels * 2, base_channels)
        self.decoder1 = self.conv_block(base_channels, out_channels, final_layer=True)

        self.pool = nn.MaxPool1d(2)
        self.upconv8 = nn.ConvTranspose1d(base_channels * 128, base_channels * 64, kernel_size=2, stride=2)
        self.upconv7 = nn.ConvTranspose1d(base_channels * 64, base_channels * 32, kernel_size=2, stride=2)
        self.upconv6 = nn.ConvTranspose1d(base_channels * 32, base_channels * 16, kernel_size=2, stride=2)
        self.upconv5 = nn.ConvTranspose1d(base_channels * 16, base_channels * 8, kernel_size=2, stride=2)
        self.upconv4 = nn.ConvTranspose1d(base_channels * 8, base_channels * 4, kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose1d(base_channels * 4, base_channels * 2, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose1d(base_channels * 2, base_channels, kernel_size=2, stride=2)

    def conv_block(self, in_channels, out_channels, final_layer=False):
        layers = [nn.Conv1d(in_channels, out_channels, kernel_size=15, padding=7),  # 使用较大的卷积核15
                  nn.ReLU(inplace=True)]
        
        if not final_layer:
            layers.append(nn.Conv1d(out_channels, out_channels, kernel_size=15, padding=7))  # 使用较大的卷积核15
            layers.append(nn.ReLU(inplace=True))
        
        return nn.Sequential(*layers)

    def forward(self, x):

        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool(enc1))
        enc3 = self.encoder3(self.pool(enc2))
        enc4 = self.encoder4(self.pool(enc3))
        enc5 = self.encoder5(self.pool(enc4))
        enc6 = self.encoder6(self.pool(enc5))
        enc7 = self.encoder7(self.pool(enc6))
        enc8 = self.encoder8(self.pool(enc7))

        dec8 = self.upconv8(enc8)
        dec8 = torch.cat([dec8, enc7], dim=1)
        dec8 = self.decoder8(dec8)

        dec7 = self.upconv7(dec8)
        if dec7.size(2) != enc6.size(2):
            enc6 = F.pad(enc6, (0, dec7.size(2) - enc6.size(2)))
        dec7 = torch.cat([dec7, enc6], dim=1)
        dec7 = self.decoder7(dec7)

        dec6 = self.upconv6(dec7)
        if dec6.size(2) != enc5.size(2):
            enc5 = F.pad(enc5, (0, dec6.size(2) - enc5.size(2)))
        dec6 = torch.cat([dec6, enc5], dim=1)
        dec6 = self.decoder6(dec6)

        dec5 = self.upconv5(dec6)
        if dec5.size(2) != enc4.size(2):
            enc4 = F.pad(enc4, (0, dec5.size(2) - enc4.size(2)))
        dec5 = torch.cat([dec5, enc4], dim=1)
        dec5 = self.decoder5(dec5)

        dec4 = self.upconv4(dec5)
        if dec4.size(2) != enc3.size(2):
            enc3 = F.pad(enc3, (0, dec4.size(2) - enc3.size(2)))
        dec4 = torch.cat([dec4, enc3], dim=1)
        dec4 = self.decoder4(dec4)

        dec3 = self.upconv3(dec4)
        if dec3.size(2) != enc2.size(2):
            enc2 = F.pad(enc2, (0, dec3.size(2) - enc2.size(2)))
        dec3 = torch.cat([dec3, enc2], dim=1)
        dec3 = self.decoder3(dec3)

        dec2 = self.upconv2(dec3)
        if dec2.size(2) != enc1.size(2):
            enc1 = F.pad(enc1, (0, dec2.size(2) - enc1.size(2)))
        dec2 = torch.cat([dec2, enc1], dim=1)
        dec2 = self.decoder2(dec2)

        dec1 = self.decoder1(dec2)
        
        return dec1

In [12]:
class Model(nn.Module):
    def __init__(self, wave_u_net, pred_model):
        super(Model, self).__init__()
        self.wave_u_net = wave_u_net
        self.pred_model = pred_model

    def forward(self, x):
        denoised_output = self.wave_u_net(x)
        denoised_signal = denoised_output[:, 0, :]

        predictions = self.pred_model(denoised_signal.unsqueeze(1))  # 添加通道维度
        return predictions

# 初始化模型
wave_u_net = WaveUNet(in_channels=1, out_channels=2, base_channels=64)
pred_model = ResNetModel()

In [13]:
model = Model(wave_u_net, pred_model)
# model = ResNetModel()

In [14]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
best_val_loss = float('inf')

In [15]:
summary(model, input_size=(1, 6184))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 6184]           1,024
              ReLU-2             [-1, 64, 6184]               0
            Conv1d-3             [-1, 64, 6184]          61,504
              ReLU-4             [-1, 64, 6184]               0
         MaxPool1d-5             [-1, 64, 3092]               0
            Conv1d-6            [-1, 128, 3092]         123,008
              ReLU-7            [-1, 128, 3092]               0
            Conv1d-8            [-1, 128, 3092]         245,888
              ReLU-9            [-1, 128, 3092]               0
        MaxPool1d-10            [-1, 128, 1546]               0
           Conv1d-11            [-1, 256, 1546]         491,776
             ReLU-12            [-1, 256, 1546]               0
           Conv1d-13            [-1, 256, 1546]         983,296
             ReLU-14            [-1, 25

In [16]:
# model.load_state_dict(torch.load("D:/code/best_model.pt"))

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)

    train_loss /= len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)

            val_loss += loss.item() * inputs.size(0)

    val_loss /= len(val_loader.dataset)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "D:/code/best_model.pt")

  return F.mse_loss(input, target, reduction=self.reduction)


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.75 GiB. GPU 

In [None]:
model.load_state_dict(torch.load("D:/code/best_model.pt"))
model.eval()

test_loss = 0.0
all_outputs = []  # 用于存储所有测试输出
all_labels = []   # 用于存储所有标签

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        test_outputs = model(inputs)
        
        # 将当前输出和标签存储到列表中
        all_outputs.append(test_outputs.cpu().numpy())  # 将输出移回 CPU 并转换为 NumPy 数组
        all_labels.append(labels.cpu().numpy())          # 将标签移回 CPU 并转换为 NumPy 数组

        loss = criterion(test_outputs.squeeze(), labels)
        test_loss += loss.item() * inputs.size(0)

# 计算平均损失
test_loss /= len(test_loader.dataset)

# 合并输出和标签
all_outputs = np.concatenate(all_outputs, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

print(f"Test Loss: {test_loss:.4f}")

In [None]:
inv_all_labels = label_std.inverse_transform(all_labels.reshape(-1, 1))
inv_all_outputs = label_std.inverse_transform(all_outputs)

pl.figure(figsize=(10, 5))
pl.scatter(inv_all_labels, inv_all_outputs, alpha=0.5)
pl.xlabel("True Mc")
pl.ylabel("Predicted Mc")
pl.title("Prediction of Mc")
pl.grid()
pl.show()