In [1]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy.stats import kurtosis, skew

# 1. 加载数据
dataX = np.loadtxt('dataset1/dataX.txt')
dataY = np.loadtxt('dataset1/dataY.txt')
dataZ = np.loadtxt('dataset1/dataZ.txt')
dataLabels = np.loadtxt('dataset1/dataLabel.txt')  # 假设每一行有两列

# 2. 数据预处理：合并 x, y, z 方向的数据
data = np.column_stack((dataX, dataY, dataZ))

# 将标签分为两个部分：defect 标签和 severity 标签
labels_defect = dataLabels[:, 0].astype(int)  # 道路缺陷标签
labels_severity = dataLabels[:, 1].astype(int)  # 严重程度标签

# 3. 时域特征提取函数
def extract_features(data):
    features = []
    for group in data:
        feature = []
        feature.append(np.mean(group))         # 均值
        feature.append(np.std(group))          # 标准差
        feature.append(np.min(group))          # 最小值
        feature.append(np.max(group))          # 最大值
        feature.append(np.var(group))          # 方差
        feature.append(np.sqrt(np.mean(group**2)))  # 均方根
        feature.append(kurtosis(group))        # 峰度
        feature.append(skew(group))            # 偏度
        features.append(feature)
    return np.array(features)

# 提取 x, y, z 三个方向的时域特征
features_X = extract_features(dataX)
features_Y = extract_features(dataY)
features_Z = extract_features(dataZ)

# 将特征合并
data_features = np.column_stack((features_X, features_Y, features_Z))

# 4. 划分数据集
X_train, X_test, y_train_defect, y_test_defect, y_train_severity, y_test_severity = train_test_split(
    data_features, labels_defect, labels_severity, test_size=0.2, stratify=labels_defect, random_state=42
)

# 5. 创建 PyTorch 数据加载器
# 转换为 PyTorch 张量
train_tensor = torch.tensor(X_train, dtype=torch.float32)
test_tensor = torch.tensor(X_test, dtype=torch.float32)

train_labels_defect_tensor = torch.tensor(y_train_defect, dtype=torch.long)
test_labels_defect_tensor = torch.tensor(y_test_defect, dtype=torch.long)

train_labels_severity_tensor = torch.tensor(y_train_severity, dtype=torch.long)
test_labels_severity_tensor = torch.tensor(y_test_severity, dtype=torch.long)

# 创建 TensorDataset 和 DataLoader
train_dataset = TensorDataset(train_tensor, train_labels_defect_tensor, train_labels_severity_tensor)
test_dataset = TensorDataset(test_tensor, test_labels_defect_tensor, test_labels_severity_tensor)
batch_size = 1024  # 你可以在这里修改批大小
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("数据集划分完成")
print(f"训练集大小: {len(train_dataset)}")
print(f"测试集大小: {len(test_dataset)}")


数据集划分完成
训练集大小: 998
测试集大小: 250


In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import kurtosis, skew

# 1. 加载数据
dataX = np.loadtxt('dataset1/dataX.txt')
dataY = np.loadtxt('dataset1/dataY.txt')
dataZ = np.loadtxt('dataset1/dataZ.txt')
dataLabels = np.loadtxt('dataset1/dataLabel.txt')  # 假设每一行有两列

# 将标签分为两个部分：defect 标签和 severity 标签
labels_defect = dataLabels[:, 0].astype(int)  # 道路缺陷标签
labels_severity = dataLabels[:, 1].astype(int)  # 严重程度标签

# 2. 时域特征提取函数
def extract_time_features(data):
    features = []
    for group in data:
        feature = []
        feature.append(np.mean(group))         # 均值
        feature.append(np.std(group))          # 标准差
        feature.append(np.min(group))          # 最小值
        feature.append(np.max(group))          # 最大值
        feature.append(np.var(group))          # 方差
        feature.append(np.sqrt(np.mean(group**2)))  # 均方根
        feature.append(kurtosis(group))        # 峰度
        feature.append(skew(group))            # 偏度
        features.append(feature)
    return np.array(features)

# 3. 频域特征提取函数
def extract_frequency_features(data):
    features = []
    for group in data:
        fft_vals = np.fft.fft(group)
        amplitude_spectrum = np.abs(fft_vals)
        phase_spectrum = np.angle(fft_vals)

        # 计算频域特征
        feature = []
        feature.append(np.mean(amplitude_spectrum))        # 幅度谱均值
        feature.append(np.std(amplitude_spectrum))         # 幅度谱标准差
        feature.append(np.mean(phase_spectrum))            # 相位谱均值
        feature.append(np.std(phase_spectrum))             # 相位谱标准差
        feature.append(np.mean(amplitude_spectrum[:len(amplitude_spectrum)//2]))  # 低频部分均值
        feature.append(np.mean(amplitude_spectrum[len(amplitude_spectrum)//2:]))  # 高频部分均值
        features.append(feature)
    return np.array(features)

# 提取 x, y, z 三个方向的时域特征和频域特征
time_features_X = extract_time_features(dataX)
time_features_Y = extract_time_features(dataY)
time_features_Z = extract_time_features(dataZ)

freq_features_X = extract_frequency_features(dataX)
freq_features_Y = extract_frequency_features(dataY)
freq_features_Z = extract_frequency_features(dataZ)

# 将时域特征和频域特征合并
features_X = np.column_stack((time_features_X, freq_features_X))
features_Y = np.column_stack((time_features_Y, freq_features_Y))
features_Z = np.column_stack((time_features_Z, freq_features_Z))

# 将特征合并
data_features = np.column_stack((features_X, features_Y, features_Z))

# 4. 划分数据集
X_train, X_test, y_train_defect, y_test_defect, y_train_severity, y_test_severity = train_test_split(
    data_features, labels_defect, labels_severity, test_size=0.2, stratify=labels_defect, random_state=42
)

# 5. 定义模型
class SimpleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout_rate):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc_defect = nn.Linear(hidden_dim, 3)  # 道路缺陷分类输出 (假设是 3 类)
        self.fc_severity = nn.Linear(hidden_dim, 4)  # 严重程度分类输出 (假设是 4 类)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        defect_output = self.fc_defect(x)
        severity_output = self.fc_severity(x)
        return defect_output, severity_output

# 6. 定义训练和测试过程
def train_and_evaluate_model(learning_rate, batch_size, dropout_rate, hidden_dim):
    # 创建数据加载器
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                                  torch.tensor(y_train_defect, dtype=torch.long),
                                  torch.tensor(y_train_severity, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                                torch.tensor(y_test_defect, dtype=torch.long),
                                torch.tensor(y_test_severity, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # 定义模型
    model = SimpleNN(input_dim=X_train.shape[1], hidden_dim=hidden_dim, dropout_rate=dropout_rate)
    
    # 定义损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 训练模型
    num_epochs = 20
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, defect_labels, severity_labels in train_loader:
            optimizer.zero_grad()
            defect_preds, severity_preds = model(inputs)
            loss_defect = criterion(defect_preds, defect_labels)
            loss_severity = criterion(severity_preds, severity_labels)
            loss = loss_defect + loss_severity
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    # 在验证集上评估模型
    model.eval()
    all_defect_labels = []
    all_defect_preds = []
    all_severity_labels = []
    all_severity_preds = []

    with torch.no_grad():
        for inputs, defect_labels, severity_labels in val_loader:
            defect_preds, severity_preds = model(inputs)
            _, predicted_defects = torch.max(defect_preds, 1)
            _, predicted_severities = torch.max(severity_preds, 1)

            all_defect_labels.extend(defect_labels.cpu().numpy())
            all_defect_preds.extend(predicted_defects.cpu().numpy())
            all_severity_labels.extend(severity_labels.cpu().numpy())
            all_severity_preds.extend(predicted_severities.cpu().numpy())

    # 计算验证集上的准确率
    accuracy_defect = accuracy_score(all_defect_labels, all_defect_preds)
    accuracy_severity = accuracy_score(all_severity_labels, all_severity_preds)

    avg_accuracy = (accuracy_defect + accuracy_severity) / 2
    return -avg_accuracy

# 7. 使用 Optuna 进行超参数优化
import optuna

def objective(trial):
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    hidden_dim = trial.suggest_categorical('hidden_dim', [32, 64, 128])

    return train_and_evaluate_model(learning_rate, batch_size, dropout_rate, hidden_dim)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # 进行 50 次试验

# 输出最佳超参数
print("Best hyperparameters: ", study.best_params)

# 使用最佳超参数重新训练模型
best_params = study.best_params
train_and_evaluate_model(best_params['lr'], best_params['batch_size'], best_params['dropout_rate'], best_params['hidden_dim'])


[32m[I 2024-09-17 16:15:00,143][0m Finished trial#0 resulted in value: -0.8260000000000001. Current best value is -0.8260000000000001 with parameters: {'lr': 0.007904286522713692, 'batch_size': 32, 'dropout_rate': 0.3777513159299868, 'hidden_dim': 64}.[0m
[32m[I 2024-09-17 16:15:00,839][0m Finished trial#1 resulted in value: -0.252. Current best value is -0.8260000000000001 with parameters: {'lr': 0.007904286522713692, 'batch_size': 32, 'dropout_rate': 0.3777513159299868, 'hidden_dim': 64}.[0m
[32m[I 2024-09-17 16:15:02,276][0m Finished trial#2 resulted in value: -0.6859999999999999. Current best value is -0.8260000000000001 with parameters: {'lr': 0.007904286522713692, 'batch_size': 32, 'dropout_rate': 0.3777513159299868, 'hidden_dim': 64}.[0m
[32m[I 2024-09-17 16:15:02,753][0m Finished trial#3 resulted in value: -0.602. Current best value is -0.8260000000000001 with parameters: {'lr': 0.007904286522713692, 'batch_size': 32, 'dropout_rate': 0.3777513159299868, 'hidden_dim':

Best hyperparameters:  {'lr': 0.0006871684713341972, 'batch_size': 16, 'dropout_rate': 0.10354175985501725, 'hidden_dim': 64}


-0.81