In [25]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader
data_path = r"F:\cache_data\zone_ana\dy\train_data\train_20240726.csv"

# Load the data and keep the target column (TZ) plus the selected feature columns
df = pd.read_csv(data_path)
df = df[["TZ","Centroid_X","Centroid_Y","DEM_RANGE","DEM_MEAN","DEM_STD","AnalyticalHillshading_MEAN","Aspect_MEAN","ChannelNetworkBaseLevel_MEAN","ChannelNetworkDistance_MEAN",
    "ConvergenceIndex_MEAN","LSFactor_MEAN","MRRTF_MEAN","MRVBF_MEAN","PlanCurvature_MEAN","ProfileCurvature_MEAN","RelativeSlopePosition_MEAN","Slope_MEAN","TopographicWetnessIndex_MEAN",
    "TotalCatchmentArea_MEAN","ValleyDepth_MEAN","NIGHT2022_MEAN","ETP2022_mean_MEAN","TMP2022_mean_MEAN","PRE2022_mean_MEAN","PRE2022_3_MEAN","PRE2022_11_MEAN","ETP2022_3_MEAN",
    "ETP2022_11_MEAN", "TMP2022_3_MEAN", "TMP2022_11_MEAN", "evi_MEAN", "lswi_MEAN", "mndwi_MEAN", "ndmi_MEAN", "ndvi_MEAN", "ndwi_MEAN", "PCA_0_MEAN", "PCA_1_MEAN", "savi_MEAN",
    "vari_MEAN", "DL_MAJORITY", "SlopeClass_MAJORITY", "DZ_MAJORITY"]]

# Keep only rows whose feature values are all finite. StandardScaler ignores
# NaN while fitting but leaves it in place when transforming, so a single
# missing value would reach the model and turn the loss of its batch into NaN.
feature_values = df.drop(columns=['TZ']).to_numpy(dtype=float)
df = df[np.isfinite(feature_values).all(axis=1)]

# Separate features and target
X = df.drop(['TZ'], axis=1)
y = df['TZ']

# Encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the rows for testing (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training split only
# NOTE(review): the *_MAJORITY columns look like categorical codes that are
# scaled as if numeric here - confirm this is intended.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors (float32 features, int64 class indices)
X_train_tensor = torch.FloatTensor(X_train_scaled)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_train_tensor = torch.LongTensor(y_train)
y_test_tensor = torch.LongTensor(y_test)

# 创建数据集和数据加载器
class SoilDataset(Dataset):
    """Pair a feature container with a label container, indexed in lockstep.

    Args:
        X: Indexable collection of feature rows.
        y: Indexable collection of labels, aligned with ``X``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of feature rows."""
        n_rows = len(self.X)
        return n_rows

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Training split: wrapped in a dataset and served in shuffled mini-batches of 32
train_dataset = SoilDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test split: same batch size, original order kept
test_dataset = SoilDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
class EmbeddingMixture(nn.Module):
    """Embed each scalar feature separately and combine the embeddings.

    Every input feature gets its own ``nn.Linear(1, embed_dim)`` layer. A
    softmax "mixer" over ``num_components`` weights the per-feature
    embeddings, and the weighted components are averaged into one vector.

    Args:
        input_dim: Number of scalar features per sample.
        embed_dim: Size of the returned embedding.
        num_components: Number of mixture components produced by the mixer.
    """

    def __init__(self, input_dim, embed_dim, num_components=16):
        super().__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.num_components = num_components
        self.linear_layers = nn.ModuleList([nn.Linear(1, embed_dim) for _ in range(input_dim)])
        self.mixer = nn.Linear(input_dim, num_components)

    def forward(self, x):
        """Map ``x`` of shape (batch_size, input_dim) to (batch_size, embed_dim)."""
        # One embedding per feature column: (batch_size, input_dim, embed_dim)
        components = [layer(x[:, i:i+1]) for i, layer in enumerate(self.linear_layers)]
        components = torch.stack(components, dim=1)

        mixer_weights = self.mixer(x).softmax(dim=-1)  # (batch_size, num_components)
        mixer_weights = mixer_weights.unsqueeze(1).expand(-1, self.input_dim, -1)  # (batch_size, input_dim, num_components)

        # Sum over the feature axis, keeping components before the embedding
        # axis. The previous output order ('bdc') put embed_dim on axis 1, so
        # the mean below collapsed the embedding and returned num_components
        # values, which only matched downstream layers when
        # embed_dim == num_components.
        mixed = torch.einsum('bic,bid->bcd', mixer_weights, components)  # (batch_size, num_components, embed_dim)

        # NOTE(review): the mixer weights are identical for every feature and
        # sum to one, so this mean equals sum_i(components) / num_components
        # and the mixer does not influence the output. Revisit the mixing
        # scheme if a learned mixture is intended.
        return mixed.mean(dim=1)  # (batch_size, embed_dim)

class LWTA(nn.Module):
    """Linear layer followed by a block-wise maximum.

    The linear output is split into ``output_dim // block_size`` consecutive
    blocks of ``block_size`` units and only the largest unit of each block is
    kept, so the result has ``output_dim // block_size`` features.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Number of linear units before the block-wise maximum.
        block_size: Number of units competing in each block.

    Raises:
        ValueError: If ``block_size`` is not positive or does not divide
            ``output_dim``.
    """

    def __init__(self, input_dim, output_dim, block_size=2):
        super().__init__()
        # A non-divisible size would either fail at forward time or, worse,
        # silently regroup units across samples, so reject it up front.
        if block_size <= 0 or output_dim % block_size != 0:
            raise ValueError(
                f"output_dim ({output_dim}) must be a positive multiple of "
                f"block_size ({block_size})"
            )
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.block_size = block_size
        self.num_blocks = output_dim // block_size
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Map (..., input_dim) to (..., num_blocks), keeping leading dims."""
        output = self.linear(x)
        # Split only the last axis into blocks; flattening with view(-1, ...)
        # would merge every leading dimension (e.g. sequence and batch).
        blocks = output.reshape(*output.shape[:-1], self.num_blocks, self.block_size)
        return blocks.max(dim=-1).values

class HybridTransformerLayer(nn.Module):
    """Self-attention followed by an LWTA feed-forward block.

    Each sub-block's output is layer-normalized and then added back onto the
    running representation.

    Args:
        d_model: Feature size of the input and output.
        nhead: Number of attention heads.
        dim_feedforward: Number of LWTA units before the block-wise maximum.
    """

    def __init__(self, d_model, nhead, dim_feedforward):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead)
        # LWTA with its default block size of 2 halves the width, hence the
        # dim_feedforward // 2 inputs of the projection back to d_model.
        winner_take_all = LWTA(d_model, dim_feedforward)
        back_projection = nn.Linear(dim_feedforward // 2, d_model)
        self.ffn = nn.Sequential(winner_take_all, back_projection)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, src):
        """Apply the attention and feed-forward sub-blocks to ``src``.

        ``src`` is presumably (seq_len, batch, d_model), since the attention
        module is built without ``batch_first`` - confirm against the caller.
        """
        attended, _ = self.self_attn(src, src, src)
        src = src + self.norm1(attended)
        transformed = self.ffn(src)
        return src + self.norm2(transformed)

class SoilClassifier(nn.Module):
    """Tabular classifier: feature embedding, transformer layers, linear head.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        d_model: Width of the embedding and of every transformer layer.
        nhead: Number of attention heads per layer.
        num_layers: Number of stacked HybridTransformerLayer blocks.
    """

    def __init__(self, input_dim, num_classes, d_model=256, nhead=8, num_layers=4):
        super().__init__()
        self.embedding = EmbeddingMixture(input_dim, d_model)
        stack = []
        for _ in range(num_layers):
            stack.append(HybridTransformerLayer(d_model, nhead, d_model * 4))
        self.transformer_layers = nn.ModuleList(stack)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        # Embed, then add a leading length-1 sequence axis for the layers
        hidden = self.embedding(x).unsqueeze(0)
        for block in self.transformer_layers:
            hidden = block(hidden)
        # Drop the sequence axis again before the classification head
        return self.classifier(hidden.squeeze(0))
def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean loss per sample.

    Args:
        model: Module to optimize; switched to training mode.
        train_loader: Iterable of ``(inputs, targets)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss function; assumed to return the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        The loss averaged over all samples seen in the epoch.
    """
    model.train()
    total_loss = 0.0
    num_samples = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size so a smaller final batch does
        # not count as much as a full one.
        batch_size = targets.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size
    return total_loss / num_samples

def evaluate(model, test_loader, criterion, device):
    """Evaluate ``model`` and return ``(mean loss, accuracy)`` per sample.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss function; assumed to return the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy)``, both averaged over the samples that
        were actually evaluated.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    num_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Weight the batch mean by batch size so loss and accuracy share
            # the same per-sample denominator.
            batch_size = targets.size(0)
            total_loss += loss.item() * batch_size
            num_samples += batch_size

            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

    accuracy = correct / num_samples
    avg_loss = total_loss / num_samples
    return avg_loss, accuracy

# Select the device: GPU when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the model
input_dim = X_train.shape[1]
num_classes = len(np.unique(y))
d_model = 16  # keep this value consistent with the setting above
model = SoilClassifier(input_dim, num_classes, d_model=d_model, nhead=8, num_layers=4)
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

# Train, evaluating on the test split after every epoch
num_epochs = 100
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
    print()

# Final evaluation
final_loss, final_accuracy = evaluate(model, test_loader, criterion, device)
print(f"Final Test Loss: {final_loss:.4f}, Final Test Accuracy: {final_accuracy:.4f}")

In [36]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader

data_path = r"F:\cache_data\zone_ana\dy\train_data\train_20240726.csv"

# Load the data and keep the target column (TZ) plus the selected feature columns
df = pd.read_csv(data_path)
df = df[["TZ","Centroid_X","Centroid_Y","DEM_RANGE","DEM_MEAN","DEM_STD","AnalyticalHillshading_MEAN","Aspect_MEAN","ChannelNetworkBaseLevel_MEAN","ChannelNetworkDistance_MEAN",
    "ConvergenceIndex_MEAN","LSFactor_MEAN","MRRTF_MEAN","MRVBF_MEAN","PlanCurvature_MEAN","ProfileCurvature_MEAN","RelativeSlopePosition_MEAN","Slope_MEAN","TopographicWetnessIndex_MEAN",
    "TotalCatchmentArea_MEAN","ValleyDepth_MEAN","NIGHT2022_MEAN","ETP2022_mean_MEAN","TMP2022_mean_MEAN","PRE2022_mean_MEAN","PRE2022_3_MEAN","PRE2022_11_MEAN","ETP2022_3_MEAN",
    "ETP2022_11_MEAN", "TMP2022_3_MEAN", "TMP2022_11_MEAN", "evi_MEAN", "lswi_MEAN", "mndwi_MEAN", "ndmi_MEAN", "ndvi_MEAN", "ndwi_MEAN", "PCA_0_MEAN", "PCA_1_MEAN", "savi_MEAN",
    "vari_MEAN", "DL_MAJORITY", "SlopeClass_MAJORITY", "DZ_MAJORITY"]]

# Report missing values per column
print(df.isnull().sum())  # pandas isnull works for every dtype

# Check whether the DataFrame holds only numeric dtypes
if df.dtypes.apply(lambda x: x.kind in 'bifc').all():
    # All columns are numeric: check the whole value array at once
    print(np.any(np.isnan(df.values)))
    print(np.any(np.isinf(df.values)))
else:
    # Some columns are non-numeric: check the numeric columns one by one
    for col in df.columns:
        if np.issubdtype(df[col].dtype, np.number):
            print(f"NaN values in '{col}':", np.isnan(df[col]).sum())
            print(f"Inf values in '{col}':", np.isinf(df[col]).sum())

# Keep only rows whose feature values are all finite. StandardScaler ignores
# NaN while fitting but leaves it in place when transforming, so a single
# missing value would reach the model and turn the loss of its batch into NaN.
feature_values = df.drop(columns=['TZ']).to_numpy(dtype=float)
df = df[np.isfinite(feature_values).all(axis=1)]

# Separate features and target
X = df.drop(['TZ'], axis=1)
y = df['TZ']

# Encode the target labels as consecutive integers
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the rows for testing (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors (float32 features, int64 class indices)
X_train_tensor = torch.FloatTensor(X_train_scaled)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_train_tensor = torch.LongTensor(y_train)
y_test_tensor = torch.LongTensor(y_test)

# 创建数据集和数据加载器
class SoilDataset(Dataset):
    """Pair a feature container with a label container, indexed in lockstep.

    Args:
        X: Indexable collection of feature rows.
        y: Indexable collection of labels, aligned with ``X``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of feature rows."""
        n_rows = len(self.X)
        return n_rows

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Training split: wrapped in a dataset and served in shuffled mini-batches of 32
train_dataset = SoilDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test split: same batch size, original order kept
test_dataset = SoilDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 定义模型
class EmbeddingMixture(nn.Module):
    """Embed each scalar feature separately and combine the embeddings.

    Every input feature gets its own ``nn.Linear(1, embed_dim)`` layer. A
    softmax "mixer" over ``num_components`` weights the per-feature
    embeddings, and the weighted components are averaged into one vector.

    Args:
        input_dim: Number of scalar features per sample.
        embed_dim: Size of the returned embedding.
        num_components: Number of mixture components produced by the mixer.
    """

    def __init__(self, input_dim, embed_dim, num_components=16):
        super().__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.num_components = num_components
        self.linear_layers = nn.ModuleList([nn.Linear(1, embed_dim) for _ in range(input_dim)])
        self.mixer = nn.Linear(input_dim, num_components)

    def forward(self, x):
        """Map ``x`` of shape (batch_size, input_dim) to (batch_size, embed_dim)."""
        # One embedding per feature column: (batch_size, input_dim, embed_dim)
        components = [layer(x[:, i:i+1]) for i, layer in enumerate(self.linear_layers)]
        components = torch.stack(components, dim=1)

        mixer_weights = self.mixer(x).softmax(dim=-1)  # (batch_size, num_components)
        mixer_weights = mixer_weights.unsqueeze(1).expand(-1, self.input_dim, -1)  # (batch_size, input_dim, num_components)

        # Sum over the feature axis, keeping components before the embedding
        # axis. The previous output order ('bdc') put embed_dim on axis 1, so
        # the mean below collapsed the embedding and returned num_components
        # values, which only matched downstream layers when
        # embed_dim == num_components.
        mixed = torch.einsum('bic,bid->bcd', mixer_weights, components)  # (batch_size, num_components, embed_dim)

        # NOTE(review): the mixer weights are identical for every feature and
        # sum to one, so this mean equals sum_i(components) / num_components
        # and the mixer does not influence the output. Revisit the mixing
        # scheme if a learned mixture is intended.
        return mixed.mean(dim=1)  # (batch_size, embed_dim)

class LWTA(nn.Module):
    """Linear layer followed by a block-wise maximum.

    The linear output is split into ``output_dim // block_size`` consecutive
    blocks of ``block_size`` units and only the largest unit of each block is
    kept, so the result has ``output_dim // block_size`` features.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Number of linear units before the block-wise maximum.
        block_size: Number of units competing in each block.

    Raises:
        ValueError: If ``block_size`` is not positive or does not divide
            ``output_dim``.
    """

    def __init__(self, input_dim, output_dim, block_size=2):
        super().__init__()
        # A non-divisible size would either fail at forward time or, worse,
        # silently regroup units across samples, so reject it up front.
        if block_size <= 0 or output_dim % block_size != 0:
            raise ValueError(
                f"output_dim ({output_dim}) must be a positive multiple of "
                f"block_size ({block_size})"
            )
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.block_size = block_size
        self.num_blocks = output_dim // block_size
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Map (..., input_dim) to (..., num_blocks), keeping leading dims."""
        output = self.linear(x)
        # Split only the last axis into blocks; flattening with view(-1, ...)
        # would merge every leading dimension (e.g. sequence and batch).
        blocks = output.reshape(*output.shape[:-1], self.num_blocks, self.block_size)
        return blocks.max(dim=-1).values

class HybridTransformerLayer(nn.Module):
    """Self-attention followed by an LWTA feed-forward block.

    Each sub-block's output is layer-normalized and then added back onto the
    running representation.

    Args:
        d_model: Feature size of the input and output.
        nhead: Number of attention heads.
        dim_feedforward: Number of LWTA units before the block-wise maximum.
    """

    def __init__(self, d_model, nhead, dim_feedforward):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead)
        # LWTA with its default block size of 2 halves the width, hence the
        # dim_feedforward // 2 inputs of the projection back to d_model.
        winner_take_all = LWTA(d_model, dim_feedforward)
        back_projection = nn.Linear(dim_feedforward // 2, d_model)
        self.ffn = nn.Sequential(winner_take_all, back_projection)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, src):
        """Apply the attention and feed-forward sub-blocks to ``src``.

        ``src`` is presumably (seq_len, batch, d_model), since the attention
        module is built without ``batch_first`` - confirm against the caller.
        """
        attended, _ = self.self_attn(src, src, src)
        src = src + self.norm1(attended)
        transformed = self.ffn(src)
        return src + self.norm2(transformed)

class SoilClassifier(nn.Module):
    """Tabular classifier: feature embedding, transformer layers, linear head.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        d_model: Width of the embedding and of every transformer layer.
        nhead: Number of attention heads per layer.
        num_layers: Number of stacked HybridTransformerLayer blocks.
    """

    def __init__(self, input_dim, num_classes, d_model=256, nhead=8, num_layers=4):
        super().__init__()
        self.embedding = EmbeddingMixture(input_dim, d_model)
        stack = []
        for _ in range(num_layers):
            stack.append(HybridTransformerLayer(d_model, nhead, d_model * 4))
        self.transformer_layers = nn.ModuleList(stack)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Return class logits for a batch of feature rows."""
        # Embed, then add a leading length-1 sequence axis for the layers
        hidden = self.embedding(x).unsqueeze(0)
        for block in self.transformer_layers:
            hidden = block(hidden)
        # Drop the sequence axis again before the classification head
        return self.classifier(hidden.squeeze(0))

def initialize_weights(m):
    """Re-initialize ``m`` in place when it is an ``nn.Linear``.

    The weight is drawn with Kaiming-uniform initialization (ReLU gain) and
    the bias, when present, is set to zero. Any other module is left as is.
    Intended to be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
    bias = m.bias
    if bias is not None:
        nn.init.constant_(bias, 0)

def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean loss per sample.

    A batch whose loss is NaN is reported on stdout and skipped: running
    backward/step on it would write NaN into every parameter and make all
    later batches NaN as well.

    Args:
        model: Module to optimize; switched to training mode.
        train_loader: Iterable of ``(inputs, targets)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss function; assumed to return the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        The loss averaged over the samples of all batches that were used for
        an update, or NaN when every batch was skipped.
    """
    model.train()
    total_loss = 0.0
    num_samples = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        if torch.isnan(loss):
            print("NaN loss detected")
            print("Inputs:", inputs)
            print("Outputs:", outputs)
            print("Targets:", targets)
            # Do not propagate NaN gradients into the weights
            continue
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size so a smaller final batch does
        # not count as much as a full one.
        batch_size = targets.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size
    if num_samples == 0:
        return float("nan")
    return total_loss / num_samples

def evaluate(model, test_loader, criterion, device):
    """Evaluate ``model`` and return ``(mean loss, accuracy)`` per sample.

    A batch whose loss is NaN is reported on stdout; it still contributes to
    the totals, so the returned loss is NaN in that case.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss function; assumed to return the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy)``, both averaged over the samples that
        were actually evaluated.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    num_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if torch.isnan(loss):
                print("NaN loss detected")
                print("Inputs:", inputs)
                print("Outputs:", outputs)
                print("Targets:", targets)

            # Weight the batch mean by batch size so loss and accuracy share
            # the same per-sample denominator.
            batch_size = targets.size(0)
            total_loss += loss.item() * batch_size
            num_samples += batch_size

            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()

    accuracy = correct / num_samples
    avg_loss = total_loss / num_samples
    return avg_loss, accuracy

# Select the device: GPU when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the model and re-initialize its linear layers
input_dim = X_train.shape[1]
num_classes = len(np.unique(y))
d_model = 16
model = SoilClassifier(input_dim, num_classes, d_model=d_model, nhead=8, num_layers=4)
model = model.to(device)
model.apply(initialize_weights)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Train, evaluating on the test split after every epoch
num_epochs = 2
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
    print()

# Final evaluation
final_loss, final_accuracy = evaluate(model, test_loader, criterion, device)
print(f"Final Test Loss: {final_loss:.4f}, Final Test Accuracy: {final_accuracy:.4f}")


TZ                                 0
Centroid_X                         0
Centroid_Y                         0
DEM_RANGE                       1988
DEM_MEAN                        1988
DEM_STD                         1988
AnalyticalHillshading_MEAN      1988
Aspect_MEAN                     1988
ChannelNetworkBaseLevel_MEAN    1988
ChannelNetworkDistance_MEAN     1988
ConvergenceIndex_MEAN           1988
LSFactor_MEAN                   1988
MRRTF_MEAN                      1988
MRVBF_MEAN                      1988
PlanCurvature_MEAN              1988
ProfileCurvature_MEAN           1988
RelativeSlopePosition_MEAN      1988
Slope_MEAN                      1988
TopographicWetnessIndex_MEAN    1988
TotalCatchmentArea_MEAN         1988
ValleyDepth_MEAN                1988
NIGHT2022_MEAN                  1988
ETP2022_mean_MEAN               1988
TMP2022_mean_MEAN               1988
PRE2022_mean_MEAN               1988
PRE2022_3_MEAN                  1988
PRE2022_11_MEAN                 1988
E