In [12]:
import numpy as np
from tqdm import trange
import matplotlib.pyplot as plt
import pandas as pd
import os

import torch
from torch import nn
from torch.utils import data

from math import sqrt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

### DemVCNet

In [None]:
class Mlp(nn.Module):
    """Feature-attention block followed by a small MLP and an output head.

    ``forward`` projects the input to query/key/value vectors, builds an
    outer-product attention map from them, adds the attended values back to
    the input (residual connection) and then runs four ``Linear`` + activation
    layers before the output head ``fcx``.

    Args:
        in_features: Size of the last input dimension.
        hidden_features: Width of the hidden layers; falls back to
            ``in_features`` when not given.
        act_layer: Activation module class; a single instance is shared by
            all hidden layers.
        drop: Dropout probability, applied before and after the output head.
        pred: When truthy the head emits one value per sample; otherwise it
            maps back to ``in_features`` so that blocks can be stacked.
    """

    def __init__(self, in_features, hidden_features=None, act_layer=nn.GELU, drop=0., pred=True):
        super().__init__()
        hidden_features = hidden_features or in_features
        self.q = nn.Linear(in_features, in_features)
        self.k = nn.Linear(in_features, in_features)
        self.v = nn.Linear(in_features, in_features)
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.fc4 = nn.Linear(hidden_features, hidden_features)
        # fc5 and fc6 are not used by forward(). They are kept so that
        # state_dict keys and the parameter-initialisation order stay
        # compatible with checkpoints saved by earlier versions.
        self.fc5 = nn.Linear(hidden_features, hidden_features)
        self.fc6 = nn.Linear(hidden_features, hidden_features)
        self.act = act_layer()
        self.pred = pred
        self.drop = nn.Dropout(drop)

        # Output head: scalar prediction, or projection back to the input width.
        if pred:
            self.fcx = nn.Linear(hidden_features, 1)
        else:
            self.fcx = nn.Linear(hidden_features, in_features)

    def forward(self, x):
        """Apply the block to ``x``.

        For a 2-D input ``(batch, in_features)`` the result is
        ``(batch, 1)`` when ``pred`` is truthy and ``(batch, in_features)``
        otherwise. The batch axis is always preserved for 2-D inputs, so a
        batch containing a single sample no longer collapses to 1-D (which
        made a following stacked block fail). Inputs with more than two
        dimensions keep the previous behaviour of dropping a leading axis of
        size 1.
        """
        # Only higher-rank inputs get their leading singleton axis removed;
        # for 2-D inputs axis 0 is the batch axis and must survive.
        drop_leading_axis = x.dim() > 2

        x0 = x
        q = self.q(x).unsqueeze(2)
        k = self.k(x).unsqueeze(2)
        v = self.v(x).unsqueeze(2)
        attn = q @ k.transpose(-2, -1)
        attn = attn / np.sqrt(k.size(-2))
        attn = attn.softmax(dim=-1)
        x = (attn @ v).squeeze(2)
        # Residual connection (out-of-place so no view is modified in place).
        x = x + x0

        x = self.act(self.fc1(x))
        x = self.act(self.fc2(x))
        x = self.act(self.fc3(x))
        x = self.act(self.fc4(x))

        # NOTE(review): dropout is also applied to the head output, which
        # zeroes predictions at random while training with drop > 0 —
        # behaviour kept as in the original; confirm that it is intended.
        x = self.drop(x)
        x = self.fcx(x)
        x = self.drop(x)

        if drop_leading_axis:
            x = x.squeeze(0)

        return x


In [14]:
class TF(nn.Module):
    """Two-layer feature embedding followed by two stacked ``Mlp`` blocks.

    Args:
        in_features: Number of raw input features per sample.
        ftout_feature: Width of the embedding produced by ``ft1``/``ft2`` and
            consumed by both ``Mlp`` blocks.
        act_layer: Activation class applied after each embedding layer. The
            ``Mlp`` blocks always use ``nn.GELU`` regardless of this argument.
        drop: Dropout probability forwarded to both ``Mlp`` blocks.
    """

    def __init__(self, in_features, ftout_feature,act_layer=nn.GELU,drop=0.):
        super().__init__()
        # Embedding layers.
        self.ft1 = nn.Linear(in_features, ftout_feature)
        self.ft2 = nn.Linear(ftout_feature, ftout_feature)
        # Both blocks share every setting except the kind of output head.
        shared = dict(in_features=ftout_feature, hidden_features=64, act_layer=nn.GELU, drop=drop)
        self.Block1 = Mlp(pred=False, **shared)  # keeps the embedding width
        self.Block2 = Mlp(pred=True, **shared)   # single-value output head
        self.act = act_layer()

    def forward(self, x):
        """Embed ``x``, then pass it through ``Block1`` and ``Block2``."""
        embedded = self.act(self.ft1(x))
        embedded = self.act(self.ft2(embedded))
        refined = self.Block1(embedded)
        return self.Block2(refined)


<font color=red>Modify the number of input features</font>

In [15]:
def get_net(in_features=8, ftout_feature=2, drop=0.):
    """Build a fresh ``TF`` network and move it to the global ``device``.

    Args:
        in_features: Number of input features per sample. Pass the width of
            your feature tensor here instead of editing the function body.
        ftout_feature: Width of the embedding inside ``TF``.
        drop: Dropout probability forwarded to ``TF``.

    Returns:
        The newly constructed network, placed on ``device``.
    """
    return TF(in_features=in_features, ftout_feature=ftout_feature, drop=drop).to(device)

### Functions

#### Training sub-functions

In [None]:
# Load dataset
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a ``DataLoader``.

    Args:
        data_arrays: Iterable of tensors sharing the same first dimension;
            they are unpacked into a ``TensorDataset``.
        batch_size: Number of samples per batch.
        is_train: Whether the loader shuffles the samples.

    Returns:
        A ``DataLoader`` over the combined tensors.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

# Set Loss function
def rmse(net, features, labels):
    """Return the square root of the global ``loss`` on ``features``/``labels``.

    The network is evaluated in eval mode and without building an autograd
    graph, so dropout does not perturb the reported score and no gradient
    memory is held for the whole dataset. The network's previous
    train/eval mode is restored afterwards.

    Args:
        net: The ``nn.Module`` to evaluate.
        features: Input tensor passed to ``net``.
        labels: Target tensor compared against the predictions.

    Returns:
        ``sqrt(loss(net(features), labels))`` as a Python float.
    """
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            preds = net(features)
            mse = loss(preds, labels)
            root = torch.sqrt(mse)
    finally:
        # Hand the network back in the mode the caller left it in.
        net.train(was_training)
    return root.item()

In [None]:
# train
def train(
    net,
    train_features,
    train_labels,
    test_features,
    test_labels,
    num_epochs,
    learning_rate,
    weight_decay,
    batch_size,
):
    """Train ``net`` with Adam and checkpoint the best test-score weights.

    After every epoch the RMSE on the full training set is recorded. When
    ``test_labels`` is given, the test RMSE is recorded as well and the
    weights are written to ``Net_Save_path_pre + "/tempnet.pth"`` whenever
    the test RMSE improves.

    Args:
        net: Network to optimise (updated in place).
        train_features, train_labels: Training tensors.
        test_features, test_labels: Evaluation tensors; pass
            ``test_labels=None`` to skip evaluation and checkpointing.
        num_epochs: Number of passes over the training data.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        batch_size: Mini-batch size.

    Returns:
        ``(train_ls, test_ls, best_score, best_epoch)``: per-epoch train and
        test RMSE lists, the lowest test RMSE and the 1-based epoch at which
        it occurred. If no epoch improved on the initial score (no test set,
        or only NaN scores), ``best_score`` is ``inf`` and ``best_epoch`` is 0.
    """
    train_ls, test_ls = [], []
    # Start from +inf rather than a magic number so the first finite test
    # score always counts as an improvement and a checkpoint is written.
    best_score = float("inf")
    best_epoch = 0
    train_iter = load_array((train_features, train_labels), batch_size)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    for epoch in trange(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            batch_loss = loss(net(X), y)
            batch_loss.backward()
            optimizer.step()

        train_ls.append(rmse(net, train_features, train_labels))
        if test_labels is not None:
            test_score = rmse(net, test_features, test_labels)
            test_ls.append(test_score)
            # A NaN score compares False here, so it never becomes the best.
            if best_score > test_score:
                best_score = test_score
                best_epoch = epoch + 1
                torch.save(net.state_dict(), Net_Save_path_pre+"/tempnet.pth")

    return train_ls, test_ls, best_score, best_epoch


#### Training

In [None]:
# train and pred
def train_and_pred(
    train_features,
    test_features,
    train_labels,
    test_labels,
    num_epochs,
    lr,
    weight_decay,
    batch_size,
    dataset_index,
):
    """Train a fresh network, plot its learning curves and keep the best weights.

    A new network from ``get_net()`` is trained with ``train``; the per-epoch
    train/test RMSE curves are plotted and saved as
    ``<Net_Save_path_pre>/<dataset_index>.png``. The best checkpoint written
    during training (``tempnet.pth``) is then loaded back into the network
    and stored again as ``<Net_Save_path_pre>/<dataset_index>.pth``.

    Returns:
        ``(net, best_epoch)``: the network carrying the best weights and the
        1-based epoch at which they were obtained.
    """
    net = get_net()
    print(next(net.parameters()).device)

    history = train(
        net,
        train_features,
        train_labels,
        test_features,
        test_labels,
        num_epochs,
        lr,
        weight_decay,
        batch_size,
    )
    train_curve, test_curve, _best_score, best_epoch = history
    print(next(net.parameters()).device)

    # Learning curves: train in blue, test in red, one point per epoch.
    epoch_axis = np.arange(1, num_epochs + 1, 1)
    plt.plot(epoch_axis, train_curve, "b")
    plt.plot(epoch_axis, test_curve, "r")
    plt.xlabel("epoch")
    plt.ylabel("rmse")
    plt.legend(["train", "test"])
    plt.grid(True)
    result_prefix = Net_Save_path_pre + "/" + str(dataset_index)
    plt.savefig(result_prefix + ".png")
    plt.show()
    print(f"train log rmse {float(train_curve[-1]):f}")

    # Restore the best checkpoint and store it under the dataset's own name.
    best_state = torch.load(Net_Save_path_pre + "/tempnet.pth")
    net.load_state_dict(best_state)
    torch.save(net.state_dict(), result_prefix + ".pth")
    print(next(net.parameters()).device)
    return net, best_epoch


#### Predict and analyze errors

In [19]:
def calculate_predict_error(net, features, labels):
    """Predict with ``net`` and summarise the residuals ``labels - prediction``.

    The prediction is made in eval mode and without autograd, so dropout
    does not perturb the reported errors; the network's previous train/eval
    mode is restored afterwards. Residual statistics are computed on the
    first output column.

    Args:
        net: Trained ``nn.Module``.
        features: Input tensor passed to ``net``.
        labels: Ground-truth tensor with the same shape as the network output.

    Returns:
        A 10-tuple ``(model_mae, model_rmse, model_r2, mean, mae, abs_mid,
        abs_max, le90, rmse, h_error)`` where the first three are the
        scikit-learn MAE / RMSE / R2 scores, ``mean`` is the mean signed
        residual, ``mae`` the mean absolute residual, ``abs_mid`` the sorted
        absolute residual at index ``n // 2``, ``abs_max`` the largest
        absolute residual, ``le90`` the sorted absolute residual at index
        ``int(n * 0.9)``, ``rmse`` the root mean squared residual and
        ``h_error`` the residual array itself.

    Raises:
        ValueError: If there are no samples.
    """
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            preds = net(features).cpu().detach().numpy()
    finally:
        # Hand the network back in the mode the caller left it in.
        net.train(was_training)
    truth = labels.cpu().detach().numpy()
    h_error = truth - preds

    n_samples = len(h_error)
    if n_samples == 0:
        raise ValueError("calculate_predict_error needs at least one sample")

    model_mae = mean_absolute_error(truth, preds)
    model_rmse = sqrt(mean_squared_error(truth, preds))
    model_r2 = r2_score(truth, preds)

    # Vectorised statistics on the first output column (the original code
    # looped over rows in Python and then took element [0] of each result).
    residuals = h_error.reshape(n_samples, -1)[:, 0]
    abs_sorted = np.sort(np.abs(residuals))
    mean = residuals.mean()
    mae = abs_sorted.mean()
    abs_mid = abs_sorted[n_samples // 2]
    abs_max = abs_sorted[-1]
    le90 = abs_sorted[int(n_samples * 0.9)]
    root_mean_sq = np.sqrt(np.mean(np.square(residuals)))

    return model_mae, model_rmse, model_r2, mean, mae, abs_mid, abs_max, le90, root_mean_sq, h_error

#### Error analysis

### Set input and output

In [None]:
# Network hyperparameter settings
#
# NOTE: the number of training samples should not be an integer multiple of
# batch_size plus one. For example, with batch_size=64 and 321 training
# samples, the last batch has shape (1, feature_num), which may collapse to a
# one-dimensional tensor inside the network and lead to errors.
num_epochs = 3
lr = 0.0005
weight_decay = 0.01
batch_size = 128
loss = nn.MSELoss()

# Select the training device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Read all HCPs
CP_all = pd.read_excel("dataset/CP/GDCPs_8f3c.xlsx")

# Location and number of the datasets
data_path_pre = "dataset/CP/Select_CP_BestPosition/RMSE9_Bagging/"
data_num = 7
# Number of training rows drawn when a dataset has more than twice this many rows
MAX_train_num = 1000
# Fraction of rows drawn for training otherwise
MAX_train_frac = 0.5

cuda


In [None]:
# Output
# Fitting results and error statistics after adjustment.
# Rows [0, data_num) receive the test-set results and rows
# [data_num, 2 * data_num) the complete-dataset results.
# 11 columns = number of points, best epoch, 3 model metrics (ME), 6 error metrics (EE)
Final_Herr = np.zeros((data_num * 2, 11))

# Column names of the per-point error table written after adjustment
col0 = "Terrain classification"
col1 = "Land cover"
col2 = "Vegetation coverage"
col3 = "dZ"

# Both output trees share one run tag derived from the hyperparameters.
run_tag = "ep"+str(num_epochs)+'_lr'+str(lr)+'_bs'+str(batch_size)
Data_Save_path_pre = "dataset/EA/CP_error_afterEA/TETF/" + run_tag
Net_Save_path_pre = "dataset/EA/TETF_netsave/" + run_tag
# exist_ok=True avoids the check-then-create race and makes re-runs safe.
for out_dir in (Data_Save_path_pre, Net_Save_path_pre):
    os.makedirs(out_dir, exist_ok=True)


### Network fitting

In [None]:
# Per-dataset pipeline: read one dataset, split it into train/test, standardise
# the features, train a network, then evaluate it on the test set and on the
# complete dataset and export the per-point errors.
# The active loop only processes dataset 0; the commented-out headers are the
# variants that run all datasets or a chosen subset.
# for i in range(0,data_num):
for i in range(0, 1):
    # for i in (0, 1, 2, 3):
    # Read data: file "<data_path_pre><i>0.xlsx" holds the rows selected for dataset i.
    print("Dataset:" + str(i) )
    temp_CP_index = pd.read_excel(data_path_pre + str(i) + "0.xlsx")
    # Take the selected rows of CP_all (all columns except its last one) by
    # position and join them column-wise with the per-dataset table indexed by
    # "CP_index". pd.concat(axis=1) aligns on index labels.
    # NOTE(review): assumes CP_all has a default RangeIndex and that the first
    # column of temp_CP_index is "CP_index", so positions and labels coincide —
    # TODO confirm.
    temp_CP_data = pd.concat(
        (CP_all.iloc[temp_CP_index.iloc[:, 0], 0:-1], temp_CP_index.set_index("CP_index")), axis=1
    )

    # Divide the training/test data.
    # Large datasets (more than 2 * MAX_train_num rows) get exactly
    # MAX_train_num training rows; smaller ones use the fraction MAX_train_frac.
    # The remaining rows form the test set. random_state=0 fixes the split.
    if temp_CP_data.shape[0] / 2 > MAX_train_num:
        train_raw = temp_CP_data.sample(MAX_train_num, random_state=0)
        test_raw = temp_CP_data.drop(train_raw.index)
    else:
        train_raw = temp_CP_data.sample(frac=MAX_train_frac, random_state=0)
        test_raw = temp_CP_data.drop(train_raw.index)
    print(
        "Training set size" + str(train_raw.shape[0]) + ",Test set size" + str(test_raw.shape[0]),
    )
    print("Partial data display")
    print(train_raw.iloc[0:4])
    print(" ")

    # Data preprocessing
    # Stack train rows first, then test rows, so the first n_train rows of the
    # combined frames are the training samples.
    # Features are all columns except the last four; the last four columns are
    # the label block.
    combine_features_raw = pd.concat(
        (
            train_raw.iloc[:, 0:-4],
            test_raw.iloc[:, 0:-4],
        )
    )
    combine_features = combine_features_raw.copy()
    combine_label_raw = pd.concat(
        (
            train_raw.iloc[:, -4:],
            test_raw.iloc[:, -4:],
        )
    )
    combine_label = combine_label_raw.copy()

    # Standardization (z-score) of every feature column except the last four,
    # using the mean/std of train and test rows together.
    # NOTE(review): the last four feature columns are left unscaled —
    # presumably categorical; verify against the data.
    combine_features.iloc[:, 0:-4] = combine_features.iloc[:, 0:-4].apply(
        lambda x: (x - x.mean()) / (x.std())
    )
    # NaNs (e.g. a constant column gives 0/0) are replaced by 0.
    combine_features.iloc[:, 0:-4] = combine_features.iloc[:, 0:-4].fillna(0)
    print("Part of the processed data is displayed")
    print(combine_features.iloc[0:4])
    print(" ")

    # Convert to Tensor data on the selected device
    n_train = train_raw.shape[0]
    train_features = torch.tensor(combine_features[:n_train].values, dtype=torch.float32).to(device)
    test_features = torch.tensor(combine_features[n_train:].values, dtype=torch.float32).to(device)
    all_features = torch.tensor(combine_features.values, dtype=torch.float32).to(device)

    train_labels = torch.tensor(combine_label[:n_train].values, dtype=torch.float32).to(device)
    test_labels = torch.tensor(combine_label[n_train:].values, dtype=torch.float32).to(device)
    all_labels = torch.tensor(combine_label.values, dtype=torch.float32).to(device)
    
    # Training: only the last label column is the regression target; it is
    # reshaped to (n, 1) to match the network output.
    Best_net, Best_epoch = train_and_pred(
        train_features,
        test_features,
        train_labels[:, -1].reshape(-1, 1),
        test_labels[:, -1].reshape(-1, 1),
        num_epochs,
        lr,
        weight_decay,
        batch_size,
        i,
    )

    # Prediction/Save
    # Test set: results go to row i of Final_Herr
    ME_1,ME_2,ME_3,EE_1,EE_2,EE_3,EE_4,EE_5,EE_6,Herror = calculate_predict_error(
        Best_net, test_features, test_labels[:, -1].reshape(-1, 1),
    )
    Final_Herr[i, :] = [Herror.shape[0], Best_epoch, ME_1, ME_2, ME_3, EE_1, EE_2, EE_3, EE_4, EE_5, EE_6]

    print("Best Turn:{}".format(Best_epoch))
    print("Model fitting  MAE:{:.2f},RMSE:{:.2f},R2:{:.2f}".format(ME_1,ME_2, ME_3))
    print("Elevation accuracy (Test):  MEAN:{:.2f},MAE:{:.2f},RMSE:{:.2f}".format(EE_1,EE_2, EE_6))
    # Complete dataset (train + test): results go to row i + data_num, with
    # the best-epoch column set to 0.
    ME_1,ME_2,ME_3,EE_1,EE_2,EE_3,EE_4,EE_5,EE_6,Herror = calculate_predict_error(
        Best_net, all_features, all_labels[:, -1].reshape(-1, 1),
    )
    Final_Herr[i+data_num, 0:11] = [Herror.shape[0], 0, ME_1, ME_2, ME_3, EE_1, EE_2, EE_3, EE_4, EE_5, EE_6]
    # Per-point table: the first three label columns are passed through
    # unchanged and the residual of the fitted target is stored as "dZ".
    output_data = pd.DataFrame(
        {
            col0: all_labels[:, -4].cpu().detach().numpy(),
            col1: all_labels[:, -3].cpu().detach().numpy(),
            col2: all_labels[:, -2].cpu().detach().numpy(),
            col3: Herror.flatten(),
        }
    )
    output_data.to_excel(
        Data_Save_path_pre+"/" + str(i) + ".xlsx",
        sheet_name="sheet1",
        index=False,
    )
    print("Elevation accuracy (Complete):   MEAN:{:.2f},MAE:{:.2f},RMSE:{:.2f}".format(EE_1,EE_2, EE_6))

    # Blank lines separating the logs of consecutive datasets
    print(" ")
    print(" ")
    print(" ")

### Save output

In [None]:
# # Export the elevation error after adjustment
# col0 = "CP_num"
# col1 = "Best_epoch"
# col2 = "Model_MAE"
# col3 = "Model_RMSE"
# col4 = "Model_R2"
# col5 = "MEAN"
# col6 = "MAE"
# col7 = "MID"
# col8 = "MAX"
# col9 = "LE90"
# col10 = "RMSE"

# data = pd.DataFrame(
#     {
#         col0: Final_Herr[:, 0],
#         col1: Final_Herr[:, 1],
#         col2: Final_Herr[:, 2],
#         col3: Final_Herr[:, 3],
#         col4: Final_Herr[:, 4],
#         col5: Final_Herr[:, 5],
#         col6: Final_Herr[:, 6],
#         col7: Final_Herr[:, 7],
#         col8: Final_Herr[:, 8],
#         col9: Final_Herr[:, 9],
#         col10: Final_Herr[:, 10],
#     }
# )
# data.to_excel('output/EA/Final_Herr_TETF/ep'+str(num_epochs)+'_lr'+str(lr)+'_bs'+str(batch_size)+'.xlsx', sheet_name="sheet1", index=False)