In [None]:
import torch
import numpy as np            
import pandas as pd               
from torch import nn      
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import DataLoader
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import datetime
import os
import matplotlib.pyplot as plt

from EnvironmentXY import *

In [None]:
# Report which device PyTorch would select: "cuda" when a GPU is visible, otherwise "cpu".
print(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
import random

def seed_everything(seed=42):
    """
    Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's global RNG and PyTorch (the CPU
    generator plus the current CUDA device), and puts cuDNN into deterministic,
    non-benchmarking mode.

    Args:
        seed (int): Seed value shared by all generators. Defaults to 42.

    Note:
        ``PYTHONHASHSEED`` is exported as well, but the interpreter reads it
        only at start-up, so it influences child processes rather than the
        kernel that is already running.
    """
    seed_text = str(seed)
    os.environ['PYTHONHASHSEED'] = seed_text

    # Host-side generators.
    for seed_host_rng in (random.seed, np.random.seed):
        seed_host_rng(seed)

    # PyTorch generators; the CUDA call is silently ignored without a GPU.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Trade cuDNN autotuning for repeatable kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


def _predict_flat(model, x):
    """Run ``model`` on ``x`` with a channel axis inserted at dim 1; return a flat NumPy array."""
    # Evaluation never needs gradients; disabling them avoids building an
    # autograd graph over the whole data set.
    with torch.no_grad():
        pred = model(x.unsqueeze(1))
    return pred.cpu().detach().numpy().flatten()


def _plot_series_panel(position, true, pred, xlabel, legend_loc, title):
    """Draw measured and estimated values against the sample index in subplot ``position``."""
    plt.subplot(position)
    sample_index = range(1, len(true) + 1)
    plt.plot(sample_index, true, color='r', label='Measured value')
    plt.plot(sample_index, pred, color='b', label='Estimated value')
    plt.xlabel(xlabel, fontsize=12, fontproperties=sim_sun)
    plt.ylabel(r'''SOM content(g/kg$^{-1}$)''', fontsize=12, fontproperties=sim_sun)
    plt.legend(loc=legend_loc, frameon=False)
    plt.title(title, fontdict={'fontsize': 11,'family': 'Times New Roman'})


def _plot_scatter_panel(position, true, pred, text_x, title):
    """Draw the measured-vs-estimated scatter with 1:1 line, fitted line and metrics text."""
    plt.subplot(position)
    # 1:1 line
    x = np.linspace(0, max(true), 100)
    plt.plot(x, x, linestyle='--', color='black', lw=1, label='1:1 line')
    # Fitted line: first-degree least-squares fit of estimates on measurements
    coefficients = np.polyfit(true, pred, 1)
    polynomial = np.poly1d(coefficients)
    fitted_values = polynomial(true)
    plt.plot(true, fitted_values, 'r-', lw=1, label='Fitted line')
    plt.scatter(true, pred, c='black', s=20)
    plt.xlabel(r'''Measured SOM content(g/kg$^{-1}$)''', fontsize=12, fontproperties=sim_sun)
    plt.ylabel(r'''Estimated SOM content(g/kg$^{-1}$)''', fontsize=12, fontproperties=sim_sun)
    mse = mean_squared_error(true, pred)
    r2 = r2_score(true, pred)
    rmse = np.sqrt(mse)
    # RPD: spread of the measured values relative to the prediction error.
    rpd = np.std(true)/rmse
    # The annotation sits at fixed data coordinates (text_x, 0.05).
    plt.text(text_x, 0.05, f'R$^2$={r2:.2f}\nRPD={rpd:.2f}\nRMSE={rmse:.2f}', fontsize=12, color='black', fontdict={'fontsize': 18,'style': 'italic','family': 'Times New Roman'})
    plt.title(title, fontdict={'fontsize': 11,'family': 'Times New Roman'})
    plt.legend(ncol=1, frameon=False, loc='upper left')


def evaluate_CNN_model(model,x_train,y_train,x_test,y_test,model_name=None):
    """
    Plot and save a 2x2 evaluation figure for a fitted 1D-CNN regressor.

    The top row shows the modelling set and the bottom row the testing set.
    The left panels plot measured and estimated values per sample; the right
    panels show a measured-vs-estimated scatter with a 1:1 line, a fitted line
    and R^2 / RPD / RMSE.

    Args:
        model: Callable network. It is called on ``x.unsqueeze(1)``, so each
            input presumably has shape (n_samples, n_features) - confirm
            against the caller.
        x_train, x_test: Feature tensors for the modelling and testing sets.
        y_train, y_test: Target tensors; they are moved to the CPU and
            flattened before plotting.
        model_name: Used as every panel title and as the PNG file stem.

    Returns:
        None. The figure is written to ``../../Images/E2/{model_name}.png``.

    Notes:
        Relies on ``sim_sun`` (a font properties object) being defined at
        module level; it is not defined in this cell. The model's train/eval
        mode is left as the caller set it.
    """
    plt.figure(figsize=(7,5),dpi=400, constrained_layout=True)

    # Modelling set
    true = y_train.cpu().numpy().flatten()
    pred = _predict_flat(model, x_train)
    _plot_series_panel(
        221, true, pred,
        f'Number of samples in the modeling set({x_train.shape[0]})',
        'upper left', model_name,
    )
    _plot_scatter_panel(222, true, pred, 30, model_name)

    # Testing set
    true = y_test.cpu().numpy().flatten()
    pred = _predict_flat(model, x_test)
    _plot_series_panel(
        223, true, pred,
        f'Number of samples in the testing set({x_test.shape[0]})',
        'lower left', model_name,
    )
    _plot_scatter_panel(224, true, pred, 24, model_name)

    out_path = f'../../Images/E2/{model_name}.png'
    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path,bbox_inches = 'tight')

In [None]:
# Read the CSV that holds the spectra and the SOM values
# (the cells below use the columns "X400".."X2400" and "SOM").
data = pd.read_csv("../../Datas/Paper_data/土壤有机质数据/2024第二批数据(96个土样)/re_vis-NIR.csv")

# Preview the first rows to check that the file parsed as expected.
data.head()

In [None]:
# Feature matrix: every column from "X400" through "X2400" (label slice, both ends included), as float32.
X = data.loc[:,"X400":"X2400"].values.astype("float32")
# Target vector: the "SOM" column, as float32.
Y = data["SOM"].values.astype("float32")
# Evenly spaced axis from 400 to 2400 with one point per feature column
# (presumably the wavelength in nm - confirm against the instrument export).
wavelengths = np.linspace(400, 2400, X.shape[1])

In [None]:
# Hold out 30% of the samples as the test set.
# NOTE(review): `ks` is not defined in this notebook; it presumably comes from the
# star import of EnvironmentXY (possibly a Kennard-Stone splitter) - confirm.
x_train, x_test, y_train, y_test = ks.train_test_split(X, Y, test_size = 0.3)
# Alternative: scikit-learn's random split with a fixed seed.
# x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42 )
# draw_boxplot(Y,y_train,y_test)

In [None]:
# Sanity check on the size of the modelling-set feature array.
x_train.shape

In [None]:
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap aligned tensors in a ``DataLoader``.

    Args:
        data_arrays: Iterable of tensors sharing their first dimension; they
            are unpacked into a ``TensorDataset``.
        batch_size: Number of samples per batch.
        is_train: When true the loader shuffles samples; otherwise it keeps
            their original order.

    Returns:
        A ``DataLoader`` over the combined tensors.
    """
    tensor_dataset = torch.utils.data.TensorDataset(*data_arrays)
    loader = DataLoader(dataset=tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

In [None]:
# Generated (synthetic) samples, read from a CSV with the same "X400".."X2400" and "SOM" columns
fake_date = pd.read_csv("../../Datas/Fake_Datas/E2/7v3/awgan_20250131(33)/[G_epoch=16500]/generate_data[n=564].csv")

fake_x = fake_date.loc[:,"X400":"X2400"].values.astype("float32")
fake_y = fake_date["SOM"].values.astype("float32")

# Augment the modelling set: append the generated rows to the real training rows.
# The test split is not touched here.
expend_x = np.vstack((x_train,fake_x))
expend_y = np.hstack((y_train,fake_y))

In [None]:
# Choose the spectral preprocessing and the matching `model_name`.
# Exactly one option should be active; the rest are kept commented out.
# NOTE(review): SG, MSC, glfdiff, D1 and D2 are not defined in this notebook; they
# presumably come from the star import of EnvironmentXY - confirm their behaviour there.

# --- Options applied to the augmented modelling set (expend_x) ---

# gam = 1.2

# X_train = glfdiff(MSC(SG(expend_x, w=17, p=2)), gam)
# X_test = glfdiff(MSC(SG(x_test, w=17, p=2)), gam)
# model_name = f'FOD[{gam}]'

# model_name = f'D1'
# X_train = D1(MSC(SG(expend_x, w=17, p=2)))
# X_test = D1(MSC(SG(x_test, w=17, p=2)))

# Active option: SG followed by MSC on the augmented modelling set and on the test set.
X_train = MSC(SG(expend_x, w=17, p=2))
X_test = MSC(SG(x_test, w=17, p=2))
model_name = 'R'


# --- Options applied to the original, non-augmented modelling set (x_train) ---

# gam = 1.6
# model_name = f'FOD[{gam}]'
# X_train = glfdiff(MSC(SG(x_train, w=17, p=2)), gam)
# X_test = glfdiff(MSC(SG(x_test, w=17, p=2)), gam)

# model_name = f'D2'
# X_train = D2(MSC(SG(x_train, w=17, p=2)))
# X_test = D2(MSC(SG(x_test, w=17, p=2)))

# model_name = f'R'
# X_train = MSC(SG(x_train, w=17, p=2))
# X_test = MSC(SG(x_test, w=17, p=2))

In [None]:
# Standardise every feature with statistics learned from the modelling set only.
std = StandardScaler()
X_train = std.fit_transform(X_train)
# Reuse the training mean and variance for the test set. Refitting on the test
# data would scale it differently from what the model saw during training and
# would leak test-set statistics into the evaluation.
X_test = std.transform(X_test)

# Alternative: min-max scaling, again fitted on the modelling set only.
# scaler = MinMaxScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

In [None]:
# Sample counts of the modelling and test sets; used below to shape the targets.
a = X_train.shape[0]
b = X_test.shape[0]

# Features as float32 tensors on the GPU.
X_train = torch.tensor(X_train, dtype=torch.float32).to("cuda")
X_test = torch.tensor(X_test, dtype=torch.float32).to("cuda")

# Targets as float32 column vectors on the GPU, so they line up with the
# network's (batch, 1) output.
Y_train = torch.tensor(expend_y, dtype=torch.float32).to("cuda").reshape(a, 1)
Y_test = torch.tensor(y_test, dtype=torch.float32).to("cuda").reshape(b, 1)

In [None]:
class Reshape(nn.Module):
    """Layer that views its input as ``(batch, *shape)``, leaving the batch axis untouched."""

    def __init__(self, *args):
        super().__init__()
        # Target shape of everything after the batch dimension.
        self.shape = args

    def forward(self, x):
        """Return a view of ``x`` with shape ``(x.size(0), *self.shape)``."""
        batch = x.size(0)
        return x.view(batch, *self.shape)

In [None]:
from contextlib import contextmanager

@contextmanager
def clear_vars_on_exit():
    """Context manager that removes module-level names created inside its ``with`` body.

    The set of global names is recorded on entry. On exit, whether the body
    finished normally or raised, every global that was not present on entry is
    deleted. Names that already existed are kept, including any new values
    assigned to them inside the body.
    """
    names_at_entry = frozenset(globals())
    try:
        yield
    finally:
        # Build the list first so the namespace can shrink while we walk it.
        created_inside = [name for name in globals() if name not in names_at_entry]
        for name in created_inside:
            globals().pop(name, None)

In [None]:
# Seed every RNG before the model is built and the data is split,
# so that weight initialisation and the random train/validation split repeat across runs.
seed_value = 42  # fixed seed value
seed_everything(seed_value)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

class HuberLoss(nn.Module):
    """Mean Huber loss: quadratic for small residuals, linear beyond ``delta``.

    For a residual ``e = y_pred - y_true`` the per-element loss is
    ``0.5 * |e|**2`` when ``|e| <= delta`` and ``delta * (|e| - 0.5 * delta)``
    otherwise; the result is averaged over all elements.

    Args:
        delta (float): Residual magnitude at which the loss switches from
            quadratic to linear. Defaults to 1.0.
    """

    def __init__(self, delta=1.0):
        super().__init__()
        self.delta = delta

    def forward(self, y_pred, y_true):
        """Return the scalar mean Huber loss between ``y_pred`` and ``y_true``."""
        delta = self.delta
        abs_error = (y_pred - y_true).abs()
        within_delta = abs_error <= delta
        per_element = torch.where(
            within_delta,
            0.5 * (abs_error ** 2),
            delta * (abs_error - 0.5 * delta),
        )
        return per_element.mean()

# Width of the flattened feature map that feeds the first Linear layer.
# Every convolution uses padding='same' with stride 1, so it keeps the sequence
# length; every MaxPool1d(kernel_size=2, stride=2) floors the length to half.
# After three pools the length is n_features // 8, and the last convolution
# emits 16 channels. Deriving the width from the data replaces the hard-coded
# per-preprocessing constants (400, or 384 for 'D2') and keeps the network
# valid if the number of spectral bands changes.
layer_number = 16 * (X_train.shape[1] // 8)

# Drop any model left over from a previous run of this cell before building a new one.
net = None

# 1D-CNN regressor: three Conv1d -> BatchNorm1d -> SELU -> MaxPool1d stages,
# then a dropout-regularised two-layer linear head with a single output.
net = nn.Sequential(
    nn.Conv1d(1, 16, 9,padding='same', stride=1),
    nn.BatchNorm1d(16),
    # nn.LeakyReLU(0.01),
    nn.SELU(),

    nn.MaxPool1d(kernel_size = 2, stride=2),

    nn.Conv1d(16, 32, 9,padding='same', stride=1),
    nn.BatchNorm1d(32),
    # nn.LeakyReLU(0.01),
    nn.SELU(),

    nn.MaxPool1d(kernel_size = 2, stride=2),

    nn.Conv1d(32, 16, 9,padding='same', stride=1),
    nn.BatchNorm1d(16),
    # nn.LeakyReLU(0.01),
    nn.SELU(),

    nn.MaxPool1d(kernel_size = 2, stride=2),

    nn.Flatten(),
    nn.Dropout(0.5),
    nn.Linear(layer_number,128),
    nn.Dropout(0.5),
    nn.Linear(128,1),
    # nn.Dropout(0.5),
    # nn.Linear(64,1),

).to("cuda")

# Train `net` with early stopping on a held-out validation split.
# Everything created inside the `with` body is removed from the global
# namespace on exit, so only `net` (defined earlier) outlives this cell.
with clear_vars_on_exit():
    num_epochs = 250
    batch_size = 32
    patience = 10  # epochs without a validation improvement before stopping
    ee = 0         # zero-based index of the epoch with the best validation loss so far

    checkpoint_path = f'../../models/E2/1D-CNN/DR-WGAN-GP/{model_name}_best_model.pth'
    # torch.save does not create missing folders.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    loss = nn.SmoothL1Loss()
    # loss = HuberLoss(delta=1.0)
    trainer = optim.Adam(net.parameters(), lr=0.0002, weight_decay=1e-4)

    # 70/30 random split of the (augmented) modelling set into training and validation parts.
    dataset = TensorDataset(X_train, Y_train)
    train_size = int(len(dataset) * 0.7)
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    best_loss = float('inf')

    for epoch in range(num_epochs):
        net.train()
        for X_, y_ in train_loader:
            # Insert the channel axis expected by Conv1d: (batch, 1, n_features).
            X_, y_ = X_.unsqueeze(1).to("cuda"), y_.to("cuda")
            yy = net(X_)
            l = loss(yy, y_)
            trainer.zero_grad()
            l.backward()
            trainer.step()

        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val, y_val = X_val.unsqueeze(1).to("cuda"), y_val.to("cuda")
                val_loss += loss(net(X_val), y_val).item()
        # Mean of the per-batch validation losses.
        val_loss /= len(val_loader)

        if val_loss < best_loss:
            best_loss = val_loss
            ee = epoch
            # Save at the moment of improvement so the file always holds the
            # best weights. Saving only when patience runs out would store
            # weights from `patience` epochs after the best one, and would store
            # nothing if training used up all epochs.
            torch.save(net.state_dict(), checkpoint_path)

        # Printed after the update so the line reflects the current best.
        print(f'Epoch {epoch + 1}, Validation Loss: {val_loss:.6f}, Best Loss: {best_loss:.6f}, Best Model Epoch: {ee + 1}')

        if epoch - ee >= patience:
            break

In [None]:
# Restore the weights written by the training cell for the current `model_name`.
state_dict = torch.load(f'../../models/E2/1D-CNN/DR-WGAN-GP/{model_name}_best_model.pth') 
net.load_state_dict(state_dict)  
# Switch to inference mode before evaluation.
net.eval()

# Plot and save the modelling-set and test-set diagnostics.
# Note that X_train / Y_train here include the generated samples appended earlier.
evaluate_CNN_model(net,X_train,Y_train,X_test,Y_test,model_name=f'{model_name}_CNN')

In [None]:
import gc
# Drop the references to the model and its loaded weights.
del net,state_dict
gc.collect()  # manually trigger garbage collection