In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from scipy import signal
from scipy.stats import pearsonr
# import pywt  # Continuous Wavelet Transform
import copy
import scipy.stats as st
from scipy.special import comb
import seaborn as sns
from sympy import *
import math
from tensorflow.keras import layers, Model
import kennard_stone as ks 

print(tf.__version__)

In [None]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see and report how many there are.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [None]:
# Relative location of the CSV file that holds the spectra table.
path = '../../Datas/Paper_data/土壤有机质数据/2024第二批数据(96个土样)/re_vis-NIR.csv'
# Load the whole table; later cells read its "SOM" column and the "X400".."X2400" columns.
data = pd.read_csv(path)

In [None]:
# Preview the first rows of the loaded table.
data.head()

In [None]:
# Predictor matrix: every column from "X400" through "X2400" (label slice, both ends included).
X = data.loc[:, "X400":"X2400"].values.astype(np.float32)
# Regression target taken from the "SOM" column.
Y = data["SOM"].values.astype(np.float32)
# Evenly spaced axis from 400 to 2400 with one entry per column of X.
wavelengths = np.linspace(start=400, stop=2400, num=X.shape[1])
# The complete table (all columns, not only the X400..X2400 slice) as a float32 matrix.
train_data = data.values.astype(np.float32)

In [None]:
def show_hyperspectral_image(_data, title=None, x_label_start=0, sample_interval=10, xticks_interval=20):
    """Plot every row of a 2-D array as one curve on a shared axis.

    Parameters
    ----------
    _data : 2-D array
        One curve per row; the columns are the sampled points.
    title : str, optional
        Figure title.
    x_label_start : int
        Axis label shown for column 0.
    sample_interval : int
        Label increment between two neighbouring columns.
    xticks_interval : int
        A tick (and label) is drawn every `xticks_interval` columns.
        Defaults to 20, the value that used to be hard-coded.
    """
    n_points = _data.shape[1]
    # Curves are drawn against the column index; the ticks are then relabelled
    # with x_label_start + column * sample_interval.
    x = range(0, n_points)
    axis_x_label = range(x_label_start, n_points * sample_interval + x_label_start, sample_interval)

    fig, ax = plt.subplots(figsize=[6, 4], dpi=400)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for row in range(0, _data.shape[0]):
        plt.plot(x, _data[row])

    plt.xticks(x[::xticks_interval], axis_x_label[::xticks_interval], rotation=0)
    plt.xlabel('Wavelength/nm', fontsize=13)
    plt.ylabel('Reflectance', fontsize=13)
    plt.title(title, fontsize=15)
    plt.grid(linestyle = '--',alpha=0.7)
    plt.show()

def SG(data, w=11, p=2):
    """Smooth `data` with a Savitzky-Golay filter.

    `w` is the filter window length and `p` the polynomial order; both are
    forwarded to `scipy.signal.savgol_filter`, whose other options keep their
    defaults.
    """
    smoothed = signal.savgol_filter(data, window_length=w, polyorder=p)
    return smoothed

In [None]:
# Plot the Savitzky-Golay smoothed spectra (window 17, order 2); axis labels start at 400 in steps of 10.
show_hyperspectral_image(
    SG(X, w=17, p=2),
    title='Raw',
    x_label_start=400,
    sample_interval=10,
)

In [None]:
# Kennard-Stone split with 30 % of the rows held out for testing.
# NOTE(review): column 0 of train_data is treated as the target — confirm it is the SOM column.
x_train, x_test, y_train, y_test = ks.train_test_split(
    train_data[:, 1:],   # predictors: every column after the first
    train_data[:, :1],   # target: first column, kept 2-D
    test_size=0.3,
)

# Generative-model training matrix: target column first, then the predictors.
gan_train_data = np.concatenate((y_train, x_train), axis=1)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every column of the training matrix into [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
normalized_data = scaler.fit_transform(gan_train_data)

# scaler.inverse_transform(normalized_data) maps the values back to their original scale.

# Add a trailing channel axis, (n_samples, 217) -> (n_samples, 217, 1), stored as float32.
gan_data_matrix = normalized_data.reshape(len(normalized_data), 217, 1).astype(np.float32)
gan_data_matrix.shape

In [None]:
latent_dim = 100      # dimensionality of the noise (latent) vector
signal_length = 217   # length of each signal

In [None]:
def build_encoder():
    """Build the convolutional encoder.

    Input is a `(signal_length, 1)` signal. Two strided Conv1D stages
    (32 then 64 filters, each followed by LeakyReLU and Dropout) feed a
    128-unit dense layer, from which two `latent_dim`-wide dense heads produce
    the outputs `[z_mean, z_log_var]`.
    """
    signal_in = layers.Input(shape=(signal_length, 1))

    features = signal_in
    for n_filters in (32, 64):
        features = layers.Conv1D(n_filters, 5, strides=2, padding='same')(features)
        features = layers.LeakyReLU(alpha=0.2)(features)
        features = layers.Dropout(0.3)(features)

    features = layers.Flatten()(features)
    features = layers.Dense(128)(features)
    features = layers.LeakyReLU(alpha=0.2)(features)

    # Two parallel linear heads parameterise the latent distribution.
    z_mean = layers.Dense(latent_dim)(features)
    z_log_var = layers.Dense(latent_dim)(features)

    return Model(signal_in, [z_mean, z_log_var], name="Encoder")


# Instantiate once to inspect the architecture.
e_model = build_encoder()
e_model.summary()

In [None]:
def build_decoder():
    """Build the fully connected decoder.

    Maps a `latent_dim` vector through three ReLU dense stages (32, 64 and
    128 units, He-normal initialised, each followed by batch normalisation)
    to a `signal_length` output squashed by `tanh`.
    """
    latent_in = layers.Input(shape=(latent_dim,))

    hidden = latent_in
    for n_units in (32, 64, 128):
        hidden = layers.Dense(n_units, activation='relu', kernel_initializer='he_normal')(hidden)
        hidden = layers.BatchNormalization()(hidden)

    signal_out = layers.Dense(signal_length, activation='tanh')(hidden)

    return Model(latent_in, signal_out, name="Decoder")


# Instantiate once to inspect the architecture.
d_model = build_decoder()
d_model.summary()

In [None]:
class VAE(Model):
    """Variational autoencoder assembled from `build_encoder` and `build_decoder`.

    The training objective is ``MSE(data, reconstruction) + kl_weight * KL``.
    An Adam optimizer with a staircase exponential learning-rate decay is
    created in ``__init__`` and is used by ``compile()`` unless the caller
    passes a different optimizer.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.encoder = build_encoder()
        self.decoder = build_decoder()
        self.kl_weight = 0.01  # weight of the KL-divergence term in the total loss

        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.0002, decay_steps=10000, decay_rate=0.9, staircase=True
        )
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule, beta_1=0.5, beta_2=0.999)

        # Running means reported by fit() for each loss component.
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.rec_loss_tracker = tf.keras.metrics.Mean(name="recon_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    def compile(self, optimizer=None, *args, **kwargs):
        """Compile the model, defaulting to the optimizer built in ``__init__``.

        The base ``Model.compile`` defaults to ``optimizer='rmsprop'`` and
        assigns the result to ``self.optimizer``. A bare ``vae.compile()`` would
        therefore discard the Adam optimizer configured above, so that optimizer
        is passed through when none is given.
        """
        if optimizer is None:
            optimizer = self.optimizer
        super().compile(optimizer, *args, **kwargs)

    @property
    def metrics(self):
        """Metrics that Keras resets at the start of every epoch."""
        return [
            self.total_loss_tracker,
            self.rec_loss_tracker,
            self.kl_loss_tracker
        ]

    def reparameterize(self, z_mean, z_log_var):
        """Sample ``z = mean + exp(0.5 * log_var) * eps`` with ``eps ~ N(0, I)``."""
        # Draw the noise with the shape of z_mean so this does not depend on
        # the notebook-level latent_dim.
        epsilon = tf.random.normal(shape=tf.shape(z_mean))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running losses."""
        # fit() wraps array inputs in a tuple; only the signals are needed here.
        if isinstance(data, (tuple, list)):
            data = data[0]

        with tf.GradientTape() as tape:
            # training=True is required for the encoder's Dropout and the
            # decoder's BatchNormalization to run in training mode; without it
            # the sub-models are executed in inference mode inside train_step.
            z_mean, z_log_var = self.encoder(data, training=True)
            z = self.reparameterize(z_mean, z_log_var)

            reconstructions = self.decoder(z, training=True)
            # The decoder emits (batch, signal_length); add the channel axis to match `data`.
            reconstructions = tf.reshape(reconstructions, [-1, signal_length, 1])

            # Reconstruction loss: mean squared error.
            # (An earlier variant used |d| + 0.5 * d**2, i.e. L1 + L2.)
            rec_loss = tf.reduce_mean(tf.square(data - reconstructions))

            # KL divergence to a standard normal, averaged over batch and latent dims.
            kl_loss = -0.5 * tf.reduce_mean(
                z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1
            )

            total_loss = rec_loss + self.kl_weight * kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        self.total_loss_tracker.update_state(total_loss)
        self.rec_loss_tracker.update_state(rec_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "recon_loss": self.rec_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result()
        }

    def generate(self, num_samples):
        """Decode `num_samples` standard-normal latent vectors in inference mode."""
        noise = tf.random.normal(shape=(num_samples, latent_dim))
        return self.decoder(noise, training=False)

In [None]:
class GANMonitor(keras.callbacks.Callback):
    """Checkpoint the encoder/decoder and plot generated samples every 100 epochs.

    Parameters
    ----------
    num_img : int
        Number of samples generated for the preview plot.
    latent_dim : int
        Width of the random latent vectors fed to the decoder.
    last_end_epochs : int
        Epoch offset added to the current epoch (e.g. when resuming), used for
        the 100-epoch trigger and the checkpoint file names.
    """

    def __init__(self, num_img=65, latent_dim=128, last_end_epochs=0):
        # Initialise the base Callback state before adding our own attributes.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.last_end_epochs = last_end_epochs

    def on_epoch_end(self, epoch, logs=None):
        import os

        global_epoch = self.last_end_epochs + epoch + 1
        if global_epoch % 100 != 0:
            return

        # Make sure the checkpoint directory exists so a long run does not
        # fail at its first save.
        model_dir = '../../models/1D-VAE[20250131]'
        os.makedirs(model_dir, exist_ok=True)
        self.model.encoder.save(f'{model_dir}/GAN-D[{global_epoch}].h5')
        self.model.decoder.save(f'{model_dir}/GAN-G[{global_epoch}].h5')

        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_data = self.model.decoder(random_latent_vectors, training=False).numpy()

        # Undo the scaling for all samples at once, using the notebook-level
        # `scaler` that was fitted on the training matrix.
        flat = generated_data.reshape(len(generated_data), -1)
        restored = scaler.inverse_transform(flat).astype(np.float32)

        # Column 0 is skipped; only the remaining columns are plotted.
        reflact = restored[:, 1:]
        # NOTE(review): the x axis advances by 1 per column starting at 350,
        # while the spectra plot elsewhere uses a step of 10 — confirm the
        # intended axis.
        x = range(350, reflact.shape[1] + 350)
        for i in range(0, reflact.shape[0]):
            plt.plot(x, reflact[i])
        plt.grid(linestyle = '--',alpha=0.7)
        plt.show()

In [None]:
with tf.device('/device:GPU:0'):
    epochs = 20000
    last_end_epochs = 0
    cbk = GANMonitor(num_img=65, latent_dim=latent_dim, last_end_epochs=last_end_epochs)

    vae = VAE()
    # Pass the model's own Adam optimizer explicitly: a bare compile() falls
    # back to Keras' default 'rmsprop' and would overwrite vae.optimizer.
    vae.compile(optimizer=vae.optimizer)

    # Shuffle, then emit full batches of 65 samples (a trailing partial batch is dropped).
    dataset = tf.data.Dataset.from_tensor_slices(gan_data_matrix)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(65, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    real_data = dataset

    # The dataset is already batched, so `batch_size` must not be passed to fit().
    history = vae.fit(
        real_data,
        epochs=epochs,
        verbose=1,
        callbacks=[cbk]
    )