In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from scipy import signal
from scipy.stats import pearsonr
# import pywt  # Continuous Wavelet Transform
import copy
import scipy.stats as st
from scipy.special import comb
import seaborn as sns
from sympy import *
import math

import kennard_stone as ks 

import tensorflow_addons as tfa

# Show the installed TensorFlow version in the cell output.
print(tf.__version__)

In [None]:
import tensorflow as tf

# Count the GPUs that TensorFlow can see; 0 means everything below runs on CPU.
visible_gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

In [None]:
# Relative location of the vis-NIR reflectance table used by the whole notebook.
path = '../../Datas/Paper_data/土壤有机质数据/2024第二批数据(96个土样)/re_vis-NIR.csv'
# Read the complete CSV into a DataFrame; later cells slice columns out of it.
data = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the first five rows to check that the file parsed as expected.
data.head(n=5)

In [None]:
# Spectra: every column from "X400" through "X2400" (label slice, both ends included).
X = data.loc[:, "X400":"X2400"].to_numpy().astype("float32")
# Target values taken from the "SOM" column.
Y = data["SOM"].to_numpy().astype("float32")
# Evenly spaced axis from 400 to 2400 with one point per spectral column of X.
wavelengths = np.linspace(400, 2400, num=X.shape[1])
# The complete table (all columns) as float32; later cells split it by column position.
train_data = data.to_numpy().astype("float32")

In [None]:
def show_hyperspectral_image(_data, title=None, x_label_start=0, sample_interval=10, xticks_interval=20):
    """Plot every row of a 2-D spectra array as one line on a shared axis.

    Args:
        _data: 2-D array exposing ``.shape`` and row indexing (one spectrum per row).
        title: Text used as the axes title.
        x_label_start: Label shown for the first column on the x axis.
        sample_interval: Label increment between two consecutive columns.
        xticks_interval: A tick (and label) is drawn every this many columns.
            Defaults to 20, the value that used to be hard-coded.
    """
    n_spectra, n_bands = _data.shape[0], _data.shape[1]
    # Lines are drawn against column positions; the ticks are relabelled below.
    positions = range(n_bands)
    tick_labels = range(x_label_start, n_bands * sample_interval + x_label_start, sample_interval)

    # Draw through the explicit Axes object instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=[6, 4], dpi=400)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for row in range(n_spectra):
        ax.plot(positions, _data[row])

    # Same stride on positions and labels keeps the two sequences aligned.
    ax.set_xticks(list(positions[::xticks_interval]))
    ax.set_xticklabels(list(tick_labels[::xticks_interval]), rotation=0)
    ax.set_xlabel('Wavelength/nm', fontsize=13)
    ax.set_ylabel('Reflectance', fontsize=13)
    ax.set_title(title, fontsize=15)
    ax.grid(linestyle='--', alpha=0.7)
    plt.show()

def SG(data, w=11, p=2):
    """Smooth ``data`` with a Savitzky-Golay filter.

    Args:
        data: Array handed to ``scipy.signal.savgol_filter``.
        w: Window length of the filter.
        p: Order of the fitted polynomial.

    Returns:
        The filtered array returned by ``savgol_filter``.
    """
    smoothed = signal.savgol_filter(data, window_length=w, polyorder=p)
    return smoothed

In [None]:
# Plot the Savitzky-Golay-smoothed spectra; x labels start at 400 and advance by 10 per column.
show_hyperspectral_image(SG(X), title='Raw', x_label_start=400, sample_interval=10)

In [None]:
# Column 0 of `train_data` is used as the target, all remaining columns as features.
# NOTE(review): presumably column 0 is the "SOM" column — confirm against the CSV layout.
all_features = train_data[:, 1:]
all_targets = train_data[:, :1]
x_train, x_test, y_train, y_test = ks.train_test_split(all_features, all_targets, test_size=0.3)

# Put the target back in front of its spectrum so each GAN training row is [target, spectrum...].
gan_train_data = np.concatenate((y_train, x_train), axis=1)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale every column to [-1, 1]. The fitted `scaler` can map values back to the
# original units with `scaler.inverse_transform(...)`.
scaler = MinMaxScaler(feature_range=(-1, 1))
normalized_data = scaler.fit_transform(gan_train_data)

# Append a channel axis: (n_samples, 217) -> (n_samples, 217, 1), stored as float32.
# The explicit 217 makes the reshape fail if the column count is different.
gan_data_matrix = normalized_data.reshape((len(normalized_data), 217, 1)).astype(np.float32)
gan_data_matrix.shape

In [None]:
latent_dim = 100      # dimension of the noise (latent) vector
critic_steps = 5      # number of critic updates per single generator update
gp_weight = 10       # gradient-penalty coefficient
signal_length = 217   # length of one signal

In [None]:
def build_generator():
    """Create the generator: a dense network mapping a latent vector to a signal.

    Three hidden stages (32, 64 and 128 units) each consist of a ReLU ``Dense``
    layer with He-normal initialisation followed by ``BatchNormalization``.
    The output layer is a ``tanh`` ``Dense`` layer with ``signal_length`` units.

    Returns:
        A ``tf.keras.Sequential`` taking inputs of shape ``(latent_dim,)``.
    """
    stack = [layers.Input(shape=(latent_dim,))]
    for units in (32, 64, 128):
        stack.append(layers.Dense(units, activation='relu', kernel_initializer='he_normal'))
        stack.append(layers.BatchNormalization())
    stack.append(layers.Dense(signal_length, activation='tanh'))
    return tf.keras.Sequential(stack)


# Build one instance just to print its layer table.
g_model = build_generator()
g_model.summary()

In [None]:
def build_critic():
    """Create the critic: two strided Conv1D stages followed by a dense head.

    Each convolutional stage is ``Conv1D(kernel 5, stride 2, 'same')`` ->
    ``LeakyReLU(0.2)`` -> ``Dropout(0.3)`` with 32 and then 64 filters. The
    flattened features go through ``Dense(128)`` + ``LeakyReLU(0.2)`` and a
    final ``Dense(1)`` with no activation.

    Returns:
        A ``tf.keras.Sequential`` taking inputs of shape ``(signal_length, 1)``.
    """
    stack = [layers.Input(shape=(signal_length, 1))]
    for filters in (32, 64):
        stack += [
            layers.Conv1D(filters, 5, strides=2, padding='same'),
            layers.LeakyReLU(alpha=0.2),
            layers.Dropout(0.3),
        ]
    stack += [
        layers.Flatten(),
        layers.Dense(128),
        layers.LeakyReLU(alpha=0.2),
        layers.Dense(1),
    ]
    return tf.keras.Sequential(stack)


# Build one instance just to print its layer table.
d_model = build_critic()
d_model.summary()

In [None]:
def gradient_penalty(batch_size, real_samples, fake_samples, critic):
    """Compute the WGAN-GP gradient penalty on real/fake interpolations.

    Args:
        batch_size: Number of samples in the batch (Python int or scalar tensor).
        real_samples: Rank-3 tensor of real signals; the leading axis is the batch.
        fake_samples: Generated signals with the same shape as ``real_samples``.
        critic: Callable applied to the interpolated batch to get critic scores.

    Returns:
        Scalar tensor: the batch mean of ``(||grad||_2 - 1) ** 2``, where the
        gradient of the critic output is taken with respect to the interpolated
        inputs.
    """
    # One mixing coefficient per sample, drawn uniformly from [0, 1) so that every
    # interpolated point lies on the segment between its real and fake sample.
    # A normal draw would place points outside that segment.
    alpha = tf.random.uniform([batch_size, 1, 1], minval=0.0, maxval=1.0)
    interpolated = alpha * real_samples + (1 - alpha) * fake_samples

    with tf.GradientTape() as tape:
        # `interpolated` is a plain tensor, so it must be watched explicitly.
        tape.watch(interpolated)
        pred = critic(interpolated)

    gradients = tape.gradient(pred, interpolated)
    # L2 norm per sample: reduce over every non-batch axis.
    norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
    return tf.reduce_mean((norm - 1.0) ** 2)

In [None]:
class WGAN_GP(keras.Model):
    """Wasserstein GAN with gradient penalty, trained through a custom ``train_step``.

    Reads the notebook-level settings ``latent_dim``, ``critic_steps``,
    ``gp_weight`` and ``signal_length`` and uses the ``gradient_penalty`` helper.
    """

    def __init__(self, generator, critic, **kwargs):
        """Store both networks and create one Adam optimizer for each.

        Args:
            generator: Model mapping ``(batch, latent_dim)`` noise to signals.
            critic: Model scoring ``(batch, signal_length, 1)`` signals.
            **kwargs: Forwarded to ``keras.Model``.
        """
        super().__init__(**kwargs)
        self.generator = generator
        self.critic = critic
        self.gp_weight = gp_weight
        self.generator_optimizer = self._make_optimizer()
        self.critic_optimizer = self._make_optimizer()

    @staticmethod
    def _make_optimizer():
        """Return a fresh Adam optimizer with its own staircase exponential LR decay."""
        schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=0.0002, decay_steps=20000, decay_rate=0.9, staircase=True)
        return tf.keras.optimizers.Adam(learning_rate=schedule, beta_1=0.5, beta_2=0.999)

    def compile(self, **kwargs):
        """Compile the model and create the running-mean loss trackers."""
        super().compile(**kwargs)
        self.critic_loss_tracker = tf.keras.metrics.Mean(name="c_loss")
        self.generator_loss_tracker = tf.keras.metrics.Mean(name="g_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras through the ``metrics`` property."""
        return [self.critic_loss_tracker, self.generator_loss_tracker]

    def train_step(self, real_data):
        """Run ``critic_steps`` critic updates followed by one generator update.

        Args:
            real_data: Batch of real signals (a tuple batch is reduced to its
                first element).

        Returns:
            Dict with the running means ``"critic_loss"`` and ``"generator_loss"``.
        """
        if isinstance(real_data, tuple):
            real_data = real_data[0]
        batch_size = tf.shape(real_data)[0]

        # `gradient_penalty` calls its critic argument with the inputs only, so wrap
        # the critic to keep it in training mode for the penalty term as well.
        def critic_in_training(inputs):
            return self.critic(inputs, training=True)

        for _ in range(critic_steps):
            noise = tf.random.normal(shape=(batch_size, latent_dim))
            with tf.GradientTape() as tape:
                # training=True must be passed explicitly inside a custom train_step;
                # otherwise BatchNormalization and Dropout run in inference mode.
                fake_data = self.generator(noise, training=True)
                fake_data = tf.reshape(fake_data, [-1, signal_length, 1])

                real_output = self.critic(real_data, training=True)
                fake_output = self.critic(fake_data, training=True)

                # NOTE(review): this term depends only on the data and the generator
                # output, not on critic variables, so it shifts the reported critic
                # loss but adds no gradient to the critic update below.
                mse_loss = tf.reduce_mean(tf.square(real_data - fake_data))

                # Wasserstein critic loss: score fakes low and reals high.
                critic_loss = tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

                gp = gradient_penalty(batch_size, real_data, fake_data, critic_in_training)
                total_critic_loss = critic_loss + gp * self.gp_weight + mse_loss * self.gp_weight * 0.5

            gradients = tape.gradient(total_critic_loss, self.critic.trainable_variables)
            self.critic_optimizer.apply_gradients(zip(gradients, self.critic.trainable_variables))

        noise = tf.random.normal(shape=(batch_size, latent_dim))
        with tf.GradientTape() as tape:
            generated = self.generator(noise, training=True)
            generated = tf.reshape(generated, [-1, signal_length, 1])
            gen_output = self.critic(generated, training=True)
            # The generator tries to raise the critic score of its samples.
            generator_loss = -tf.reduce_mean(gen_output)

        gradients = tape.gradient(generator_loss, self.generator.trainable_variables)
        self.generator_optimizer.apply_gradients(zip(gradients, self.generator.trainable_variables))

        # Only the loss of the last critic iteration is tracked.
        self.critic_loss_tracker.update_state(total_critic_loss)
        self.generator_loss_tracker.update_state(generator_loss)

        return {
            "critic_loss": self.critic_loss_tracker.result(),
            "generator_loss": self.generator_loss_tracker.result()
        }

In [None]:
class GANMonitor(keras.callbacks.Callback):
    """Every 100 global epochs, save both networks and plot freshly generated samples.

    Uses the notebook-level ``scaler`` to map generated rows back to original units.
    """

    def __init__(self, num_img=65, latent_dim=128, last_end_epochs=0):
        """
        Args:
            num_img: Number of samples generated for each plot.
            latent_dim: Size of the noise vector fed to the generator; it must match
                the generator's input size, so pass it explicitly when that is not 128.
            last_end_epochs: Epoch offset added to Keras' epoch index (for resumed runs).
        """
        # Let the base class initialise its own attributes.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.last_end_epochs = last_end_epochs

    def on_epoch_end(self, epoch, logs=None):
        """Checkpoint and plot when the 1-based global epoch is a multiple of 100."""
        global_epoch = self.last_end_epochs + epoch + 1
        if global_epoch % 100 != 0:
            return

        self.model.critic.save(f'../../models/1D-AWGAN-GP[20250131]/GAN-D[{global_epoch}].h5')
        self.model.generator.save(f'../../models/1D-AWGAN-GP[20250131]/GAN-G[{global_epoch}].h5')

        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_data = self.model.generator(random_latent_vectors, training=False).numpy()

        # Flatten each sample to one row and undo the scaling for the whole batch in
        # one call; the row width comes from the generator output itself.
        flat_samples = generated_data.reshape(len(generated_data), -1).astype(np.float32)
        restored = scaler.inverse_transform(flat_samples)

        # Drop column 0 (the target value placed in front of each spectrum).
        reflact = restored[:, 1:]
        # NOTE(review): the x axis is the column index shifted by 350, not a wavelength
        # scale with a per-column step — confirm this is intended.
        x = range(350, reflact.shape[1] + 350)
        for row in reflact:
            plt.plot(x, row)
        plt.grid(linestyle='--', alpha=0.7)
        plt.show()

In [None]:
with tf.device('/device:GPU:0'):
    epochs = 20000
    # Set to the epoch number of a saved checkpoint to resume from it; 0 starts fresh.
    last_end_epochs = 0
    batch_size = 65
    cbk = GANMonitor(num_img=65, latent_dim=latent_dim, last_end_epochs=last_end_epochs)

    if last_end_epochs != 0:
        # Resume: load the saved networks instead of building ones that would be discarded.
        generator = tf.keras.models.load_model(f'../../models/1D-AWGAN-GP[20250131]/GAN-G[{last_end_epochs}].h5')
        critic = tf.keras.models.load_model(f'../../models/1D-AWGAN-GP[20250131]/GAN-D[{last_end_epochs}].h5')
    else:
        generator = build_generator()
        critic = build_critic()

    wgan_gp = WGAN_GP(generator=generator, critic=critic)
    wgan_gp.compile()

    dataset = tf.data.Dataset.from_tensor_slices(gan_data_matrix)
    dataset = dataset.shuffle(buffer_size=10000)
    # drop_remainder=True keeps every batch at exactly `batch_size` samples; a dataset
    # smaller than `batch_size` would therefore yield no batches at all.
    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    real_signals = dataset

    # The dataset already yields batches, so `batch_size` must not be passed to fit().
    history = wgan_gp.fit(
        real_signals,
        epochs=epochs,
        verbose=1,
        callbacks=[cbk]
    )