In [1]:
# The research paper "Parkinson’s Disease Detection Based on Spectrogram-Deep Convolutional Generative Adversarial Network Sample Augmentation" utilized a Spectrogram Deep Convolutional Generative Adversarial Network (S-DCGAN) for sample augmentation to enhance voiceprint recognition accuracy. The training process involved using the Stochastic Gradient Descent algorithm (SGD) with a batch size of 16, Leaky ReLu slope of 0.2, and Adam optimizer with a learning rate of 0.0002. The S-DCGAN model was trained for 300 epochs, with the loss values converging to specific values for stability

# S-DCGAN Model: Incorporates SN for all layers in the generator (G) and replaces Batch Norm with Spectral Norm in the discriminator (D).
# ResNet50 Classifier: Utilized for voiceprint feature extraction and classification, enhancing recognition performance.
# Optimizers and Hyperparameters:
# Optimizer: Adam optimizer with a learning rate of 0.0002.
# Batch Size: Set to 16 for training stability.
# Leaky ReLu Slope: Maintained at 0.2 for activation functions.
# Training Epochs: Trained for 300 epochs to achieve convergence.
# Loss Functions:
# Generator Loss: Minimizes error in generating fake samples and feature matching process.
# Discriminator Loss: Maximizes network output in a preset manner.
# write a code to train this model and evaluate it

# assume the txt data file names as train_data and test_data

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape, Conv2D, Conv2DTranspose, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization, LeakyReLU, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50




In [3]:
# Define the generator network
def build_generator(input_dim, alpha=0.2):
    model = Sequential()
    model.add(Dense(256 * 8 * 8, input_dim=input_dim))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Reshape((8, 8, 256)))
    model.add(Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', activation='tanh'))
    return model

# Define the discriminator network
def build_discriminator(alpha=0.2):
    model = Sequential()
    model.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same', input_shape=(64, 64, 1)))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Conv2D(256, (5, 5), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=alpha))
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Define the GAN model
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = generator.inputs
    generated_image = generator.outputs
    gan_output = discriminator(generated_image)
    model = tf.keras.models.Model(gan_input, gan_output)
    return model

In [32]:
import pandas as pd
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('test_data.csv')
# Normalize the data
train_data = train_data / 255.0
test_data = test_data / 255.0

In [33]:
train_data.shape, test_data.shape

((1039, 29), (167, 28))

In [26]:
# Hyperparameters
latent_dim = 100
alpha = 0.2
batch_size = 16
epochs = 300
learning_rate = 0.0002

In [27]:
# Build and compile networks
generator = build_generator(latent_dim, alpha)
discriminator = build_discriminator(alpha)
gan = build_gan(generator, discriminator)

In [28]:
discriminator.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy')
gan.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy')

In [29]:
def train_gan(generator, discriminator, gan, train_data, test_data, latent_dim, batch_size, epochs):
    for epoch in range(epochs):
        # Train discriminator
        idx = np.random.randint(0, train_data.shape[0], batch_size)
        X_train_real = tf.reshape(train_data[idx], (-1, 64, 64, 1))  # Reshape to match discriminator input shape
        X_train_fake = generator.predict(np.random.normal(0, 1, (batch_size, latent_dim)))
        X_train = tf.concat([X_train_real, X_train_fake], axis=0)
        y_train = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1))))
        discriminator.trainable = True
        d_loss = discriminator.train_on_batch(X_train, y_train)

        # Train generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        y_gan = np.ones((batch_size, 1))
        discriminator.trainable = False
        g_loss = gan.train_on_batch(noise, y_gan)

        # Print losses
        print(f"Epoch: {epoch+1}/{epochs}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")

    train_data.shape, test_data.shape

In [30]:
print(generator.input_shape)
print(discriminator.input_shape)
print(gan.input_shape)

(None, 100)
(None, 64, 64, 1)
(None, 100)


In [31]:
train_gan(generator, discriminator, gan, train_data, test_data, latent_dim, batch_size, epochs)

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([1013,   88,   44,  688,  361, 1008,  962,  536,  263,  472,  736,
         59,  604,  364,   51,  253])