In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, ReLU, MaxPooling2D, Flatten,
    Dense, Dropout, Softmax
)

def create_alexnet(input_shape, num_classes):
    input_layer = Input(shape=input_shape, name="data")

    # Conv1 + ReLU + Pool1
    x = Conv2D(96, kernel_size=11, strides=4, padding="valid", activation=None,
               kernel_initializer="random_normal", bias_initializer="zeros", name="conv1")(input_layer)
    x = ReLU(name="relu1")(x)
    x = MaxPooling2D(pool_size=3, strides=2, name="pool1")(x)

    # Conv2 + ReLU + Pool2
    x = Conv2D(256, kernel_size=5, strides=1, padding="same", activation=None, groups=2,
               kernel_initializer="random_normal", bias_initializer="zeros", name="conv2")(x)
    x = ReLU(name="relu2")(x)
    x = MaxPooling2D(pool_size=3, strides=2, name="pool2")(x)

    # Conv3 + ReLU
    x = Conv2D(384, kernel_size=3, strides=1, padding="same",
               kernel_initializer="random_normal", bias_initializer="zeros", name="conv3")(x)
    x = ReLU(name="relu3")(x)

    # Conv4 + ReLU
    x = Conv2D(384, kernel_size=3, strides=1, padding="same", groups=2,
               kernel_initializer="random_normal", bias_initializer="zeros", name="conv4")(x)
    x = ReLU(name="relu4")(x)

    # Conv5 + ReLU + Pool5
    x = Conv2D(256, kernel_size=3, strides=1, padding="same", groups=2,
               kernel_initializer="random_normal", bias_initializer="zeros", name="conv5")(x)
    x = ReLU(name="relu5")(x)
    x = MaxPooling2D(pool_size=3, strides=2, name="pool5")(x)

    # Flatten for Fully Connected Layers
    x = Flatten(name="flatten")(x)

    # FC6 + ReLU + Dropout
    x = Dense(1024, activation=None, kernel_initializer="random_normal", bias_initializer="zeros", name="fc6")(x)
    x = ReLU(name="relu6")(x)
    x = Dropout(rate=0.5, name="drop6")(x)

    # FC7 + ReLU + Dropout
    x = Dense(1024, activation=None, kernel_initializer="random_normal", bias_initializer="zeros", name="fc7")(x)
    x = ReLU(name="relu7")(x)
    x = Dropout(rate=0.5, name="drop7")(x)

    # FC8 + Softmax for Final Output
    x = Dense(num_classes, activation=None, kernel_initializer="random_normal", bias_initializer="zeros", name="fc8")(x)
    output = Softmax(name="softmax")(x)

    # Create Model
    model = Model(inputs=input_layer, outputs=output, name="AlexNet")
    return model



In [2]:
import numpy as np
# Path to the saved .npy files
spectrograms_file = "C:/Users/mitak/OneDrive - University of Twente/Documents/UTwente/Master Robotics/Module2/Deep Learning/Advesarial-Attack/dataSpectogram/spectrograms.npy"
targets_file = "C:/Users/mitak/OneDrive - University of Twente/Documents/UTwente/Master Robotics/Module2/Deep Learning/Advesarial-Attack/dataSpectogram/targets.npy"

# Load the spectrograms and targets
spectrograms = np.load(spectrograms_file)
targets = np.load(targets_file)

# Verify the data
print(f"Spectrograms shape: {spectrograms.shape}")  # Should be (num_samples, 227, fixed_time_dim)
print(f"Targets shape: {targets.shape}")  # Should be (num_samples,)


Spectrograms shape: (30000, 227, 169)
Targets shape: (30000,)


In [3]:
# Shuffle the data
indices = np.arange(len(spectrograms))
np.random.shuffle(indices)

spectrograms = spectrograms[indices]
targets = targets[indices]


In [4]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Use only 60% of the data
num_samples = len(spectrograms)
num_training_samples = int(0.6 * num_samples)

# Split data into 60% usable data and 40% for adversarial attack
usable_spectrograms = spectrograms[:num_training_samples]
usable_targets = targets[:num_training_samples]
# Expand dimensions to match AlexNet input format (add a channel dimension)
usable_spectrograms = usable_spectrograms[..., np.newaxis]  # Shape: (num_samples, 227, 169, 1)

adversarial_spectrograms = spectrograms[num_training_samples:]
adversarial_targets = targets[num_training_samples:]

# Verify splits
print(f"Usable data: {len(usable_spectrograms)} samples")
print(f"Adversarial data: {len(adversarial_spectrograms)} samples")

# Further split the 60% usable data into training and testing sets
# Convert targets to one-hot encoding
num_classes = len(np.unique(targets))
targets_one_hot = to_categorical(usable_targets, num_classes=num_classes)

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(usable_spectrograms, targets_one_hot, test_size=0.2, random_state=42)

print(f"Training data: {len(X_train)} samples")
print(f"Testing data: {len(X_val)} samples")

Usable data: 18000 samples
Adversarial data: 12000 samples
Training data: 14400 samples
Testing data: 3600 samples


In [5]:
input_shape = (227, 169, 1)  # Include the channel dimension
model = create_alexnet(input_shape=input_shape, num_classes=num_classes)

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Summary of the model
model.summary()


Model: "AlexNet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data (InputLayer)           [(None, 227, 169, 1)]     0         
                                                                 
 conv1 (Conv2D)              (None, 55, 40, 96)        11712     
                                                                 
 relu1 (ReLU)                (None, 55, 40, 96)        0         
                                                                 
 pool1 (MaxPooling2D)        (None, 27, 19, 96)        0         
                                                                 
 conv2 (Conv2D)              (None, 27, 19, 256)       307456    
                                                                 
 relu2 (ReLU)                (None, 27, 19, 256)       0         
                                                                 
 pool2 (MaxPooling2D)        (None, 13, 9, 256)        0   

In [6]:
import tensorflow as tf
print(tf.test.is_built_with_cuda())  # Should return True
print(tf.config.list_physical_devices('GPU'))  # Should list at least one GPU


True
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [7]:
# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=32,  # Adjust batch size based on available memory
    epochs=20,      # Adjust number of epochs as needed
    verbose=1       # Display progress during training
)


InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [None]:
# Evaluate on validation data
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

In [None]:
from scipy.signal import istft
from IPython.display import Audio
import librosa
import numpy as np

#Check the model predictions to verify that the predictions are correct
# Function to reconstruct the audio from the spectrogram
def spectrogram_to_audio(spectrogram, sample_rate=8000, nperseg=None, noverlap=None):
    """
    Converts a spectrogram back to a time-domain audio signal.
    """
    import librosa
    
    # Convert the decibel spectrogram back to amplitude
    spectrogram_amp = librosa.db_to_amplitude(spectrogram, ref=1.0)

    # Infer nperseg and noverlap if not provided
    if nperseg is None or noverlap is None:
        nperseg = spectrogram.shape[0]  # Use the number of frequency bins as nperseg
        noverlap = nperseg - (spectrogram.shape[1] // 2)  # Approximation for overlap

    # Reconstruct the signal using ISTFT
    _, audio = istft(
        spectrogram_amp,
        fs=sample_rate,
        nperseg=nperseg,
        noverlap=noverlap,
        window="hann"
    )
    return audio

# Select a random spectrogram from the validation set
index = 200  # Replace with any index you want to check
sample_spectrogram = X_val[index]
true_target = np.argmax(y_val[index])  # Get the actual label

# Ensure spectrogram is 2D by squeezing the channel dimension
sample_spectrogram_2d = sample_spectrogram.squeeze()  # Shape: (227, 169)

# Predict the target using the trained model
predicted_target = np.argmax(model.predict(sample_spectrogram[np.newaxis, ...]))

# Display the predicted and true target
print(f"True Target: {true_target}")
print(f"Predicted Target: {predicted_target}")

# Convert the spectrogram back to audio
reconstructed_audio = spectrogram_to_audio(sample_spectrogram_2d)

# Play the audio in the notebook
Audio(reconstructed_audio, rate=8000)


In [None]:
#save the model
model.save("alexnet_spectrogram_model.h5")