<a href="https://colab.research.google.com/github/ARBasharat/AudioClassification/blob/master/AudioClassification2D_Gan.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Contact: abbash@iu.edu <br/>
Audio Spectrogram Classification using a Generative Adversarial Network and a 2D-CNN model <br/>

# Import Libraries

In [2]:
import os
import random
import time
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import librosa
from scipy import signal

# Read Data from Google Drive

In [3]:
# Colab helper that exposes Google Drive inside the runtime.
from google.colab import drive

# Mount Drive so the dataset files referenced below become reachable.
drive.mount("/content/drive")

# Raw audio arrays for both splits, cast to float32 right after loading.
train, test = (
    np.load(npy_path).astype('float32')
    for npy_path in (
        "drive/My Drive/AudioClassification/audio_train.npy",
        "drive/My Drive/AudioClassification/audio_test.npy",
    )
)

# Label table for the training clips.
train_labels_df = pd.read_csv("drive/My Drive/AudioClassification/labels_train.csv")


Mounted at /content/drive


# Process the data

In [4]:
# Select the label column (second column of the CSV) by position so it keeps
# its own dtype instead of the common dtype of the whole frame.
train_labels = train_labels_df.iloc[:, 1].to_numpy()
# One-hot encode the class ids.
labels_categorical = keras.utils.to_categorical(train_labels)
print("Training Labels:", labels_categorical.shape)
# The recorded cell output also reports the raw training array shape.
print("Training Data:", train.shape)

Training Labels: (942, 10)
Training Data: (942, 30000)


In [6]:
def get_spectrogram(in_data, target_shape=(1028, 76)):
  """Turn audio clips into zero-padded, peak-normalised magnitude spectrograms.

  Each clip goes through ``librosa.stft``; its magnitude is divided by its own
  maximum and copied into the top-left corner of a zero array of
  ``target_shape``.

  Args:
    in_data: iterable of audio signals accepted by ``librosa.stft``.
    target_shape: ``(rows, cols)`` of the padded spectrogram for every clip.
      The default matches the input size used by the GAN models in this
      notebook.

  Returns:
    Array of shape ``(n_clips, rows, cols, 1)``. An empty ``in_data`` yields
    shape ``(0, rows, cols, 1)``.

  Raises:
    ValueError: if a clip's spectrogram does not fit inside ``target_shape``.
  """
  rows, cols = target_shape
  spectrogram = []
  for elem in in_data:
    converted = librosa.stft(elem)
    spectrum, _ = librosa.magphase(converted)
    spectrum = np.abs(spectrum).astype(np.float32)
    if spectrum.shape[0] > rows or spectrum.shape[1] > cols:
      raise ValueError(
          "spectrogram of shape {} does not fit into target_shape {}".format(
              spectrum.shape, (rows, cols)))
    norm = spectrum.max()
    # A silent clip has an all-zero spectrum; skip the division so the
    # result stays zero instead of becoming NaN.
    if norm > 0:
      spectrum /= norm
    result = np.zeros((rows, cols))
    result[:spectrum.shape[0], :spectrum.shape[1]] = spectrum
    # Trailing axis of size 1 is the channel dimension.
    spectrogram.append(result.reshape((rows, cols, 1)))
  if not spectrogram:
    # Keep the rank consistent even when there is nothing to convert.
    return np.zeros((0, rows, cols, 1))
  return np.array(spectrogram)

In [7]:
# Convert both splits to fixed-size spectrogram tensors (train first, then test).
X_train, X_test = map(get_spectrogram, (train, test))

# Report the resulting tensor shapes.
print("Train Shape: ", X_train.shape)
print("Test Shape: ", X_test.shape)

Train Shape:  (942, 1028, 76, 1)
Test Shape:  (558, 1028, 76, 1)


# Model Definitions

In [8]:
def make_generator_model(noise_dim=100):
    """Build the generator: noise vector -> (1028, 76, 1) spectrogram-shaped tensor.

    A dense layer expands the noise vector to a 257x19x256 feature map, which
    three transposed convolutions upsample (x1, x2, x2) to 1028x76 with a
    single tanh-activated channel. The model summary is printed as a side
    effect.

    Args:
        noise_dim: length of the input noise vector. Defaults to 100, the
            value previously hard-coded here.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.

    NOTE(review): tanh emits values in [-1, 1], whereas get_spectrogram
    produces max-normalised magnitudes in [0, 1]; confirm the range
    mismatch between generated and real samples is intended.
    """
    model = tf.keras.Sequential()
    model.add(layers.Dense(257*19*256, use_bias=False, input_shape=(noise_dim,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((257, 19, 256)))
    assert model.output_shape == (None, 257, 19, 256)
    # Note: None is the batch size

    # Stride 1: refine features without changing the spatial size.
    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1),
                                     padding='same', use_bias=False))
    assert model.output_shape == (None, 257, 19, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Stride 2: double both spatial dimensions.
    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2),
                                     padding='same', use_bias=False))
    assert model.output_shape == (None, 514, 38, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Final upsampling to the spectrogram size with a single output channel.
    model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2),
                                     padding='same', use_bias=False,
                                     activation='tanh'))
    assert model.output_shape == (None, 1028, 76, 1)

    model.summary()

    return model

generator = make_generator_model()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1250048)           125004800 
_________________________________________________________________
batch_normalization (BatchNo (None, 1250048)           5000192   
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 1250048)           0         
_________________________________________________________________
reshape (Reshape)            (None, 257, 19, 256)      0         
_________________________________________________________________
conv2d_transpose (Conv2DTran (None, 257, 19, 128)      819200    
_________________________________________________________________
batch_normalization_1 (Batch (None, 257, 19, 128)      512       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 257, 19, 128)      0

In [9]:
def make_discriminator_model():
    """Build the discriminator for (1028, 76, 1) spectrogram inputs.

    Two stride-2 convolutions, each followed by LeakyReLU and 30% dropout,
    feed a flattened single-unit dense layer with no activation, so the model
    returns one raw score per sample. The model summary is printed as a side
    effect.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    # Settings shared by both downsampling convolutions.
    conv_kwargs = dict(strides=(2, 2), padding='same')

    model = tf.keras.Sequential([
        layers.Conv2D(64, (5, 5), input_shape=[1028, 76, 1], **conv_kwargs),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Conv2D(128, (5, 5), **conv_kwargs),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Flatten(),
        layers.Dense(1),
    ])

    model.summary()
    return model

discriminator = make_discriminator_model()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 514, 38, 64)       1664      
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 514, 38, 64)       0         
_________________________________________________________________
dropout (Dropout)            (None, 514, 38, 64)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 257, 19, 128)      204928    
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 257, 19, 128)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 257, 19, 128)      0         
_________________________________________________________________
flatten (Flatten)            (None, 625024)           

In [10]:
# Binary cross-entropy object shared by the generator and discriminator losses.
# from_logits=True: inputs are treated as raw scores, matching the
# discriminator's final Dense(1) layer, which has no activation.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [11]:
def discriminator_loss(real_output, fake_output):
    """Return the discriminator loss for one batch.

    Real samples are scored against a target of all ones and generated
    samples against a target of all zeros; the two cross-entropy terms are
    added together.

    Args:
        real_output: discriminator scores for real samples.
        fake_output: discriminator scores for generated samples.

    Returns:
        The sum of the real and fake cross-entropy terms.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

In [12]:
def generator_loss(fake_output):
    """Return the generator loss for one batch.

    The generator is rewarded when the discriminator scores its samples as
    real, so the scores are compared against a target of all ones.

    Args:
        fake_output: discriminator scores for generated samples.

    Returns:
        The cross-entropy between an all-ones target and ``fake_output``.
    """
    wanted_verdict = tf.ones_like(fake_output)
    return cross_entropy(wanted_verdict, fake_output)

In [13]:
# Each network gets its own Adam optimizer (both constructed with 1e-4)
# because the two networks are updated from separate gradients in train_step.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

In [14]:
# Keep checkpoints in a directory relative to the working directory rather
# than at the filesystem root, which is not writable in every environment.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# Track both networks and both optimizers so they are saved and restored together.
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [18]:
# Length of the noise vector sampled in train_step; it must equal the
# generator's input size.
noise_dim = 100
# Number of noise vectors held in `seed`.
num_examples_to_generate = 16
# Noise batch of shape (num_examples_to_generate, noise_dim) drawn once here.
# NOTE(review): no later use of `seed` is visible in this part of the notebook.
seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [21]:
@tf.function
def train_step(images):
    """Run one simultaneous update of the generator and the discriminator.

    Args:
        images: batch of real spectrograms; its leading dimension is the
            batch size.

    Side effects:
        Applies one optimizer step to ``generator`` and one to
        ``discriminator``.
    """
    # Draw one noise vector per real sample so the fake batch always matches
    # the real batch, including the smaller final batch of an epoch. This also
    # removes the dependency on the global BATCH_SIZE.
    batch_size = tf.shape(images)[0]
    noise = tf.random.normal([batch_size, noise_dim])

    # Separate tapes: each network is differentiated against its own loss.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
      generated_images = generator(noise, training=True)

      real_output = discriminator(images, training=True)
      fake_output = discriminator(generated_images, training=True)

      gen_loss = generator_loss(fake_output)
      disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [19]:
def train(dataset, epochs, checkpoint_every=15):
  """Train the GAN for a number of epochs, checkpointing along the way.

  Args:
    dataset: iterable of batches; each batch is passed to ``train_step``.
    epochs: number of full passes over ``dataset``.
    checkpoint_every: save a checkpoint every this many epochs (default 15).

  A checkpoint is also written after the final epoch, so restoring the
  latest checkpoint afterwards does not discard the last epochs of training.
  The time printed for an epoch includes any checkpoint save in that epoch.
  """
  for epoch in range(epochs):
    start = time.time()

    for image_batch in dataset:
      train_step(image_batch)

    epoch_number = epoch + 1
    # Periodic save, plus a final save when the epoch count is not a
    # multiple of the interval.
    if epoch_number % checkpoint_every == 0 or epoch_number == epochs:
      checkpoint.save(file_prefix=checkpoint_prefix)

    print('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

In [22]:
# Shuffle buffer size passed to Dataset.shuffle below.
BUFFER_SIZE = 60000
# Samples per batch; also read as a global inside train_step.
BATCH_SIZE = 32
# Number of passes over the training set.
EPOCHS = 50
# Batch and shuffle the data
train_dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(
    BUFFER_SIZE).batch(BATCH_SIZE)
# Run GAN training; per-epoch timings are printed by train().
train(train_dataset, EPOCHS)

Time for epoch 1 is 14.135028839111328 sec
Time for epoch 2 is 12.717559337615967 sec
Time for epoch 3 is 12.878023862838745 sec
Time for epoch 4 is 12.862932443618774 sec
Time for epoch 5 is 12.945351600646973 sec
Time for epoch 6 is 13.007014274597168 sec
Time for epoch 7 is 13.050602436065674 sec
Time for epoch 8 is 13.093243598937988 sec
Time for epoch 9 is 13.141200542449951 sec
Time for epoch 10 is 13.177473783493042 sec
Time for epoch 11 is 13.190034627914429 sec
Time for epoch 12 is 13.232048749923706 sec
Time for epoch 13 is 13.260963201522827 sec
Time for epoch 14 is 13.272834062576294 sec
Time for epoch 15 is 21.07028079032898 sec
Time for epoch 16 is 13.241860151290894 sec
Time for epoch 17 is 13.308809995651245 sec
Time for epoch 18 is 13.313120603561401 sec
Time for epoch 19 is 13.316790103912354 sec
Time for epoch 20 is 13.32909345626831 sec
Time for epoch 21 is 13.380089044570923 sec
Time for epoch 22 is 13.39589524269104 sec
Time for epoch 23 is 13.435005903244019 sec


In [23]:
# Reload the generator, discriminator and both optimizers from the most
# recent checkpoint found in checkpoint_dir.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fe8bc06c208>

In [27]:
# Score every test spectrogram with the trained discriminator.
# Model.predict runs in inference mode and processes the data in batches,
# replacing one forward pass per sample. The discriminator ends in Dense(1),
# so the (n_samples, 1) result is flattened into a plain list of Python
# floats, one raw score per test clip.
predictions = discriminator.predict(X_test, batch_size=32).ravel().tolist()

# Predict labels using 2D-CNN model

In [30]:
def get_spectrogram_XX(in_data, n_frames=74):
  """Turn audio clips into peak-normalised spectrograms padded along the second axis.

  Each clip goes through ``librosa.stft``; its magnitude is divided by its own
  maximum and copied into a zero array that keeps the spectrogram's own number
  of rows and has ``n_frames`` columns.

  Args:
    in_data: iterable of audio signals accepted by ``librosa.stft``.
    n_frames: number of columns in the padded output (default 74, the value
      previously hard-coded here).

  Returns:
    Array of shape ``(n_clips, rows, n_frames, 1)`` when every clip yields the
    same number of rows.

  Raises:
    ValueError: if a clip's spectrogram has more than ``n_frames`` columns.
  """
  spectrogram = []
  for elem in in_data:
    converted = librosa.stft(elem)
    spectrum, _ = librosa.magphase(converted)
    spectrum = np.abs(spectrum).astype(np.float32)
    if spectrum.shape[1] > n_frames:
      raise ValueError(
          "spectrogram has {} frames, more than n_frames={}".format(
              spectrum.shape[1], n_frames))
    norm = spectrum.max()
    # A silent clip has an all-zero spectrum; skip the division so the
    # result stays zero instead of becoming NaN.
    if norm > 0:
      spectrum /= norm
    result = np.zeros((spectrum.shape[0], n_frames))
    result[:spectrum.shape[0], :spectrum.shape[1]] = spectrum
    # Trailing axis of size 1 is the channel dimension.
    result = result.reshape((result.shape[0], result.shape[1], 1))
    spectrogram.append(result)
  return np.array(spectrogram)

# Test spectrograms padded to 74 frames for the CNN classifier; the GAN
# models use the separate (1028, 76) tensors built by get_spectrogram.
X_test_CNN = get_spectrogram_XX(test)

In [28]:
# Load the saved classifier from Drive.
# NOTE(review): presumably the 2D-CNN trained in a separate notebook — confirm.
model_1 = tf.keras.models.load_model("drive/My Drive/AudioClassification/model_2")

In [32]:
# Per-class scores from the CNN for every test clip.
predictions_probabilities = model_1.predict(X_test_CNN)
# Sequential.predict_classes was removed from recent TensorFlow releases;
# taking the arg-max of the scores is the documented replacement and avoids a
# second forward pass.
# NOTE(review): assumes model_1 emits one score per class; a single-unit
# sigmoid output would need thresholding instead.
prediction_classes = np.argmax(predictions_probabilities, axis=-1)

# Assign Labels to the test data

In [33]:
## A non-positive discriminator score marks the clip as "not real" and forces
## label 2; any other clip keeps the class predicted by the CNN.
test_labels = [
    2 if predictions[idx] <= 0 else prediction_classes[idx]
    for idx in range(X_test.shape[0])
]

In [34]:
# Tally how many test clips received each label.
import collections
counter=collections.Counter(test_labels)
print(counter)
# Number of distinct labels that were assigned.
len(counter)

Counter({3: 184, 7: 55, 1: 52, 4: 51, 8: 49, 5: 43, 9: 43, 6: 39, 0: 38, 2: 4})


10

In [35]:
# Write the assigned labels to a CSV file in the working directory; no
# to_csv options are passed, so pandas' defaults apply.
pd.DataFrame(test_labels).to_csv("submission_GAN.csv")

In [39]:
# Header row, then one "index , label" line per test clip.
print("Test_Data_Index", ",", "Assigned Label")
for position, assigned in enumerate(test_labels):
  print(position, ",", assigned)

Test_Data_Index , Assigned Label
0 , 5
1 , 9
2 , 4
3 , 1
4 , 4
5 , 5
6 , 9
7 , 3
8 , 3
9 , 7
10 , 7
11 , 0
12 , 1
13 , 8
14 , 0
15 , 7
16 , 7
17 , 3
18 , 5
19 , 3
20 , 3
21 , 3
22 , 9
23 , 3
24 , 3
25 , 5
26 , 7
27 , 7
28 , 0
29 , 4
30 , 4
31 , 1
32 , 3
33 , 1
34 , 6
35 , 3
36 , 1
37 , 3
38 , 9
39 , 9
40 , 0
41 , 1
42 , 5
43 , 9
44 , 8
45 , 7
46 , 3
47 , 3
48 , 3
49 , 6
50 , 9
51 , 6
52 , 0
53 , 0
54 , 8
55 , 3
56 , 5
57 , 3
58 , 3
59 , 3
60 , 3
61 , 7
62 , 8
63 , 7
64 , 0
65 , 0
66 , 4
67 , 6
68 , 3
69 , 3
70 , 3
71 , 4
72 , 3
73 , 5
74 , 1
75 , 4
76 , 9
77 , 7
78 , 4
79 , 5
80 , 0
81 , 1
82 , 6
83 , 9
84 , 7
85 , 3
86 , 3
87 , 3
88 , 1
89 , 6
90 , 0
91 , 8
92 , 0
93 , 8
94 , 7
95 , 7
96 , 3
97 , 3
98 , 3
99 , 3
100 , 3
101 , 3
102 , 7
103 , 3
104 , 7
105 , 3
106 , 3
107 , 4
108 , 1
109 , 3
110 , 3
111 , 3
112 , 7
113 , 7
114 , 1
115 , 9
116 , 9
117 , 4
118 , 5
119 , 1
120 , 5
121 , 9
122 , 3
123 , 4
124 , 3
125 , 3
126 , 6
127 , 3
128 , 0
129 , 3
130 , 8
131 , 7
132 , 3
133 , 3
134 ,