# Setup

In [None]:
import librosa
import time
import numpy as np
import tensorflow as tf
import IPython
import IPython.display as ipd 
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from keras.optimizers import Adam

%matplotlib inline
matplotlib.style.use('ggplot')

In [None]:
from audioGan import *
from ganSetup import *
from ganModels import *

In [None]:
# Flip to False to target the CPU instead of the first GPU.
USE_GPU = True

# TensorFlow-style device string selected by the flag above.
# NOTE(review): in the cells visible here `device` is only printed, never
# passed to anything — confirm it is actually applied elsewhere.
device = '/device:GPU:0' if USE_GPU else '/cpu:0'
print('Using device: ', device)

# Project settings object; later cells read paths, sample rate, shapes and the label from it.
config = GANConfig()

# MOSEI Database

In [None]:
# Read the MOSEI train/test metadata tables that live under config.DATASET_PATH.
train, test = [
    pd.read_csv(config.DATASET_PATH + csv_name)
    for csv_name in ("mosei_train_updated.csv", "mosei_test_updated.csv")
]

# Show which distinct labels occur in the training table.
print(train.label.unique())

In [None]:
# Rows per label in the training table, then ordered from rarest to most common.
category_group = train.groupby('label').size()
sorted_category_group = category_group.sort_values()

# Bar chart of the per-label counts on a wide figure.
plt.figure(figsize=(16, 10))
plot = sorted_category_group.plot.bar(title="Number of Audio Samples per Category")
plot.set(xlabel="Category", ylabel="Number of Samples")
plt.show()

In [None]:
# Inspect one training clip: load it, print the raw samples, play it and plot the waveform.
# (The previous "# Hi-hat" note did not describe this file and has been dropped.)
fname = config.DATASET_PATH + "audio_train/_4K620KW_Is_30.5020_36.0620.wav"

# Load through the public librosa.load entry point, resampling to config.SAMPLE_RATE.
data, _ = librosa.load(fname, sr=config.SAMPLE_RATE, res_type='kaiser_fast')
print("Total samples (frames) = ", data.shape)
print(data)

# The player reads the file directly, so it plays at the file's own sample rate.
IPython.display.display(ipd.Audio(filename=fname))

# Labelled waveform so the figure stands on its own when the notebook is skimmed.
wave_fig, wave_ax = plt.subplots()
wave_ax.plot(data, '-')
wave_ax.set(title="Waveform", xlabel="Sample index", ylabel="Amplitude")
plt.show()

In [None]:
# Keep only the first two seconds' worth of samples of the clip.
two_seconds = 2*config.SAMPLE_RATE
data = data[:two_seconds]

# Extract both feature representations from the trimmed clip.
mfcc = librosa.feature.mfcc(y=data, sr=config.SAMPLE_RATE, n_mfcc=40)
mel = librosa.feature.melspectrogram(y=data, sr=config.SAMPLE_RATE)
print("MFCC Shape: ", mfcc.shape)
print("Mel Spectrogram Shape: ", mel.shape)

# Two stacked heat maps sharing the same rendering options.
heatmap_style = dict(cmap='hot', interpolation='nearest', aspect='auto', origin='lower')
fig, axs = plt.subplots(2, 1, figsize=(10, 10))
mfcc_ax, mel_ax = axs

# Top panel: MFCC matrix as-is.
mfcc_ax.imshow(mfcc, **heatmap_style)
mfcc_ax.set_title('MFCC')

# Bottom panel: mel power converted to dB relative to its maximum, with a colour bar.
mel_dB = librosa.power_to_db(mel, ref=np.max)
img = mel_ax.imshow(mel_dB, **heatmap_style)
fig.colorbar(img, ax=mel_ax, format='%+2.0f dB')
mel_ax.set_title('Mel Spectrogram')

plt.show()

# GAN Architecture

### Discriminator

In [None]:
# Build a discriminator for inputs described by config.AUDIO_SHAPE and show its summary.
# `discriminator` comes from one of the project star-imports at the top of the notebook.
d = discriminator(config.AUDIO_SHAPE)
d.summary()

### Generator

In [None]:
# Build a generator from config.NOISE_DIM and config.AUDIO_SHAPE and show its summary.
# NOTE(review): presumably noise of size NOISE_DIM in, audio of AUDIO_SHAPE out — confirm in ganModels.
g = generator(config.NOISE_DIM, config.AUDIO_SHAPE)
g.summary()

### Stacked Generator and Discriminator

In [None]:
# Combine the generator `g` and discriminator `d` built in the previous cells
# into one stacked model and show its summary.
s = stacked_G_D(g,d)
s.summary()

### Autoencoder

In [None]:
# Build an encoder from config.AUDIO_SHAPE and config.ENCODE_SIZE, then pair it
# with the existing generator `g` to form the autoencoder and show its summary.
# NOTE(review): presumably `g` acts as the decoder half — confirm in ganModels.
e = encoder(config.AUDIO_SHAPE, config.ENCODE_SIZE)
a = autoEncoder(e, g)
a.summary()

# Audio GAN 

In [12]:
# Create the AudioGAN wrapper for the label chosen in the config.
# Later cells read trainData, autoencoder, gen, dis and the loss histories from it.
myGan = AudioGAN(label = config.LABEL)

### Running Autoencoder

In [15]:
# Train the autoencoder for one epoch and report how long the training call took.
batch = 32  # single source of truth for the batch size used below
print(len(myGan.trainData))

if len(myGan.trainData) > batch:  # Ensure there are more samples than the batch size
    start = time.time()
    # Pass the `batch` variable instead of repeating the literal, so the guard
    # above and the training call can never disagree.
    myGan.train_autoencoder(myGan.trainData, epochs=1, batch_size=batch)
    end = time.time()

    total = round(end - start, 2)
    print("\nExecution Time: ", total, "s")
else:
    print("Not enough data to form a batch.")

2087
[1m 1/66[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m38:49[0m 36s/step - accuracy: 0.0000e+00 - loss: 1.7397

In [None]:
import soundfile as sf

# Pick one training clip and compare it with its autoencoder reconstruction.
INDEX = 11
sample = myGan.trainData[INDEX]

# Play the untouched clip and save it as a WAV file.
print("Original:")
ipd.display(ipd.Audio(data=sample, rate=config.SAMPLE_RATE))
sf.write(config.AUTO_ENCODER_PATH + "AE_Original_" + config.LABEL + ".wav", sample, config.SAMPLE_RATE)

# Feed the clip to the autoencoder as a batch of one and flatten the prediction to 1-D.
model_input = sample.reshape((1, config.AUDIO_SHAPE))
result = myGan.autoencoder.predict(model_input).flatten()

# Play the reconstruction and save it next to the original.
print("Result:")
ipd.display(ipd.Audio(data=result, rate=config.SAMPLE_RATE))
sf.write(config.AUTO_ENCODER_PATH + "AE_Result_" + config.LABEL + ".wav", result, config.SAMPLE_RATE)

# Side-by-side scatter of both signals: original on the left, reconstruction on the right.
fig, (Original, Result) = plt.subplots(1, 2, figsize=(15, 4))
for axis, heading, signal, colour in (
    (Original, 'Original', sample, 'blue'),
    (Result, 'Result', result, 'green'),
):
    axis.set_title(heading)
    axis.plot(signal, '.', color=colour)

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

# Keep a copy of the comparison figure on disk.
fig.savefig(config.PICTURE_PATH + "AE_Compare_" + config.LABEL + ".png", bbox_inches="tight")


In [None]:
# Zoom into a short window of the original clip and of its reconstruction.
# The figure is created at its final size straight away; the old code built a
# throwaway (1, 2)-inch figure and resized it to 30x10 afterwards.
fig = plt.figure(figsize=(30, 10))
OriginalZ = fig.add_subplot(1, 2, 1)
ResultZ = fig.add_subplot(1, 2, 2)

OriginalZ.set_title('Original Zoom')
ResultZ.set_title('Result Zoom')

# Window shown in both panels: ZoomSize samples starting at ZoomInit.
# NOTE(review): if a clip has fewer than ZoomInit + ZoomSize samples the slices
# below are short or empty and the panels show little or nothing.
ZoomInit = 40000
ZoomSize = 100
window = slice(ZoomInit, ZoomInit + ZoomSize)

# Markers and the connecting line are drawn as two separate plot calls on each panel.
for axis, signal in ((OriginalZ, sample), (ResultZ, result)):
    axis.plot(signal[window], '.')
    axis.plot(signal[window], '-')

plt.subplots_adjust(wspace=0.1, hspace=0.3)
plt.show()

fig.savefig(config.PICTURE_PATH + "AE_CompareZ_" + config.LABEL + ".png", bbox_inches = "tight")

### Running GAN

In [None]:
# Settings for this GAN training run, collected in one place.
gan_run = dict(epochs = 5, batch = 32, save_interval = 2)

# Time the whole training call with wall-clock timestamps.
start = time.time()
myGan.train_gan(**gan_run)
end = time.time()

# Elapsed seconds, rounded to two decimals.
total = round(end - start, 2)
print("Execution Time: ", total, "s")

### Discriminator and Generator Loss over Epochs

In [None]:
# Plot the discriminator and generator loss histories recorded on myGan,
# using an explicit figure/axes pair.
loss_fig, loss_ax = plt.subplots(figsize=(15, 5))
loss_ax.set_title('Losses over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')

for history, name, colour in (
    (myGan.disLossHist, 'Discriminator', 'blue'),
    (myGan.genLossHist, 'Generator', 'red'),
):
    loss_ax.plot(history, '-', label=name, color=colour)
loss_ax.legend(loc='best')

# Save before showing, as the original cell did.
loss_fig.savefig(config.PICTURE_PATH + "D_G_Loss.png", bbox_inches='tight')
plt.show()

### Test Sample

In [None]:
# Draw a lin x col grid of freshly generated samples.
lin, col = 1, 6

# figsize is (width, height) in inches; the old code passed (lin, col) here and
# then overrode it with set_size_inches(80, 10), so the final size is set directly.
# squeeze=False keeps `axes` 2-D even when lin or col is 1.
fig, axes = plt.subplots(lin, col, figsize=(80, 10), squeeze=False)
subPlotS = list(axes.flat)  # row-major order, same as add_subplot indices 1..lin*col

for ax in subPlotS:
    # One standard-normal noise vector per panel, pushed through the generator.
    gen_noise = np.random.normal(0, 1, (1, config.NOISE_DIM))
    gen_test = myGan.gen.predict(gen_noise).flatten()
    # To listen to a sample: ipd.display(ipd.Audio(data=gen_test, rate=config.SAMPLE_RATE))
    ax.plot(gen_test, '.', color='red')

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

fig.savefig(config.PICTURE_PATH + "Generated_"+ config.LABEL + ".png", bbox_inches='tight')

### Original Samples

In [None]:
# Draw a lin x col grid of randomly chosen clips from the training data.
lin, col = 1, 6

# figsize is (width, height) in inches; the old code passed (lin, col) here and
# then overrode it with set_size_inches(80, 10), so the final size is set directly.
# squeeze=False keeps `axes` 2-D even when lin or col is 1.
fig, axes = plt.subplots(lin, col, figsize=(80, 10), squeeze=False)
subPlotS = list(axes.flat)  # row-major order, same as add_subplot indices 1..lin*col

for ax in subPlotS:
    # Draw an index uniformly from the training data (repeats are possible).
    random_index = np.random.randint(0, len(myGan.trainData))
    original = myGan.trainData[random_index]
    # To listen to a sample: ipd.display(ipd.Audio(data=original, rate=config.SAMPLE_RATE))
    ax.plot(original, '.', color='blue')

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

fig.savefig(config.PICTURE_PATH + "Originals_"+ config.LABEL + ".png", bbox_inches='tight')

### Visualize Conv 1D Filters

#### Discriminator

In [None]:
# Plot the kernels of one discriminator layer, one filter per panel.
# Obs: lin x col = filters (extra filters are not drawn, extra panels stay empty)
lin, col = 8, 4
layNum = 1  # index of the discriminator layer whose kernels are drawn

# NOTE(review): assumes the layer's first weight array is a 3-D Conv1D kernel
# laid out as (kernel_size, in_channels, filters) — confirm against ganModels.
kernel = myGan.dis.layers[layNum].get_weights()[0]
kernelSize = kernel.shape[0]
filters = kernel.shape[2]

print("Filters: ", filters)
print("Kernel Size: ", kernelSize)

# Swap the axes to (filters, in_channels, kernel_size). The previous
# reshape(filters, 1, kernelSize) only relabelled the flat buffer, which mixed
# taps from different filters and failed whenever in_channels != 1.
W = np.transpose(kernel, (2, 1, 0))
print(W.shape)

# figsize is (width, height) in inches; create the figure at its final size.
fig, axes = plt.subplots(lin, col, figsize=(20, 20), squeeze=False)
subPlotS = list(axes.flat)  # row-major order, same as add_subplot indices

# zip stops at the shorter of panels/filters, so a mismatch cannot raise IndexError.
for ax, filterTaps in zip(subPlotS, W):
    # Only the taps for input channel 0 are drawn, as in the original indexing W[f][0].
    ax.plot(filterTaps[0], '.-', color='purple')

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

#### Generator

In [None]:
# Plot the kernels of one generator layer, one filter per panel.
# Obs: lin x col = filters (extra filters are not drawn, extra panels stay empty)
lin, col = 4, 4
layNum = 4  # index of the generator layer whose kernels are drawn

# NOTE(review): assumes the layer's first weight array is a 3-D Conv1D kernel
# laid out as (kernel_size, in_channels, filters) — confirm against ganModels.
kernel = myGan.gen.layers[layNum].get_weights()[0]
kernelSize = kernel.shape[0]
filters = kernel.shape[2]

print("Filters: ", filters)
print("Kernel Size: ", kernelSize)

# Swap the axes to (filters, in_channels, kernel_size). The previous
# reshape(filters, 1, kernelSize) only relabelled the flat buffer, which mixed
# taps from different filters and failed whenever in_channels != 1.
W = np.transpose(kernel, (2, 1, 0))
print(W.shape)

# figsize is (width, height) in inches; create the figure at its final size.
fig, axes = plt.subplots(lin, col, figsize=(20, 20), squeeze=False)
subPlotS = list(axes.flat)  # row-major order, same as add_subplot indices

# zip stops at the shorter of panels/filters, so a mismatch cannot raise IndexError.
for ax, filterTaps in zip(subPlotS, W):
    # Only the taps for input channel 0 are drawn, as in the original indexing W[f][0].
    ax.plot(filterTaps[0], '.-', color='purple')

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()