## AUDIO GENERATION

In [None]:
from audioGan import AudioGAN
from ganSetup import GANConfig
import numpy as np
import soundfile as sf
import IPython.display as ipd
import matplotlib.pyplot as plt
import IPython
import librosa
import numpy as np

# Shared notebook state: the run configuration and the GAN wrapper built for
# the configured label. NOTE(review): load=True presumably restores previously
# saved models instead of training from scratch — confirm in AudioGAN.
config = GANConfig()
myGan = AudioGAN(load=True, label=config.LABEL)


### AUTOENCODER PLOT

In [None]:
# Round-trip one test clip through the autoencoder, then play, save and plot
# the input next to the reconstruction.
INDEX = 11  # position of the clip inside myGan.testData
sample = myGan.testData[INDEX]

print("Original:")
IPython.display.display(ipd.Audio(data=sample, rate=config.SAMPLE_RATE))
sf.write(config.AUTO_ENCODER_PATH + "AE_Original_test_" + config.LABEL + ".wav", sample, config.SAMPLE_RATE)

# predict() is fed a batch holding a single row of config.AUDIO_SHAPE values;
# the prediction is flattened back to a 1-D signal for playback and plotting.
result = myGan.autoencoder.predict(sample.reshape((1, config.AUDIO_SHAPE))).flatten()

# Scratch copy in the working directory. It is written at config.SAMPLE_RATE
# (previously a hard-coded 16000) so it always agrees with the playback rate
# and with the labelled copy saved below.
sf.write("AE_output.wav", result, config.SAMPLE_RATE)


print("Result:")
IPython.display.display(ipd.Audio(data=result, rate=config.SAMPLE_RATE))
sf.write(config.AUTO_ENCODER_PATH + "AE_Generated_test_" + config.LABEL + ".wav", result, config.SAMPLE_RATE)

# Side-by-side scatter of the raw sample values: input left, reconstruction right.
fig = plt.figure(figsize=(15, 4))
Original = fig.add_subplot(1, 2, 1)
Result = fig.add_subplot(1, 2, 2)

Original.set_title('Original')
Result.set_title('Result')

Original.plot(sample, '.', color='blue')
Result.plot(result, '.', color='green')

plt.subplots_adjust(wspace=0.2, hspace=0.3)
plt.show()

# Keep a copy of the comparison figure next to the other run artefacts.
fig.savefig(config.PICTURE_PATH + "AE_Compare_test_" + config.LABEL + ".png", bbox_inches="tight")


### GENERATOR PLOT

In [None]:
from keras.models import load_model
import numpy as np
import IPython.display as ipd
import matplotlib.pyplot as plt
import soundfile as sf  # Assuming you have the soundfile library installed
import os

# Resolve both saved-model files under the GAN's model directory first, then
# load them (encoder before generator).
encoder_path = os.path.join(myGan.model_path, 'encoder.keras')
generator_path = os.path.join(myGan.model_path, 'generator.keras')
encoder = load_model(encoder_path)
generator = load_model(generator_path)

# Function to predict using encoder and generator
def generate_audio_from_real(real_audio):
    """Re-synthesise a clip by chaining the loaded encoder and generator.

    The clip is reshaped to a single-row batch, passed through
    ``encoder.predict`` and the result through ``generator.predict``; the
    generator output is flattened to 1-D.

    Side effects: plays the original and the generated clip inline, writes
    both as ``Gen_Original_test_<LABEL>.wav`` / ``Gen_Generated_test_<LABEL>.wav``
    under ``config.AUTO_ENCODER_PATH``, shows a side-by-side plot and saves it
    as ``Gen_Compare_test_<LABEL>.png`` under ``config.PICTURE_PATH``.

    Args:
        real_audio: array-like clip exposing ``reshape`` and ``flatten``.
            NOTE(review): its length presumably has to match the encoder's
            input size — confirm against the model definition.

    Returns:
        The flattened generator output.
    """
    # Encoder -> latent vector -> generator, on a batch of one.
    single_batch = real_audio.reshape(1, -1)
    generated_audio = generator.predict(encoder.predict(single_batch)).flatten()
    original_wave = real_audio.flatten()

    # Play and save each clip: (console heading, file-name stem, samples).
    clips = (
        ("Original Audio:", "Gen_Original_test_", original_wave),
        ("Generated Audio:", "Gen_Generated_test_", generated_audio),
    )
    for heading, stem, wave in clips:
        print(heading)
        ipd.display(ipd.Audio(data=wave, rate=config.SAMPLE_RATE))
        sf.write(config.AUTO_ENCODER_PATH + stem + config.LABEL + ".wav", wave, config.SAMPLE_RATE)

    # One subplot per clip, left to right: (title, samples, marker colour).
    fig = plt.figure(figsize=(15, 4))
    panels = (
        ('Original', original_wave, 'blue'),
        ('Generated', generated_audio, 'green'),
    )
    for position, (title, wave, colour) in enumerate(panels, start=1):
        axis = fig.add_subplot(1, 2, position)
        axis.set_title(title)
        axis.plot(wave, '.', color=colour)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    plt.show()

    # Persist the comparison figure.
    fig.savefig(config.PICTURE_PATH + "Gen_Compare_test_" + config.LABEL + ".png", bbox_inches="tight")

    return generated_audio

# Example usage: run one clip from the GAN's test data through the
# encoder -> generator chain defined above in this cell.
INDEX = 10
real_audio = myGan.testData[INDEX]  # clip taken from myGan.testData; change INDEX to try another one
generated_audio = generate_audio_from_real(real_audio)


### Frechet Audio Distance

In [None]:
from frechet_audio_distance import FrechetAudioDistance
 
# Staging helpers (standard library only), imported here so the cell is
# self-contained.
import shutil
import tempfile

# Create the Frechet Audio Distance scorer.
frechet = FrechetAudioDistance(
    model_name="vggish",   # Options: "vggish", "pann", "clap", "encodec"
    sample_rate=16000,     # Sample rate of the audio files
    use_pca=False,         # PCA option of the embedding model switched off
    use_activation=False,  # Activation option of the embedding model switched off
    verbose=False          # Verbosity mode
)

# Paths to the original and generated audio files
original_audio_path = 'WavFiles/Autoencoder/AE_Original_test_fname.wav'
generated_audio_path = 'WavFiles/Autoencoder/AE_Generated_test_fname.wav'

# Per the package README, score() compares two *directories* of audio files
# (background set vs. evaluation set), so each clip is staged in its own
# temporary directory rather than passing the .wav paths directly. The dtype
# is "float32": the package reads audio through soundfile, which has no
# float16 mode.
with tempfile.TemporaryDirectory() as background_dir, \
        tempfile.TemporaryDirectory() as eval_dir:
    shutil.copy(original_audio_path, background_dir)
    shutil.copy(generated_audio_path, eval_dir)
    fad_score = frechet.score(background_dir, eval_dir, dtype="float32")

# Print the FAD score
print(f"The Frechet Audio Distance (FAD) is: {fad_score}")

### Spectral Convergence

In [None]:
import numpy as np
import librosa

# Function to compute the spectrogram of a wav file
def compute_spectrogram(wav_file, sr=16000, n_fft=2048, hop_length=512):
    """Load ``wav_file`` and return its STFT magnitude in decibels.

    Args:
        wav_file: path handed to ``librosa.load``.
        sr: sample rate requested from ``librosa.load``.
        n_fft: FFT window size passed to ``librosa.stft``.
        hop_length: hop between successive frames passed to ``librosa.stft``.

    Returns:
        The output of ``librosa.amplitude_to_db`` with ``ref=np.max``, i.e.
        decibel values measured against this spectrogram's own largest
        magnitude.
    """
    # NOTE(review): spectral convergence is usually computed on linear
    # magnitudes; confirm that feeding it these dB values is intended.
    waveform, _ = librosa.load(wav_file, sr=sr)
    magnitude = np.abs(librosa.stft(waveform, n_fft=n_fft, hop_length=hop_length))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

# Function to calculate Spectral Convergence
def calculate_spectral_convergence(real_spectrogram, generated_spectrogram):
    """Return the spectral convergence between two equally shaped spectrograms.

    The score is ``||real - generated||_F / ||real||_F`` (Frobenius norms):
    0.0 for identical inputs, growing as the generated spectrogram drifts
    away from the reference.

    Args:
        real_spectrogram: 2-D NumPy array used as the reference.
        generated_spectrogram: 2-D NumPy array compared against the reference.

    Returns:
        The ratio of the two Frobenius norms. An all-zero reference makes the
        ratio undefined; NumPy then yields ``inf`` or ``nan``.

    Raises:
        ValueError: if the two spectrograms differ in shape.
    """
    # Explicit check instead of `assert`: asserts vanish under `python -O`,
    # and broadcast-compatible shapes would then give a silently wrong score.
    if real_spectrogram.shape != generated_spectrogram.shape:
        raise ValueError("Spectrograms must have the same shape")
    diff_norm = np.linalg.norm(real_spectrogram - generated_spectrogram, 'fro')
    real_norm = np.linalg.norm(real_spectrogram, 'fro')
    return diff_norm / real_norm

# Saved autoencoder input/output pair to compare.
original_audio_path = 'WavFiles/Autoencoder/AE_Original_test_fname.wav'
generated_audio_path = 'WavFiles/Autoencoder/AE_Generated_test_fname.wav'

# One spectrogram per file, reference first.
real_spectrogram, generated_spectrogram = (
    compute_spectrogram(path)
    for path in (original_audio_path, generated_audio_path)
)

# Score the pair and report the result.
sc_score = calculate_spectral_convergence(real_spectrogram, generated_spectrogram)
print(f"The Spectral Convergence (SC) is: {sc_score}")