# Checking which GPU is alocated

In [1]:
!nvidia-smi

Thu Jan 26 18:08:55 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.82.01    Driver Version: 470.82.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

<hr>

# Define imports and variables

In [2]:
#!pip install tensorflow-addons

#!pip install librosa

import tensorflow as tf
print("Tensorflow Version: ", tf.__version__)

Tensorflow Version:  2.6.4


In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras import layers
import math
import matplotlib.pyplot as plt
from tqdm import tqdm
import librosa 
from glob import glob

import random
from functools import partial
import warnings
warnings.filterwarnings("ignore")               # ignore some warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'        # ignore some warnings

import IPython.display as ipd
from tensorflow.keras import mixed_precision

<hr>

# Code

In [4]:
# GLOBAL VARIABLES
min_signal_rate = 0.02
max_signal_rate = 0.95
ema = 0.999

# AUXILIARY FUNCTIONS TO DECLARE
def sinusoidal_embedding(x):
    embedding_min_frequency = 1.0
    embedding_max_frequency = 1000.0
    embedding_dims = 32
    frequencies = tf.exp(
        tf.linspace(
            tf.math.log(embedding_min_frequency),
            tf.math.log(embedding_max_frequency),
            embedding_dims // 2,
        )
    )
    angular_speeds = 2.0 * math.pi * frequencies
    embeddings = tf.concat(
        [tf.sin(angular_speeds * x), tf.cos(angular_speeds * x)], axis=3
    )
    return embeddings

def ResidualBlock(width):
    def apply(x):
        input_width = x.shape[3]
        if input_width == width:
            residual = x
        else:
            residual = layers.Conv2D(width, kernel_size=1)(x)
        x = layers.BatchNormalization(center=False, scale=False)(x)
        x = layers.Conv2D(
            width, kernel_size=3, padding="same", activation=keras.activations.swish
        )(x)
        x = layers.Conv2D(width, kernel_size=3, padding="same")(x)
        x = layers.Add()([x, residual])
        return x

    return apply

def DownBlock(width, block_depth):
    def apply(x):
        x, skips = x
        for _ in range(block_depth):
            x = ResidualBlock(width)(x)
            skips.append(x)
        x = layers.AveragePooling2D(pool_size=2)(x)
        return x

    return apply

def UpBlock(width, block_depth, attention=False):
    def apply(x):
        x, skips = x
        x = layers.UpSampling2D(size=2, interpolation="bilinear")(x)
        for _ in range(block_depth):
            skip = skips.pop()
            x = layers.Concatenate()([x, skip] if not attention else [
                x, skip, layers.MultiHeadAttention(
                    num_heads=4, key_dim=1, attention_axes=(1,2)
                )(x, skip)
            ])
            x = ResidualBlock(width)(x)
        return x

    return apply

def get_network(widths, block_depth, dim1=256, dim2=128, channels=1, attention=False):
    noisy_input = keras.Input(shape=(dim1, dim2, channels))
    noise_variances = keras.Input(shape=(1, 1, 1))
    
    upsample_shape = (dim1, dim2)

    e = layers.Lambda(sinusoidal_embedding)(noise_variances)
    e = layers.UpSampling2D(size=upsample_shape, interpolation="nearest")(e)

    x = layers.Conv2D(widths[0], kernel_size=1)(noisy_input)
    x = layers.Concatenate()([x, e])

    skips = []
    for width in widths[:-1]:
        x = DownBlock(width, block_depth)([x, skips])

    for _ in range(block_depth):
        x = ResidualBlock(widths[-1])(x)

    for idx, width in enumerate(reversed(widths[:-1])):
        x = UpBlock(width, block_depth, attention=attention and idx ==0)([x, skips])

    x = layers.Conv2D(channels, kernel_size=1, kernel_initializer="zeros")(x)

    return keras.Model([noisy_input, noise_variances], x, name="residual_unet")

def spectral_norm(pred, real):
    """Calculate difference in spectral norm between two batches of spectrograms."""
    norm_real = tf.norm(real, axis=(1,2)) + 1e-6
    norm_pred = tf.norm(pred, axis=(1,2)) + 1e-6
    return tf.reduce_mean(tf.abs(norm_real - norm_pred) / norm_real)

def time_derivative(pred, real, window=1):
    real_derivative = real[:, :-window, :, :] - real[:, window:, :, :]
    pred_derivative = pred[:, :-window, :, :] - pred[:, window:, :, :]
    return tf.reduce_mean(tf.keras.losses.MSE(real_derivative, pred_derivative))

# CNN model that will be used
class DDIM(keras.Model):
    """DDIM model modified from this tutorial: https://keras.io/examples/generative/ddim/"""
    
    def __init__(self, widths, block_depth, attention=False, dim1=256, dim2=128):
        super().__init__()

        self.normalizer = layers.Normalization(axis=(2,3))
        self.network = get_network(widths, block_depth, attention=attention, dim1=dim1, dim2=dim2)
        self.ema_network = keras.models.clone_model(self.network)
        self.spec_mod = 0
        self.dx_mod = 0

    def compile(self, **kwargs):
        super().compile(**kwargs)

        self.noise_loss_tracker = keras.metrics.Mean(name="n_loss")
        self.data_loss_tracker = keras.metrics.Mean(name="d_loss")
        
        self.noise_spec_tracker = keras.metrics.Mean(name="n_spec")
        self.data_spec_tracker = keras.metrics.Mean(name="d_spec")
        
        self.noise_dx_tracker = keras.metrics.Mean(name="n_dx")
        self.data_dx_tracker = keras.metrics.Mean(name="d_dx")
        
        self.noise_total_tracker = keras.metrics.Mean(name="n_total")
        self.data_total_tracker = keras.metrics.Mean(name="d_total")

    @property
    def metrics(self):
        return [
            self.noise_loss_tracker, 
            self.data_loss_tracker,
            
            self.noise_spec_tracker,
            self.data_spec_tracker,
            
            self.noise_dx_tracker,
            self.data_dx_tracker,
            
            self.noise_total_tracker,
            self.data_total_tracker
        ]
    
    def update_trackers(self, n_l, n_s, n_d, d_l, d_s, d_d):
        """Update all loss trackers."""
        n_t = n_l + n_s + n_d
        d_t = d_l + d_s + d_d
        
        for loss, tracker in zip(
            [n_l, n_s, n_d, n_t, d_l, d_s, d_d, d_t], 
            [
                self.noise_loss_tracker, self.noise_spec_tracker, self.noise_dx_tracker, self.noise_total_tracker,
                self.data_loss_tracker, self.data_spec_tracker, self.data_dx_tracker, self.data_total_tracker
            ]
        ):
            tracker.update_state(loss)
            
    def get_losses(self, y_true, y_pred):
        """Get losses for model."""
        return (
            tf.reduce_mean(
                self.loss(y_pred, y_true)
            ), spectral_norm(
                y_pred, y_true
            ), time_derivative(
                y_pred, y_true
            )
        )

    def denormalize(self, data):
        data = self.normalizer.mean + data * self.normalizer.variance**0.5
        return tf.clip_by_value(data, -128.0, 128.0)

    def diffusion_schedule(self, diffusion_times):
        start_angle = tf.acos(max_signal_rate)
        end_angle = tf.acos(min_signal_rate)
        diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)
        signal_rates = tf.cos(diffusion_angles)
        noise_rates = tf.sin(diffusion_angles)
        return noise_rates, signal_rates

    def denoise(self, noisy_data, noise_rates, signal_rates, training):
        if training:
            network = self.network
        else:
            network = self.ema_network
        pred_noises = network([noisy_data, noise_rates**2], training=training)
        pred_data = (noisy_data - noise_rates * pred_noises) / signal_rates

        return pred_noises, pred_data

    def reverse_diffusion(self, initial_noise, diffusion_steps):
        num_examples = tf.shape(initial_noise)[0]
        step_size = 1.0 / diffusion_steps

        # important line:
        # at the first sampling step, the "noisy data" is pure noise
        # but its signal rate is assumed to be nonzero (min_signal_rate)
        next_noisy_data = initial_noise
        for step in tqdm(range(diffusion_steps)):
            noisy_data = next_noisy_data

            # separate the current noisy data to its components
            diffusion_times = tf.ones((num_examples, 1, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_data = self.denoise(
                noisy_data, noise_rates, signal_rates, training=False
            )
            # network used in eval mode

            # remix the predicted components using the next signal and noise rates
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_data = (
                next_signal_rates * pred_data + next_noise_rates * pred_noises
            )
            # this new noisy data will be used in the next step

        return pred_data

    def generate(self, num_examples, shape, diffusion_steps):
        # noise -> data -> denormalized data
        initial_noise = tf.random.normal(shape=(num_examples, shape[0], shape[1], shape[2]))
        generated_data = self.reverse_diffusion(initial_noise, diffusion_steps)
        generated_data = self.denormalize(generated_data)
        return generated_data

    def train_step(self, data):
        batch_size = tf.shape(data)[0]
        # normalize data to have standard deviation of 1, like the noises
        data = self.normalizer(data, training=True)
        noises = tf.random.normal(shape=tf.shape(data))

        # sample uniform random diffusion times
        diffusion_times = tf.random.uniform(
            shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        noise_rates = noise_rates
        signal_rates = signal_rates
        # mix the data with noises accordingly
        noisy_data = signal_rates * data + noise_rates * noises

        with tf.GradientTape() as tape:
            # train the network to separate noisy data to their components
            pred_noises, pred_data = self.denoise(
                noisy_data, noise_rates, signal_rates, training=True
            )

            noise_loss, noise_spec, noise_dx = self.get_losses(noises, pred_noises) #safe_reduce_mean(self.loss(noises, pred_noises))  # used for training
            total_noise_loss = tf.reduce_sum([
                noise_loss, 
                self.spec_mod*noise_spec, 
                self.dx_mod*noise_dx
            ])
            data_loss, data_spec, data_dx = self.get_losses(data, pred_data) #safe_reduce_mean(self.loss(data, pred_data))  # only used as metric

        gradients = tape.gradient(noise_loss, self.network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.network.trainable_weights))

        self.update_trackers(
            noise_loss, noise_spec, noise_dx,
            data_loss, data_spec, data_dx
        )

        # track the exponential moving averages of weights
        for weight, ema_weight in zip(self.network.weights, self.ema_network.weights):
            ema_weight.assign(ema * ema_weight + (1 - ema) * weight)

        # KID is not measured during the training phase for computational efficiency
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        # normalize data to have standard deviation of 1, like the noises
        batch_size = tf.shape(data)[0]
        
        data = self.normalizer(data, training=False)
        noises = tf.random.normal(shape=tf.shape(data))

        # sample uniform random diffusion times
        diffusion_times = tf.random.uniform(
            shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        # mix the data with noises accordingly
        noisy_data = signal_rates * data + noise_rates * noises

        # use the network to separate noisy data to their components
        pred_noises, pred_data = self.denoise(
            noisy_data, noise_rates, signal_rates, training=False
        )

        noise_loss = self.loss(noises, pred_noises)
        data_loss = self.loss(data, pred_data)

        self.data_loss_tracker.update_state(data_loss)
        self.noise_loss_tracker.update_state(noise_loss)

        return {m.name: m.result() for m in self.metrics}

def load_at_interval(x, rate=10_000, feats=256, duration=3.3):
    """Load music from file at some offset. Return MDCT spectrogram of that data"""
    file = x[0].numpy().decode()
    idx = x[1].numpy()
    audio, sr = librosa.load(file, duration=duration, sr=rate, offset=idx)
    audio_fill = np.zeros(int(rate*duration), dtype=np.float32)
    audio_fill[:len(audio)] = audio
    spec = tf.signal.mdct(audio_fill, feats)
    return spec

def load_audio(x,y, rate=10_000, mdct_feats=256, duration=3.3):
    """TF function for loading MDCT spectrogram from file."""
    out = tf.py_function(lambda x,y: load_at_interval( 
        (x,y), rate=rate, feats=mdct_feats, duration=duration
    ), inp=[x,y], Tout=tf.float32)
    return out

def get_files_dataset(
        glob_location,
        total_seconds=2,
        out_len = 3.3,
        hop_size=1,
        max_feats = 2048,
        batch_size=4,
        shuffer_size=1000,
        scale=1,
        rate=10_000,
        mdct_feats=256
    ):
    """Get file dataset loader for a glob of audio files."""
    
    files = glob(
        glob_location,
        recursive=True
    )
    
#     files = [file for file in files if file not in exclude]
    
    def file_list_generator():
        for _ in range(total_seconds):
            for file in files:
                yield file, _*hop_size
                
    load_fn = partial(load_audio, duration=out_len, rate=rate, mdct_feats=mdct_feats)
                
    dg =tf.data.Dataset.from_generator(
        file_list_generator, output_signature = (
            tf.TensorSpec(shape=(), dtype=tf.string), 
            tf.TensorSpec(shape=(), dtype=tf.int32)
            )).shuffle(shuffer_size).map(
                load_fn, num_parallel_calls=tf.data.AUTOTUNE).map(
                    lambda x: tf.expand_dims(x, -1)[:max_feats, :, :]*scale).map(
                        lambda x: tf.ensure_shape(x, (max_feats, mdct_feats//2, 1))).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    
    return dg

def get_model_memory_usage(batch_size, model):
    import numpy as np
    try:
        from keras import backend as K
    except:
        from tensorflow.keras import backend as K

    shapes_mem_count = 0
    internal_model_mem_count = 0
    for l in model.layers:
        layer_type = l.__class__.__name__
        if layer_type == 'Model':
            internal_model_mem_count += get_model_memory_usage(batch_size, l)
        single_layer_mem = 1
        out_shape = l.output_shape
        if type(out_shape) is list:
            out_shape = out_shape[0]
        for s in out_shape:
            if s is None:
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

    trainable_count = np.sum([K.count_params(p) for p in model.trainable_weights])
    non_trainable_count = np.sum([K.count_params(p) for p in model.non_trainable_weights])

    number_size = 4.0
    if K.floatx() == 'float16':
        number_size = 2.0
    if K.floatx() == 'float64':
        number_size = 8.0

    total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)
    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count
    return gbytes

# one of the main functions:
# metadata: array path for all the paths to retrieve the datasets
def train_ai (metadata, file_path, filename):
    print("----> Metadata received: ", metadata)

    # session configuration
    session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

    ####################################### GENERATE DATASET #######################################
    # load musics with the metadata
    TrackSet = []
    
    # iterate all the metadata paths and append the files to the list
    for i in range(len(metadata)):
        music_files = glob(metadata[i] + '/*.*.wav') # depending on file extension (avoids model weights files)... maybe proper dynamic file extension later?
        dataset = get_files_dataset(
            metadata[i] + '/*.*.wav',
            out_len=3.3,
            max_feats=256,
            total_seconds=26,
            scale=1,
            batch_size=16
        )

        # @TODO: proper copy of whole array without creating matrix
        for x in music_files:
            TrackSet.append(x)
    
    #shape = dataset.take(1).shape
    for test_batch in dataset.take(1):
        shape = test_batch.shape

    print("----> Shape: ", shape)

    num_total_examples = (len(TrackSet))

    print("----> Dataset size: ", num_total_examples)

    ####################################### MODEL CREATION #######################################
    # Model creation
    print("----> Model Creation starting")
    model = DDIM(widths = [128, 128, 128, 128], block_depth = 2, attention=True, dim1=shape[1], dim2=shape[2])
    model.normalizer.adapt(dataset)

    model.compile(
        loss=tf.keras.losses.MSE,
        optimizer= tfa.optimizers.AdamW(
            learning_rate = 3e-4,
            weight_decay = 1e-4
        )
    )

    dataset = dataset.cache()

    ####################################### MODEL TRAINING #######################################
    print("----> Model Training starting")
    
    BATCH_SIZE = 8 # default is 32 by keras
    nr_for_steps = num_total_examples / BATCH_SIZE # number for the steps per epoch should be this
    
    # using the same number as the original kaggle notebook (allows more steps to train)
    kaggle_number = (num_total_examples * 26) // shape[0]
    
    # training first iteration
    history = model.fit(dataset.repeat(), steps_per_epoch=kaggle_number, epochs=1)

    # changing some stuff
    model.spec_mod = 1
    model.dx_mod = 1
    
    # train more 200 epochs after changing that
    history = model.fit(dataset.repeat(), steps_per_epoch=kaggle_number, epochs=200)
    
    ###################################### SAMPLE GENERATION ######################################
    import scipy.io.wavfile as wav
    # generating 100 samples, 10 at a time
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_0" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_1" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_2" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_3" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_4" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_5" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_6" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_7" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_8" + str(i) + ".wav", rate=10000, data=audio.numpy())
    specs = model.generate(10, shape[1:], 1000)
    # save the files on the folder
    for i in range(len(specs)):
        audio = tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32))
        wav.write(file_path + "/" + filename + "_9" + str(i) + ".wav", rate=10000, data=audio.numpy())
    
    # listen to 2 different samples on the notebook
    #for i in range(2):
    #    plt.pcolormesh(np.log(np.abs(specs[i, :, :, 0].numpy().T)))
    #    plt.colorbar()
    #    plt.title(f"Generated example {i+1}")
    #    plt.show()
    #    ipd.display(ipd.Audio(tf.signal.inverse_mdct(tf.cast(specs[i, :, :, 0], tf.float32)), rate=10_000))
    

In [5]:
# generate full path to gather different datasets
def gen_metadata(inputs, metadata):
    # define the metadata array
    for i in range(len(inputs)):
        # calculate the full path of the metadata
        FULL_PATH = BASE_PATH + inputs[i]

        metadata.append(FULL_PATH)

In [6]:
# python function to check if a path exists
# if it doesn’t exist we create one
# assumes just one path inside metadata for now
# @TODO: create a new path depending on multiple elements
def gen_output_folder(base_path, metadata):
    # remove the BASE_PATH prefix
    if metadata[0].startswith(BASE_PATH):
        path = metadata[0][len(BASE_PATH):]
        
    # make dir
    dir_path = base_path + path
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        
    return dir_path

<hr>

# Create different metadata for testing

In [7]:
BASE_PATH = "/kaggle/input/cmndataset/cmn-dataset/"

# define metadata to gather different datasets (this way, it allows to merge different things)
input_metadata_1 = ["Classical/All/All"]
metadata_1 = []

input_metadata_2 = ["Jazz/All/All"]
metadata_2 = []

input_metadata_3 = ["Rock/All/All"]
metadata_3 = []

input_metadata_4 = ["Disco/All/All"]
metadata_4 = []

input_metadata_5 = ["Blues/All/All"]
metadata_5 = []

input_metadata_6 = ["Pop/All/All"]
metadata_6 = []

input_metadata_7 = ["Reggae/All/All"]
metadata_7 = []

input_metadata_8 = ["Country/All/All"]
metadata_8 = []

input_metadata_9 = ["Metal/All/All"]
metadata_9 = []

input_metadata_10 = ["Hiphop/All/All"]
metadata_10 = []


# generated metadata for testing
gen_metadata(input_metadata_1, metadata_1)
print(metadata_1)

gen_metadata(input_metadata_2, metadata_2)
print(metadata_2)

gen_metadata(input_metadata_3, metadata_3)
print(metadata_3)

gen_metadata(input_metadata_4, metadata_4)
print(metadata_4)

gen_metadata(input_metadata_5, metadata_5)
print(metadata_5)

gen_metadata(input_metadata_6, metadata_6)
print(metadata_6)

gen_metadata(input_metadata_7, metadata_7)
print(metadata_7)

gen_metadata(input_metadata_8, metadata_8)
print(metadata_8)

gen_metadata(input_metadata_9, metadata_9)
print(metadata_9)

gen_metadata(input_metadata_10, metadata_10)
print(metadata_10)

['/kaggle/input/cmndataset/cmn-dataset/Classical/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Jazz/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Rock/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Disco/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Blues/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Pop/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Reggae/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Country/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Metal/All/All']
['/kaggle/input/cmndataset/cmn-dataset/Hiphop/All/All']


<hr>

# Train the model and generate samples

In [8]:
BASE_PATH_GENERATED = "/kaggle/working/"

# generate different file paths depending on the metadata
path_1 = gen_output_folder(BASE_PATH_GENERATED, metadata_1)
path_2 = gen_output_folder(BASE_PATH_GENERATED, metadata_2)
path_3 = gen_output_folder(BASE_PATH_GENERATED, metadata_3)
path_4 = gen_output_folder(BASE_PATH_GENERATED, metadata_4)
path_5 = gen_output_folder(BASE_PATH_GENERATED, metadata_5)
path_6 = gen_output_folder(BASE_PATH_GENERATED, metadata_6)
path_7 = gen_output_folder(BASE_PATH_GENERATED, metadata_7)
path_8 = gen_output_folder(BASE_PATH_GENERATED, metadata_8)
path_9 = gen_output_folder(BASE_PATH_GENERATED, metadata_9)
path_10 = gen_output_folder(BASE_PATH_GENERATED, metadata_10)

# train and generate samples
#train_ai(metadata_1, path_1, "generated")
#train_ai(metadata_2, path_2, "generated")
train_ai(metadata_3, path_3, "generated")
#train_ai(metadata_4, path_4, "generated")
#train_ai(metadata_5, path_5, "generated")
#train_ai(metadata_6, path_6, "generated")
#train_ai(metadata_7, path_7, "generated")
#train_ai(metadata_8, path_8, "generated")
#train_ai(metadata_9, path_9, "generated")
#train_ai(metadata_10, path_10, "generated")

----> Metadata received:  ['/kaggle/input/cmndataset/cmn-dataset/Rock/All/All']
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0

----> Shape:  (16, 256, 128, 1)
----> Dataset size:  100
----> Model Creation starting
----> Model Training starting


2023-01-26 18:13:33.651961: W tensorflow/core/common_runtime/bfc_allocator.cc:272] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.71GiB with freed_by_count=0. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.




2023-01-26 18:19:21.180196: W tensorflow/core/kernels/data/cache_dataset_ops.cc:768] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

100%|██████████| 1000/1000 [03:14<00:00,  5.14it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
100%|██████████| 1000/1000 [03:11<00:00,  5.23it/s]
