# Random

# Generate high-dimensional labels

Use this notebook to generate the high-dimensional label arrays (speech, shuffled speech, uniform, and composite) and save them as `.npy` files.

## Speech 

In [1]:
#!pip install git+https://github.com/librosa/librosa
import os

import numpy as np
import librosa
import librosa.display
from skimage.transform import resize
# Report the installed librosa release for comparison: the paper uses version 0.7.1.
print('librosa version is {}'.format(librosa.version.version))

librosa version is 0.8.1


In [4]:
# File stems (without the .wav extension) of the ten recordings in cifar10_wav/.
# The position in this list is the index of the label in `mels`.
audio_files = [
    '0cmf2|Airplane',
    '0k4j|Car',
    '015p6|Bird',
    '01yrx|Cat',
    '09kx5|Deer',
    '0bt9lr|Dog',
    '09ld4|Frog',
    '03k3r|Horse',
    '06q74|Ship',
    '07r04|Truck',
]
# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((len(audio_files), 64, 64), dtype='float32')
for idx, stem in enumerate(audio_files):
    waveform, rate = librosa.load('cifar10_wav/{}.wav'.format(stem), sr=22050)
    # trim silent edges
    waveform, _ = librosa.effects.trim(waveform)
    spec = librosa.feature.melspectrogram(y=waveform, sr=rate, hop_length=256, n_mels=64, fmax=8000)
    # Convert power to dB relative to the spectrogram's peak, then resample to 64x64.
    spec_db = librosa.power_to_db(spec, ref=np.max)
    mels[idx] = resize(spec_db, (64, 64))

In [None]:
# Write the label array to disk; np.save appends the '.npy' extension to the name.
np.save(file='cifar10_speech', arr=mels)

In [None]:
# Build one 64x64 dB-scaled mel spectrogram per recording cifar100_wav/0.wav .. 99.wav.
# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((100, 64, 64), dtype='float32')
for class_idx in range(100):
    waveform, rate = librosa.load('cifar100_wav/{}.wav'.format(class_idx), sr=22050)
    # trim silent edges
    waveform, _ = librosa.effects.trim(waveform)
    spec = librosa.feature.melspectrogram(y=waveform, sr=rate, hop_length=256, n_mels=64, fmax=8000)
    # Convert power to dB relative to the spectrogram's peak, then resample to 64x64.
    spec_db = librosa.power_to_db(spec, ref=np.max)
    mels[class_idx] = resize(spec_db, (64, 64))

In [None]:
# Write the label array to disk; np.save appends the '.npy' extension to the name.
np.save(file='cifar100_speech', arr=mels)

### New Labels

In [5]:
# Speech labels built from the recordings in chantel_wav/, saved under label_files/chantel/.
# File stems (without the .wav extension); the list position is the label index in `mels`.
audio_files = ['0cmf2|Airplane', '0k4j|Car', '015p6|Bird', '01yrx|Cat', '09kx5|Deer', '0bt9lr|Dog', '09ld4|Frog', '03k3r|Horse', '06q74|Ship', '07r04|Truck']
out_path = 'label_files/chantel/cifar10_speech'

# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((len(audio_files), 64, 64), dtype='float32')
for idx, stem in enumerate(audio_files):
    y, sr = librosa.load('chantel_wav/{}.wav'.format(stem), sr=22050)
    # trim silent edges
    y, _ = librosa.effects.trim(y)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_mels=64, fmax=8000)
    # Convert power to dB relative to the spectrogram's peak, then resample to 64x64.
    mel = librosa.power_to_db(mel, ref=np.max)
    mels[idx] = resize(mel, (64, 64))

# np.save does not create missing directories, so make sure the target folder
# exists; otherwise a fresh checkout fails here with FileNotFoundError.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
np.save(out_path, mels)

In [5]:
# Speech labels built from the recordings in french_wav/, saved under label_files/french/.
# File stems (without the .wav extension); the list position is the label index in `mels`.
audio_files = ['0cmf2|Airplane', '0k4j|Car', '015p6|Bird', '01yrx|Cat', '09kx5|Deer', '0bt9lr|Dog', '09ld4|Frog', '03k3r|Horse', '06q74|Ship', '07r04|Truck']
out_path = 'label_files/french/cifar10_speech'

# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((len(audio_files), 64, 64), dtype='float32')
for idx, stem in enumerate(audio_files):
    y, sr = librosa.load('french_wav/{}.wav'.format(stem), sr=22050)
    # trim silent edges
    y, _ = librosa.effects.trim(y)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_mels=64, fmax=8000)
    # Convert power to dB relative to the spectrogram's peak, then resample to 64x64.
    mel = librosa.power_to_db(mel, ref=np.max)
    mels[idx] = resize(mel, (64, 64))

# np.save does not create missing directories, so make sure the target folder
# exists; otherwise a fresh checkout fails here with FileNotFoundError.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
np.save(out_path, mels)

## Shuffle

In [None]:
def shuffleSlices(num_slices):
    """Return the 64 column indices rearranged by shuffling contiguous slices.

    The index range 0..63 is cut into ``num_slices`` contiguous slices. The
    slice order is shuffled with NumPy's global RNG (call ``np.random.seed``
    first for reproducible results) and re-drawn until it differs from the
    original order. The slices are then concatenated in the shuffled order.

    When ``num_slices`` divides 64 all slices have equal width. Otherwise the
    width is ``64 // (num_slices - 1)``; the last slice takes whatever columns
    remain and may be empty.

    Args:
        num_slices: Number of slices, with ``2 <= num_slices <= 64``.

    Returns:
        A list of 64 integers that is a permutation of ``range(64)``.

    Raises:
        ValueError: If ``num_slices`` is outside ``[2, 64]``. With fewer than
            two slices no non-identity order exists, so the redraw loop would
            never terminate.
    """
    if not 2 <= num_slices <= 64:
        raise ValueError(
            'num_slices must be between 2 and 64, got {}'.format(num_slices))

    identity = np.arange(num_slices)
    slice_idx = identity.copy()
    # Redraw until the slice order is actually different from the input order.
    while np.array_equal(slice_idx, identity):
        np.random.shuffle(slice_idx)

    if 64 % num_slices == 0:
        slice_size = 64 // num_slices
    else:
        slice_size = 64 // (num_slices - 1)

    last_slice = num_slices - 1
    shuffle_slices = []
    for i in slice_idx:
        start = i * slice_size
        if i == last_slice:
            # The final slice absorbs any leftover columns so that no column is
            # dropped when slice_size * num_slices < 64 (e.g. num_slices=12).
            end = 64
        else:
            end = min(64, start + slice_size)
        shuffle_slices.extend(range(start, end))
    return shuffle_slices


# Seed passed to np.random.seed before each shuffled label set is generated.
shuffle_seed = 8
# 64 slices over 64 columns: every slice is a single column.
num_slices = 64

In [None]:
# File stems (without the .wav extension) of the ten recordings in cifar10_wav/.
# The position in this list is the index of the label in `mels`.
audio_files = [
    '0cmf2|Airplane',
    '0k4j|Car',
    '015p6|Bird',
    '01yrx|Cat',
    '09kx5|Deer',
    '0bt9lr|Dog',
    '09ld4|Frog',
    '03k3r|Horse',
    '06q74|Ship',
    '07r04|Truck',
]
# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((len(audio_files), 64, 64), dtype='float32')
# Seed once so the sequence of per-label column orders is reproducible.
np.random.seed(shuffle_seed)
for idx, stem in enumerate(audio_files):
    waveform, rate = librosa.load('cifar10_wav/{}.wav'.format(stem), sr=22050)
    # trim silent edges
    waveform, _ = librosa.effects.trim(waveform)
    spec = librosa.feature.melspectrogram(y=waveform, sr=rate, hop_length=256, n_mels=64, fmax=8000)
    spec_db = librosa.power_to_db(spec, ref=np.max)
    resized = resize(spec_db, (64, 64))
    # shuffle (column wise); a fresh column order is drawn for every label
    column_order = shuffleSlices(num_slices)
    mels[idx] = resized[:, column_order]

In [None]:
# Write the shuffled label array to disk; np.save appends the '.npy' extension.
np.save(file='cifar10_shuffle', arr=mels)

In [None]:
# Column-shuffled spectrograms for the recordings cifar100_wav/0.wav .. 99.wav.
# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((100, 64, 64), dtype='float32')
# Seed once so the sequence of per-label column orders is reproducible.
np.random.seed(shuffle_seed)
for class_idx in range(100):
    waveform, rate = librosa.load('cifar100_wav/{}.wav'.format(class_idx), sr=22050)
    # trim silent edges
    waveform, _ = librosa.effects.trim(waveform)
    spec = librosa.feature.melspectrogram(y=waveform, sr=rate, hop_length=256, n_mels=64, fmax=8000)
    spec_db = librosa.power_to_db(spec, ref=np.max)
    resized = resize(spec_db, (64, 64))
    # shuffle (column wise); a fresh column order is drawn for every label
    column_order = shuffleSlices(num_slices)
    mels[class_idx] = resized[:, column_order]

In [None]:
# Write the shuffled label array to disk; np.save appends the '.npy' extension.
np.save(file='cifar100_shuffle', arr=mels)

In [None]:
# Column-shuffled spectrograms for the recordings in chantel_wav/.
# `audio_files` is not defined in this cell; it is reused from an earlier cell.
# Every slot is overwritten below, so the buffer does not need initialising.
mels = np.empty((10, 64, 64), dtype='float32')
# Seed once so the sequence of per-label column orders is reproducible.
np.random.seed(shuffle_seed)
for idx in range(10):
    waveform, rate = librosa.load('chantel_wav/{}.wav'.format(audio_files[idx]), sr=22050)
    # trim silent edges
    waveform, _ = librosa.effects.trim(waveform)
    spec = librosa.feature.melspectrogram(y=waveform, sr=rate, hop_length=256, n_mels=64, fmax=8000)
    spec_db = librosa.power_to_db(spec, ref=np.max)
    resized = resize(spec_db, (64, 64))
    # shuffle (column wise); a fresh column order is drawn for every label
    column_order = shuffleSlices(num_slices)
    mels[idx] = resized[:, column_order]

In [None]:
# Write the shuffled label array to disk; np.save appends the '.npy' extension.
np.save(file='chantel_shuffle', arr=mels)

## Uniform

In [None]:
# Uniform labels: label i is a constant 64x64 plane with value i * 80 / 9,
# so the ten labels are evenly spaced from 0 to 80.
mels = np.ones((10, 64, 64), dtype='float32')
for level, plane in enumerate(mels):
    # `plane` is a view into `mels`, so this writes the scaled values in place.
    plane[...] = plane * level * 80 / 9
np.save('cifar10_uniform', mels)

In [None]:
# Uniform labels: label i is a constant 64x64 plane with value i * 80 / 99,
# so the hundred labels are evenly spaced from 0 to 80.
mels = np.ones((100, 64, 64), dtype='float32')
for level, plane in enumerate(mels):
    # `plane` is a view into `mels`, so this writes the scaled values in place.
    plane[...] = plane * level * 80 / 99
np.save('cifar100_uniform', mels)

In [None]:
# Uniform labels: label i is a constant 64x64 plane with value i * 80 / 9,
# so the ten labels are evenly spaced from 0 to 80.
mels = np.ones((10, 64, 64), dtype='float32')
for level, plane in enumerate(mels):
    # `plane` is a view into `mels`, so this writes the scaled values in place.
    plane[...] = plane * level * 80 / 9
np.save('chantel_uniform', mels)

In [9]:
# Uniform labels for 200 classes: label i is a constant 64x64 plane with value
# i * 80 / 199, so the labels are evenly spaced from 0 to 80.
mels = np.ones((200, 64, 64), dtype='float32')
for level, plane in enumerate(mels):
    # `plane` is a view into `mels`, so this writes the scaled values in place.
    plane[...] = plane * level * 80 / 199
print(mels)
# Saving is currently disabled:
#np.save('tiny_imagenet_uniform', mels)

[[[ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  ...
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]
  [ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]
  [ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]
  ...
  [ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]
  [ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]
  [ 0.40201005  0.40201005  0.40201005 ...  0.40201005  0.40201005
    0.40201005]]

 [[ 0.8040201   0.8040201   0.804020

## Composite

In [6]:
import numpy as np
import param
import imagen as ig
import numbergen as ng



TypeError: object of type <class 'float'> cannot be safely interpreted as an integer.

In [None]:
# Build the "composite" labels.
# sum 10 oriented Gaussian patterns,
# each with random positions and orientations,
# giving a different overall pattern at each time
# NOTE(review): presumably disabling time dependence makes the random parameters
# draw a new value on every call instead of following a global clock - confirm
# against the param documentation.
param.Dynamic.time_dependent=False
# IPython line magic, not plain Python, so this cell only runs inside a notebook.
# NOTE(review): presumably needs the plotting extension that provides %opts to be
# loaded beforehand - confirm; no such load is visible in this notebook.
%opts Image (cmap='gray')
# Ten Gaussian generators are combined with np.add. x and y are a uniform random
# value shifted by -0.5; orientation is a uniform random value scaled by pi.
# NOTE(review): the seeds i+1, i+2, i+3 overlap between neighbouring generators
# (y of generator i and x of generator i+1 both use seed i+2) - confirm this is intended.
gs = ig.Composite(operator=np.add,
                  generators=[ig.Gaussian(size=0.15,
                                          x=ng.UniformRandom(seed=i+1)-0.5,
                                          y=ng.UniformRandom(seed=i+2)-0.5,
                                          orientation=np.pi*ng.UniformRandom(seed=i+3))
                                for i in range(10)])
# Call the composite generator 100 times and keep every returned pattern.
labels = [gs() for _ in range(100)]

In [None]:
from PIL import Image

# Convert each generated pattern into a 64x64 float32 label:
# min-max rescale to [0, 255], resample to 64x64 with PIL, then map to [0, 80].
mels = np.zeros(shape=(100, 64, 64), dtype='float32')
for idx in range(100):
    pattern = labels[idx]
    lo, hi = pattern.min(), pattern.max()
    data = (pattern - lo) / (hi - lo) * 255
    img = Image.fromarray(data).resize((64, 64))
    mels[idx] = np.array(img) / 255 * 80
    # Optional: store or preview the image (disabled).
    #img.convert("L").save('Desktop/speech_label/final/composite/{}.png'.format(idx))
    #img.show()

In [None]:
import numpy as np
# Both 10-class label sets reuse the first ten composite patterns;
# the CIFAR-100 set stores all one hundred.
first_ten = mels[:10]
for stem in ('cifar10_composite', 'chantel_composite'):
    np.save(stem, first_ten)
np.save('cifar100_composite', mels)