# Google Cloud Plateform
- Use of GCP bucket files to store wav files.
- Convert wav to npy file which avoid the loss of information in comparison to jpg

In [1]:
from google.cloud import storage
import tarfile
import pathlib
from scipy import signal
from tqdm import tqdm
import os
import numpy as np
import tensorflow as tf
import librosa

def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    # bucket_name = "your-bucket-name"
    # source_blob_name = "storage-object-name"
    # destination_file_name = "local/path/to/file"

    storage_client = storage.Client()

    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)

    print(
        "Blob {} downloaded to {}.".format(
            source_blob_name, destination_file_name
        )
    )

In [3]:
%%time 
#Import files from bucket
bucket_name = 'music_file'
files_to_import = {'wav_instruments.tar':'instruments.tar',
                   'dataset_composer.tar':'composers.tar',
                  'IRMAS-TestingData-Part1.tar':'IRMA_test_1.tar',
                  'IRMAS-TestingData-Part2.tar':'IRMA_test_2.tar',
                  'IRMAS-TestingData-Part3.tar':'IRMA_test_3.tar'}

for source_blob_name, destination_file_name in tqdm(files_to_import.items()):
    download_blob(bucket_name, source_blob_name, destination_file_name)
    file = tarfile.open(destination_file_name, mode="r")
    file.extractall(path = destination_file_name[:-4])
    file.close()

  0%|          | 0/5 [00:00<?, ?it/s]

Blob IRMAS-TestingData-Part1.tar downloaded to IRMA_test_1.tar.


 20%|██        | 1/5 [01:12<04:49, 72.31s/it]

Blob wav_instruments.tar downloaded to instruments.tar.


 40%|████      | 2/5 [02:55<04:04, 81.46s/it]

Blob dataset_composer.tar downloaded to composers.tar.


 60%|██████    | 3/5 [04:39<02:56, 88.48s/it]

Blob IRMAS-TestingData-Part2.tar downloaded to IRMA_test_2.tar.


 80%|████████  | 4/5 [06:16<01:30, 90.96s/it]

Blob IRMAS-TestingData-Part3.tar downloaded to IRMA_test_3.tar.


100%|██████████| 5/5 [07:07<00:00, 85.42s/it]

CPU times: user 1min 52s, sys: 55.4 s, total: 2min 47s
Wall time: 7min 7s





In [None]:
#Numpy files creation
def files_processing(filepath):
    file_lenght = 20 #second
    sampling_rate = 44100
    n_fft = 4096
    hop_length = int(n_fft/4)
    channels = 1 # 2 for stereo
    audio_binary = tf.io.read_file(filepath)
    waveform = tf.audio.decode_wav(audio_binary,
                                 desired_channels = channels)

    signal = tf.reshape(waveform.audio,[channels,-1])
    print(signal.shape)
    stft = tf.signal.stft(signal,
                        frame_length=n_fft,
                        frame_step = hop_length)
    stft_amp = tf.abs(stft)
    stft_amp = tf.transpose(stft_amp,perm=[2,1,0])
    return stft_amp

def full_pre_processing(folder):
    train_folder = pathlib.Path(folder + 'wav')
    files_path = [str(sound_path) for sound_path in list(train_folder.glob("*/*"))]
    data_spec = tf.data.Dataset.from_tensor_slices(files_path)
    data_spec = data_spec.map(files_processing)
    return files_path,data_spec

def folder_formating(folder):
    directory_wav = folder + 'wav'
    directory_npy = folder + 'npy'

    if 'npy' not in os.listdir(directory_wav[:-4]):
        os.mkdir(directory_npy)

    for sub_folder in os.listdir(directory_wav):
        if sub_folder not in os.listdir(directory_npy):
            os.mkdir(directory_npy + '/' + sub_folder)
    
    return directory_wav,directory_npy

def folder_formating_cqt(folder):
    directory_wav = folder + 'wav'
    directory_cqt = folder + 'cqt'

    if 'cqt' not in os.listdir(directory_wav[:-4]):
        os.mkdir(directory_cqt)

    for sub_folder in os.listdir(directory_wav):
        if sub_folder not in os.listdir(directory_cqt):
            os.mkdir(directory_cqt + '/' + sub_folder)
    
    return directory_wav,directory_cqt

def cqt_transform(path):
    waveform = librosa.load(path,sr = 44100)[0]
    cqt = librosa.core.cqt(waveform,hop_length = 256, n_bins=252, bins_per_octave=3*12)
    cqt = librosa.util.normalize(cqt, norm=1)
    cqt_amp = np.abs(cqt)
    cqt_amp = np.reshape(cqt_amp,(cqt_amp.shape[0],cqt_amp.shape[1],1))
    return cqt_amp

In [6]:
# Transformation des fichier .wav en numpy pour accélerer l'entrainement du réseau de neurone
folders = ['composers/dataset/']#,'dataset_composer/dataset/']
for folder in folders:
    n = 0
    directory_wav,directory_npy = folder_formating(folder)
    files_path,data_spec = full_pre_processing(folder)
    print(len(files_path))
    
    for file in data_spec:
        filename = files_path[n].replace('wav','npy')
        np.save(filename,file.numpy())
        n+=1
        print(n,end = '\r')

(1, None)
1106
1106

In [7]:
folders = ['instruments/','composers/dataset/']
for folder in folders:
    
    directory_wav,directory_cqt = folder_formating_cqt(folder)
    train_folder = pathlib.Path(folder + 'wav')
    
    files_path = [str(sound_path) for sound_path in list(train_folder.glob("*/*"))]
    n=0
    print(len(files_path))
    for path in files_path:
        cqt = cqt_transform(path)
        filename = path.replace('.wav','.npy').replace('wav/','cqt/')
        np.save(filename,cqt)
        print(n,end = '\r')
        n+=1

6705
1106
1105

In [10]:
np.load('instruments/cqt/pia/[pia][cla]1284__1.npy').max()

5.127531

In [107]:
# fonction de formatage des fichiers de test du réseau

def test_formating(path):

    n_fft = 4096
    hop_length = int(n_fft/4)
    channels = 1 # 2 for stereo

    filepath = test_path[0]
    audio_binary = tf.io.read_file(filepath)
    waveform = tf.audio.decode_wav(audio_binary,
                                    desired_channels = 1)

    file = next(iter(waveform))
    out = []
    lenght = 132299
    for i in range(0,min(int(len(file)/lenght),16)):
        start = i*lenght
        end = (i+1)*lenght
        out.append(file[start:end,:])

    empty_file = tf.constant([[0.0] for i in range(0,lenght)])
    save = len(out)
    while len(out) < 16:
        out.append(empty_file)

    out_stft = []
    for extract in out:
        signal = tf.reshape(extract,[channels,-1])

        stft = tf.signal.stft(signal,
                            frame_length=n_fft,
                            frame_step = hop_length)
        stft_amp = tf.abs(stft)
        stft_amp = tf.transpose(stft_amp,perm=[2,1,0])
        out_stft.append(stft_amp)
    batched = tf.data.Dataset.from_tensor_slices(out_stft).batch(16)
    fichier = open(filepath[:-4] + '.txt')
    prediction = []
    for line in fichier:
        prediction.append(line.rstrip('\n').rstrip('\t'))
    return batched,prediction