In [1]:
import numpy as np
import copy
import random
import pandas as pd
from tqdm import tqdm
import librosa
import matplotlib.pyplot as plt
import os
import pandas
import h5py
import librosa.display
import IPython.display as ipd
import skimage.io
from PIL import Image
import pickle


# Raw strings: sequences such as `\i`, `\d`, `\G` and `\s` are not valid escapes,
# so a plain literal triggers a SyntaxWarning on recent Python versions.
# The resulting path values are unchanged.
RAWDATA_PATH = r'E:\iot\data\Glosy_zbior'    # root directory walked for input recordings
SAVE_PATH = r'E:\iot\data\spectrograms.h5'   # HDF5 file that receives every dataset group

# Speaker labels. Each name is lower-cased and searched for as '<name>_' in the
# lower-cased file name; the first name that matches becomes the label.
peoples = [
    'Adrian', 'Bartek', 'Damian', 'Ewelina', 'Hubert', 'jakub',
    'Kamil', 'Kasia', 'Kuba', 'Lukasz', 'Mariusz', 'Mikolaj',
    'oskar', 'patryk', 'Pawel', 'przemek', 'Rafal', 'szymon',
]

# Command labels. Each word is searched for as '_<word>_' in the lower-cased
# file name; the first word that matches becomes the label.
words = [
    'background', 'close', 'door', 'down', 'go', 'home', 'left',
    'light', 'no', 'off', 'on', 'open', 'right', 'shutdown',
    'silence', 'speech', 'stop', 'unknown', 'up', 'windows', 'yes',
]

def save_to_h5(path, group_name, spectros, labels):
    """Store spectrograms and their labels in an HDF5 file.

    Two groups are written: ``<group_name>_spectros`` and ``<group_name>_labels``.
    Inside each group, item ``i`` is stored as a dataset named ``str(i)``, so a
    spectrogram and its label share the same key.

    The file is opened in append mode (``'a'``): it is created when missing and
    otherwise kept intact, so groups written by earlier calls survive. If a
    target group already exists (e.g. the cell is re-run), only that group is
    replaced. The previous implementation fell back to mode ``'w'`` on *any*
    exception, which truncated the whole file and silently discarded every
    other group.

    Parameters
    ----------
    path : str
        Location of the HDF5 file.
    group_name : str
        Prefix used for the two group names.
    spectros : sequence of array-like
        Spectrograms, one dataset each.
    labels : sequence of str
        Labels, one dataset each, in the same order as ``spectros``.
    """
    with h5py.File(path, 'a') as hdf:
        for suffix, items in (('_spectros', spectros), ('_labels', labels)):
            name = group_name + suffix
            # Replace a stale group from a previous run instead of failing
            # (create_group raises if the name is already taken).
            if name in hdf:
                del hdf[name]
            group = hdf.create_group(name)
            for i, item in enumerate(items):
                group.create_dataset(str(i), data=item)
    # No explicit close(): the context manager closes the file on exit.

Labels = commands: each spectrogram is labelled with the spoken word (an entry of `words`) found in its file name.

In [2]:
# Non-augmented command dataset: one dB-scaled STFT spectrogram per recording
# whose file name contains '_<word>_' for some entry of `words`.
spectros = []
labels = []

for root_dir, _subdirs, files in os.walk(RAWDATA_PATH):
    for file in tqdm(files):
        file_lower = file.lower()  # lower-case once instead of once per word
        for command in words:
            if '_' + command.lower() + '_' in file_lower:
                labels.append(command)
                # os.path.join instead of a hard-coded '\\' separator
                y, sr = librosa.load(os.path.join(root_dir, file))
                # Magnitude spectrogram in dB, relative to the clip's own peak
                sample = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
                spectros.append(sample)
                break  # first matching word wins; files with no match are skipped

save_to_h5(path=SAVE_PATH, group_name='commands_no_aug', spectros=spectros, labels=labels)

100%|██████████| 1134/1134 [00:06<00:00, 182.39it/s]


In [3]:
# Augmented command dataset: every matching recording yields
# len(pitch_shift_ranges) * len(time_shift_ranges) = 9 spectrograms.
spectros = []
labels = []

# Despite the name, this is a multiplicative factor: every audio sample is
# scaled by an independent uniform draw from this interval.
NOISE_RANGE = (0, 2)
pitch_shift_ranges = np.linspace(-2.5, 2.5, num=3)  # -2.5, 0.0, 2.5 (passed as n_steps)
time_shift_ranges = np.linspace(0.8, 1.2, num=3)    # 0.8, 1.0, 1.2 (passed as rate)

for root_dir, _subdirs, files in os.walk(RAWDATA_PATH):
    for file in tqdm(files):
        file_lower = file.lower()  # lower-case once instead of once per word
        for command in words:
            if '_' + command.lower() + '_' in file_lower:
                # os.path.join instead of a hard-coded '\\' separator
                y, sr = librosa.load(os.path.join(root_dir, file))
                for pitch_shift in pitch_shift_ranges:
                    # The pitch shift does not depend on the time stretch, so it
                    # is computed once per pitch value rather than once per pair.
                    pitched = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
                    for time_shift in time_shift_ranges:
                        sample = librosa.effects.time_stretch(pitched, rate=time_shift)
                        sample = sample * np.random.uniform(*NOISE_RANGE, size=sample.shape)
                        # Magnitude spectrogram in dB, relative to the clip's own peak
                        sample = librosa.amplitude_to_db(np.abs(librosa.stft(sample)), ref=np.max)
                        spectros.append(sample)
                        labels.append(command)
                break  # first matching word wins; files with no match are skipped

save_to_h5(path=SAVE_PATH, group_name='commands_aug', spectros=spectros, labels=labels)

100%|██████████| 1134/1134 [03:54<00:00,  4.84it/s]


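Labels = persons: each spectrogram is labelled with the speaker name (an entry of `peoples`) found in its file name.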

In [4]:
# Non-augmented speaker dataset: one dB-scaled STFT spectrogram per recording
# whose file name contains '<name>_' for some entry of `peoples`.
spectros = []
labels = []

for root_dir, _subdirs, files in os.walk(RAWDATA_PATH):
    for file in tqdm(files):
        file_lower = file.lower()  # lower-case once instead of once per name
        for person in peoples:
            if person.lower() + '_' in file_lower:
                labels.append(person)
                # os.path.join instead of a hard-coded '\\' separator
                y, sr = librosa.load(os.path.join(root_dir, file))
                # Magnitude spectrogram in dB, relative to the clip's own peak
                sample = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
                spectros.append(sample)
                break  # first matching name wins; files with no match are skipped

save_to_h5(path=SAVE_PATH, group_name='peoples_no_aug', spectros=spectros, labels=labels)

100%|██████████| 1134/1134 [00:03<00:00, 323.81it/s]


In [5]:
# Augmented speaker dataset: every matching recording yields
# len(pitch_shift_ranges) * len(time_shift_ranges) = 9 spectrograms.
spectros = []
labels = []

# Despite the name, this is a multiplicative factor: every audio sample is
# scaled by an independent uniform draw from this interval.
NOISE_RANGE = (0, 2)
pitch_shift_ranges = np.linspace(-2.5, 2.5, num=3)  # -2.5, 0.0, 2.5 (passed as n_steps)
time_shift_ranges = np.linspace(0.8, 1.2, num=3)    # 0.8, 1.0, 1.2 (passed as rate)

for root_dir, _subdirs, files in os.walk(RAWDATA_PATH):
    for file in tqdm(files):
        file_lower = file.lower()  # lower-case once instead of once per name
        for person in peoples:
            if person.lower() + '_' in file_lower:
                # os.path.join instead of a hard-coded '\\' separator
                y, sr = librosa.load(os.path.join(root_dir, file))
                for pitch_shift in pitch_shift_ranges:
                    # The pitch shift does not depend on the time stretch, so it
                    # is computed once per pitch value rather than once per pair.
                    pitched = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)
                    for time_shift in time_shift_ranges:
                        sample = librosa.effects.time_stretch(pitched, rate=time_shift)
                        sample = sample * np.random.uniform(*NOISE_RANGE, size=sample.shape)
                        # Magnitude spectrogram in dB, relative to the clip's own peak
                        sample = librosa.amplitude_to_db(np.abs(librosa.stft(sample)), ref=np.max)
                        spectros.append(sample)
                        labels.append(person)
                break  # first matching name wins; files with no match are skipped

save_to_h5(path=SAVE_PATH, group_name='peoples_aug', spectros=spectros, labels=labels)

100%|██████████| 1134/1134 [03:52<00:00,  4.87it/s]


In [None]:
# Number of spectrograms held in `spectros`, i.e. those built by whichever
# dataset cell was executed most recently (each cell resets the list).
len(spectros)

In [48]:
# Sanity check: read one stored label back from the HDF5 file.
# Uses SAVE_PATH rather than repeating the path literal, which also contained
# invalid escape sequences such as `\i`, `\d` and `\s`.
with h5py.File(SAVE_PATH, 'r') as hdf:
    # String datasets are read back as bytes here, hence the explicit decode.
    print(hdf['peoples_aug_labels']['10'][()].decode('utf-8'))

background


In [None]:
# from collections import Counter
# counter_object = Counter(labels_list)
# keys = counter_object.keys()
# print(keys, counter_object.values())
# len(labels_list)