In [1]:
import os
from os.path import isdir, join
from pathlib import Path
import pandas as pd
import time

from sklearn.utils import shuffle # shuffling of data
from random import sample # random selection
from tqdm import tqdm # progress bar

# Math
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import librosa
from PIL import Image
import h5py

from sklearn.decomposition import PCA

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import librosa.display

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

In [2]:
def log_specgram(audio, sample_rate, window_size=10,
                 step_size=10, eps=1e-10):
    """Return the natural-log spectrogram of ``audio``.

    Parameters
    ----------
    audio : array_like
        Samples handed straight to ``scipy.signal.spectrogram``.
    sample_rate : int or float
        Sampling frequency in Hz; used as ``fs`` and to convert the
        millisecond arguments below into sample counts.
    window_size : float
        Length of each Hann window, in milliseconds.
    step_size : float
        Despite its name, this value (in milliseconds) is forwarded as
        ``noverlap``, i.e. the overlap between consecutive windows.
        NOTE(review): SciPy documents that ``noverlap`` must be smaller
        than ``nperseg``, so the defaults (10 ms for both) are expected to
        be rejected; the caller in this file passes ``step_size=0``.
    eps : float
        Small constant added before the logarithm so zero-power bins do
        not produce ``-inf``.

    Returns
    -------
    numpy.ndarray
        ``log(spec.T + eps)`` as float32, where ``spec`` is the third
        value returned by ``scipy.signal.spectrogram``.
    """
    def _ms_to_samples(duration_ms):
        # Milliseconds -> whole number of samples at this sample rate.
        return int(round(duration_ms * sample_rate / 1e3))

    spectrogram_options = dict(
        fs=sample_rate,
        window='hann',
        nperseg=_ms_to_samples(window_size),
        noverlap=_ms_to_samples(step_size),
        detrend=False,
    )
    # Only the spectrogram itself is needed; the frequency and time axes
    # returned alongside it are discarded.
    power = signal.spectrogram(audio, **spectrogram_options)[2]
    transposed = power.T.astype(np.float32)
    return np.log(transposed + eps)

def audio_to_data(filepath1):
    """Read one WAV file and return its log-spectrogram.

    Parameters
    ----------
    filepath1 : str
        Location of the WAV file, passed to ``scipy.io.wavfile.read``.

    Returns
    -------
    numpy.ndarray
        The output of ``log_specgram`` (10 ms windows, zero overlap),
        transposed. ``log_specgram`` itself transposes SciPy's result, so
        this second transpose restores SciPy's original axis order
        (presumably (frequency, time) for mono audio -- confirm if
        multi-channel files are expected).
    """
    # wavfile.read yields the sample rate first, then the sample array.
    rate, samples = wavfile.read(filepath1)
    log_spec = log_specgram(samples, rate, window_size=10, step_size=0)
    return log_spec.T

def paths_to_data(paths, base_path=None):
    """Convert a collection of audio files into log-spectrograms.

    Parameters
    ----------
    paths : iterable of str
        File names (or relative paths); each one is appended to
        ``base_path`` by plain string concatenation.
    base_path : str, optional
        Prefix for every entry of ``paths``. When omitted, the
        notebook-level ``path`` variable is used, which is what this
        function originally read implicitly.

    Returns
    -------
    (list, list)
        ``data`` holds one ``audio_to_data`` result per input path, in
        input order. ``labels`` is always an empty list: the label lookup
        is disabled because it needs a ``word2id`` mapping that is not
        defined in the visible notebook. The two-element return shape is
        kept so that callers can unpack it.
    """
    if base_path is None:
        # Fall back to the global set by whichever data cell ran last.
        base_path = path

    data = []
    labels = []
    for rel_path in tqdm(paths):
        # append() instead of index assignment: assigning to data[i] on an
        # empty list raises IndexError on the very first file.
        data.append(audio_to_data(base_path + rel_path))
        # TODO: once a word2id mapping exists, derive the label from the
        # parent directory: labels.append(word2id[rel_path.split('/')[-2]])
    return data, labels
    


In [3]:
# Convert every curated training clip into a fixed-size log-spectrogram image
# and store the stacked result in an HDF5 file.
train_curated_annot = pd.read_csv('../input/train_curated.csv')
train_curated_path = '../input/train_curated/'
# os.listdir returns entries in arbitrary order; sort so that row i of the
# saved array always maps to the same file (the test cell sorts as well).
train_curated_files = sorted(os.listdir(train_curated_path))
path = train_curated_path

imgw, imgh  = 128, 128
# np.array(<PIL image>) has shape (height, width), so the buffer is allocated
# as (n, imgh, imgw); identical to the previous layout while both are 128.
data2 = np.zeros(shape = (len(train_curated_files), imgh, imgw))

for i, fname in enumerate(tqdm(train_curated_files)):
    data1 = audio_to_data(path + fname)
    img = Image.fromarray(data1)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    img = img.resize((imgw, imgh), Image.LANCZOS)
    data2[i] = np.array(img)

# The context manager closes the file even if writing the dataset fails.
with h5py.File('data_img_traincurate.h5', 'w') as h5f:
    h5f.create_dataset('train_curated_hdf', data=data2)

In [4]:
# Convert every noisy training clip into a fixed-size log-spectrogram image
# and store the stacked result in an HDF5 file.
train_noisy_annot = pd.read_csv('../input/train_noisy.csv')
train_noisy_audio_path = '../input/train_noisy/'
# os.listdir returns entries in arbitrary order; sort so that row i of the
# saved array always maps to the same file (the test cell sorts as well).
train_noisy_files = sorted(os.listdir(train_noisy_audio_path))
path = train_noisy_audio_path

imgw, imgh  = 128, 128
# np.array(<PIL image>) has shape (height, width), so the buffer is allocated
# as (n, imgh, imgw); identical to the previous layout while both are 128.
data2 = np.zeros(shape = (len(train_noisy_files), imgh, imgw))

for i, fname in enumerate(tqdm(train_noisy_files)):
    data1 = audio_to_data(path + fname)
    img = Image.fromarray(data1)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    img = img.resize((imgw, imgh), Image.LANCZOS)
    data2[i] = np.array(img)

# The context manager closes the file even if writing the dataset fails.
with h5py.File('data_img_trainnoisy.h5', 'w') as h5f:
    h5f.create_dataset('train_noisy_hdf', data=data2)

In [5]:
# Convert every test clip into a fixed-size log-spectrogram image and store
# the stacked result in an HDF5 file.
test_audio_path = '../input/test/'
# Sorted so that row i of the saved array maps to a reproducible file name.
test_files = np.sort(os.listdir(test_audio_path))
path = test_audio_path

imgw, imgh  = 128, 128
# np.array(<PIL image>) has shape (height, width), so the buffer is allocated
# as (n, imgh, imgw); identical to the previous layout while both are 128.
data2 = np.zeros(shape = (len(test_files), imgh, imgw))

for i, fname in enumerate(tqdm(test_files)):
    data1 = audio_to_data(path + fname)
    img = Image.fromarray(data1)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    img = img.resize((imgw, imgh), Image.LANCZOS)
    data2[i] = np.array(img)

# The context manager closes the file even if writing the dataset fails.
with h5py.File('data_img_test.h5', 'w') as h5f:
    h5f.create_dataset('test_hdf', data=data2)