In [1]:
import os
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import matplotlib.pyplot as plt
import librosa.display

import numpy as np
import pandas as pd
import librosa
from PIL import Image
jpath = os.path.join

In [2]:
from scipy import signal
from scipy.io import wavfile

In [3]:
# --- Spectrogram geometry -------------------------------------------------
n_mels = 128       # mel bins in the spectrogram, i.e. the image height
hop_length = 512   # audio samples advanced per spectrogram time-step
time_steps = 1024  # intended number of time-steps, i.e. the image width

# --- Fixed-length extraction window (in audio samples) --------------------
# NOTE(review): start_sample / length_samples are not referenced by any of
# the cells visible here - confirm whether the window is applied elsewhere.
start_sample = 0                          # window starts at the first sample
length_samples = hop_length * time_steps  # window length covering time_steps hops

In [4]:
def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale the values of ``X`` into the range ``[min, max]``.

    The smallest element of ``X`` is mapped to ``min`` and the largest to
    ``max``; everything in between is interpolated linearly.

    Parameters
    ----------
    X : numpy.ndarray
        Array to rescale. It is not modified. An empty array is not
        supported (``X.min()`` fails on it).
    min, max : float
        Lower and upper bound of the target range. The names shadow the
        Python builtins inside this function; they are kept unchanged so
        existing positional and keyword callers continue to work.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``X``. If all elements of ``X`` are
        equal there is no range to stretch, and every element of the result
        is ``min``.
    """
    lowest = X.min()
    spread = X.max() - lowest
    if spread == 0:
        # Constant input: dividing by the zero spread would give 0/0 = NaN,
        # which turns into undefined values once cast to an integer dtype.
        # Treat every element as sitting at the bottom of the range instead.
        X_std = np.zeros_like(X, dtype=float)
    else:
        X_std = (X - lowest) / spread
    return X_std * (max - min) + min

In [5]:
def audio_to_spectogram(path, n_mels=n_mels, hop_length=hop_length):
    """Turn an audio file into an 8-bit log-mel spectrogram image array.

    Parameters
    ----------
    path : str
        Location of the audio file, passed to ``librosa.load`` with its
        default settings.
    n_mels : int
        Number of mel bins requested from librosa.
    hop_length : int
        Samples between successive spectrogram frames; the FFT size is set
        to twice this value.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array in which low frequencies are at the bottom rows and
        darker (smaller) values mean more energy.
    """
    samples, sample_rate = librosa.load(path)
    mel_power = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_mels=n_mels,
        n_fft=hop_length * 2,
        hop_length=hop_length,
    )

    # The tiny offset keeps log() finite where the mel power is exactly zero.
    log_mel = np.log(mel_power + 1e-9)

    # Stretch to 0..255 and truncate to 8-bit pixel values.
    pixels = scale_minmax(log_mel, 0, 255).astype(np.uint8)

    # Flip vertically so low frequencies end up at the bottom of the image,
    # then invert so that high energy is rendered dark.
    return 255 - np.flip(pixels, axis=0)

In [6]:
# Root folder of the input audio. The conversion loop treats each
# sub-directory as one class and reads the .wav files inside it.
DATA_DIR = "sound_data"

In [7]:
# Root folder that receives the generated spectrogram images.
OUTPUT_DIR = "spectogram_data"
# exist_ok=True makes this safe to re-run and avoids the gap between a
# separate existence check and the creation call.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [8]:
# Counter dictionary keyed by class (sub-directory) name; it is filled in by
# the conversion loop.
data_bucket = {}

In [9]:
# Convert every .wav file found in DATA_DIR/<class>/ into a PNG spectrogram
# saved under OUTPUT_DIR/<class>/ with the same base name, and record in
# data_bucket how many files were converted for each class.
for cls in sorted(os.listdir(DATA_DIR)):  # sorted -> reproducible order
    class_dir = jpath(DATA_DIR, cls)
    if not os.path.isdir(class_dir):
        continue  # only sub-directories are treated as classes

    class_outdir = jpath(OUTPUT_DIR, cls)
    os.makedirs(class_outdir, exist_ok=True)

    # Start each class at zero so the value is a per-class file count and
    # re-running this cell does not inflate it.
    data_bucket[cls] = 0
    for audio in sorted(os.listdir(class_dir)):
        # Case-insensitive match so files named like "clip.WAV" are included.
        if not audio.lower().endswith(".wav"):
            continue

        path = jpath(class_dir, audio)
        spectogram_array = audio_to_spectogram(path)
        print(spectogram_array.shape)
        im = Image.fromarray(spectogram_array)
        # Swap only the trailing extension; str.replace would also rewrite a
        # ".wav" that happens to occur earlier in the file name.
        im.save(jpath(class_outdir, audio[:-len(".wav")] + ".png"))
        data_bucket[cls] += 1

(128, 51)




(128, 37)
(128, 41)




(128, 421)
(128, 223)
(128, 20)
(128, 87)
(128, 119)
(128, 12)




(128, 64)




(128, 35)
(128, 44)




(128, 95)
(128, 26)
(128, 145)
(128, 28)
(128, 62)
(128, 14)
(128, 64)
(128, 57)
(128, 26)
(128, 47)
(128, 61)
(128, 741)
(128, 104)
(128, 246)
(128, 192)
(128, 24)




(128, 244)
(128, 26)
(128, 34)
(128, 320)




(128, 50)
(128, 32)
(128, 34)
(128, 11)
(128, 94)
(128, 35)
(128, 733)
(128, 11)
(128, 59)
(128, 174)
(128, 193)
(128, 53)
(128, 130)
(128, 24)




(128, 137)
(128, 236)




(128, 249)
(128, 181)
(128, 330)
(128, 190)




(128, 107)
(128, 69)
(128, 25)




(128, 192)
(128, 192)
(128, 91)
(128, 368)
(128, 123)
(128, 40)
(128, 33)




(128, 75)
(128, 64)
(128, 82)
(128, 181)
(128, 132)
(128, 347)
(128, 78)
(128, 26)
(128, 286)
(128, 64)




(128, 33)
(128, 13)
(128, 13)
(128, 411)
(128, 448)
(128, 35)
(128, 351)
(128, 55)
(128, 210)
(128, 112)
(128, 67)
(128, 442)
(128, 79)




(128, 75)
(128, 305)




(128, 37)
(128, 152)
(128, 179)
(128, 103)




(128, 517)
(128, 507)
(128, 146)
(128, 113)
(128, 150)
(128, 391)
(128, 265)
(128, 100)
(128, 401)
(128, 141)
(128, 273)
(128, 110)
(128, 101)
(128, 500)
(128, 134)
(128, 154)
(128, 111)
(128, 211)
(128, 374)
(128, 122)
(128, 323)
(128, 496)
(128, 161)
(128, 211)
(128, 163)
(128, 446)
(128, 202)
(128, 75)
(128, 192)
(128, 219)
(128, 180)
(128, 135)
(128, 237)
(128, 81)
(128, 112)
(128, 126)
(128, 97)
(128, 90)
(128, 148)
(128, 228)
(128, 176)
(128, 316)
(128, 83)
(128, 134)
(128, 139)
(128, 114)
(128, 266)
(128, 112)
(128, 182)
(128, 296)
(128, 512)
(128, 513)
(128, 166)
(128, 119)
(128, 652)
(128, 561)
(128, 354)
(128, 85)
(128, 80)
(128, 255)
(128, 86)
(128, 193)
(128, 167)
(128, 487)
(128, 405)
(128, 173)
(128, 84)
(128, 195)
(128, 142)
(128, 44)
(128, 198)
(128, 311)
(128, 69)
(128, 132)
(128, 168)
(128, 646)
(128, 83)
(128, 454)
(128, 137)
(128, 108)
(128, 125)
(128, 194)
(128, 93)
(128, 186)
(128, 128)
(128, 165)
(128, 147)
(128, 76)
(128, 98)
(128, 170)
(128, 164)
(128, 168)
(12



(128, 54)
(128, 57)
(128, 142)
(128, 251)
(128, 517)
(128, 49)
(128, 181)
(128, 70)
(128, 43)
(128, 504)
(128, 123)
(128, 517)
(128, 138)
(128, 76)
(128, 132)
(128, 517)
(128, 418)
(128, 517)
(128, 762)
(128, 517)
(128, 504)
(128, 517)
(128, 517)
(128, 496)
(128, 517)
(128, 57)
(128, 52)
(128, 268)
(128, 517)
(128, 470)
(128, 509)
(128, 203)
(128, 47)
(128, 474)
(128, 43)
(128, 84)
(128, 57)
(128, 56)
(128, 377)
(128, 77)
(128, 517)
(128, 513)
(128, 517)
(128, 104)
(128, 483)
(128, 102)
(128, 504)
(128, 517)
(128, 501)
(128, 410)
(128, 517)
(128, 111)
(128, 366)
(128, 517)
(128, 50)
(128, 418)
(128, 517)
(128, 110)
(128, 568)
(128, 513)
(128, 51)
(128, 504)
(128, 40)
(128, 584)
(128, 405)
(128, 466)
(128, 453)
(128, 466)
(128, 517)
(128, 444)
(128, 78)
(128, 513)
(128, 747)
(128, 65)
(128, 47)
(128, 517)
(128, 517)
(128, 509)
(128, 517)
(128, 517)
(128, 393)
(128, 517)
(128, 646)
(128, 500)
(128, 55)
(128, 302)
(128, 527)
(128, 577)
(128, 517)
(128, 66)
(128, 513)
(128, 483)
(128, 517)



(128, 111)
(128, 200)




(128, 73)
(128, 84)
(128, 69)




(128, 82)
(128, 136)
(128, 60)
(128, 91)
(128, 64)
(128, 44)
(128, 67)
(128, 122)
(128, 99)
(128, 56)
(128, 69)




(128, 113)
(128, 226)




(128, 68)




(128, 113)
(128, 84)
(128, 201)
(128, 90)
(128, 71)




(128, 77)
(128, 71)
(128, 91)




(128, 64)
(128, 99)
(128, 69)
(128, 34)
(128, 97)
(128, 198)




(128, 50)
(128, 50)
(128, 57)
(128, 85)
(128, 100)
(128, 102)




(128, 212)
(128, 76)
(128, 68)
(128, 89)
(128, 87)




(128, 66)
(128, 193)
(128, 115)
(128, 96)
(128, 246)
(128, 90)
(128, 55)
(128, 165)




(128, 226)
(128, 198)
(128, 73)
(128, 74)
(128, 369)
(128, 76)
(128, 103)
(128, 436)
(128, 164)
(128, 68)
(128, 86)
(128, 513)
(128, 65)
(128, 517)
(128, 39)
(128, 41)




(128, 69)
(128, 513)
(128, 655)
(128, 181)
(128, 259)
(128, 27)
(128, 22)
(128, 60)
(128, 54)
(128, 27)
(128, 261)
(128, 41)
(128, 171)




(128, 82)
(128, 38)
(128, 517)
(128, 35)
(128, 272)
(128, 35)
(128, 362)
(128, 136)
(128, 71)
(128, 357)
(128, 55)
(128, 517)
(128, 491)
(128, 517)
(128, 343)
(128, 249)
(128, 232)
(128, 517)
(128, 55)
(128, 23)
(128, 214)
(128, 453)
(128, 479)
(128, 91)
(128, 441)
(128, 517)
(128, 63)
(128, 171)
(128, 517)
(128, 169)




(128, 39)
(128, 83)
(128, 52)
(128, 517)
(128, 55)
(128, 54)
(128, 517)
(128, 44)
(128, 253)
(128, 35)
(128, 84)
(128, 87)
(128, 47)
(128, 52)
(128, 71)
(128, 41)
(128, 76)
(128, 254)
(128, 240)
(128, 216)
(128, 104)




(128, 41)
(128, 292)
(128, 517)
(128, 629)
(128, 517)
(128, 439)
(128, 491)
(128, 775)
(128, 517)
(128, 52)




(128, 179)
(128, 509)
(128, 186)
(128, 91)
(128, 513)
(128, 45)
(128, 509)
(128, 496)
(128, 41)
(128, 39)




(128, 517)
(128, 104)
(128, 103)




(128, 491)
(128, 113)
(128, 138)
(128, 102)
(128, 58)
(128, 102)
(128, 100)




(128, 117)
(128, 113)




(128, 71)
(128, 71)
(128, 92)
(128, 71)




(128, 113)
(128, 98)
(128, 58)
(128, 102)
(128, 262)
(128, 59)
(128, 117)
(128, 58)
(128, 113)




(128, 102)
(128, 100)
(128, 33)
(128, 102)
(128, 40)
(128, 140)
(128, 377)
(128, 22)
(128, 34)
(128, 30)
(128, 131)
(128, 345)
(128, 30)
(128, 77)
(128, 12)
(128, 13)
(128, 42)
(128, 51)
(128, 38)
(128, 16)
(128, 140)
(128, 39)




(128, 87)
(128, 30)
(128, 30)
(128, 50)
(128, 331)
(128, 38)
(128, 94)
(128, 53)
(128, 40)
(128, 28)
(128, 281)
(128, 48)
(128, 153)
(128, 19)
(128, 34)
(128, 49)




(128, 145)
(128, 201)
(128, 429)
(128, 18)
(128, 10)
(128, 209)
(128, 56)
(128, 14)
(128, 108)




(128, 59)
(128, 18)
(128, 173)
(128, 49)
(128, 31)
(128, 16)
(128, 280)
(128, 59)
(128, 18)




(128, 49)
(128, 368)
(128, 139)
(128, 10)
(128, 190)
(128, 160)
(128, 138)
(128, 170)
(128, 17)
(128, 17)
(128, 19)
(128, 101)
(128, 74)
(128, 302)
(128, 55)
(128, 428)




(128, 77)
(128, 117)
(128, 74)
(128, 125)
(128, 310)
(128, 384)
(128, 215)
(128, 184)
(128, 366)
(128, 118)




(128, 336)
(128, 302)
(128, 125)
(128, 90)
(128, 131)




(128, 74)
(128, 214)
(128, 384)
(128, 336)
(128, 87)
(128, 49)
(128, 125)
(128, 194)
(128, 40)
(128, 114)
(128, 116)
(128, 211)
(128, 94)
(128, 361)
(128, 43)
(128, 319)
(128, 244)
(128, 116)
(128, 211)
(128, 368)
(128, 225)
(128, 152)
(128, 168)
(128, 190)
(128, 77)
(128, 368)
(128, 133)
(128, 42)
(128, 168)
(128, 46)
(128, 598)
(128, 598)
(128, 81)
(128, 54)
(128, 51)




(128, 77)
(128, 197)
(128, 72)
(128, 341)
(128, 297)
(128, 225)
(128, 89)
(128, 425)
(128, 347)
(128, 140)




(128, 72)




(128, 307)
(128, 72)
(128, 63)
(128, 104)
(128, 54)
(128, 341)
(128, 87)
(128, 44)
(128, 44)
(128, 280)
(128, 65)
(128, 367)
(128, 87)
(128, 87)
(128, 87)
(128, 87)
(128, 44)
(128, 280)
(128, 44)
(128, 108)
(128, 44)
(128, 65)
(128, 44)
(128, 44)
(128, 130)
(128, 388)
(128, 44)
(128, 65)
(128, 44)
(128, 44)
(128, 65)
(128, 44)
(128, 87)
(128, 44)
(128, 44)
(128, 65)
(128, 259)
(128, 44)
(128, 87)
(128, 65)
(128, 44)
(128, 216)
(128, 65)
(128, 108)
(128, 44)
(128, 44)
(128, 108)
(128, 65)
(128, 44)
(128, 44)
(128, 65)
(128, 44)
(128, 44)
(128, 87)
(128, 44)
(128, 87)
(128, 87)
(128, 216)
(128, 87)
(128, 65)
(128, 44)
(128, 44)
(128, 44)
(128, 108)
(128, 65)
(128, 237)
(128, 65)
(128, 108)
(128, 65)
(128, 44)
(128, 65)
(128, 44)
(128, 65)
(128, 65)
(128, 44)
(128, 65)
(128, 130)
(128, 44)
(128, 44)
(128, 65)
(128, 44)
(128, 151)
(128, 44)
(128, 22)
(128, 44)
(128, 44)
(128, 44)
(128, 87)
(128, 65)
(128, 44)
(128, 44)
(128, 431)
(128, 65)
(128, 87)
(128, 44)
(128, 44)
(128, 108)
(128, 44)