In [7]:
import numpy as np
import librosa

def feature_extract(file):
    """
    Define function that takes in a file an returns features in an array
    """
    
    #get wave representation
    y, sr = librosa.load(file)
        
    #determine if instruemnt is harmonic or percussive by comparing means
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    if np.mean(y_harmonic)>np.mean(y_percussive):
        harmonic=1
    else:
        harmonic=0
        
    #Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    #temporal averaging
    mfcc=np.mean(mfcc,axis=1)
    
    #get the mel-scaled spectrogram
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  
    #temporally average spectrogram
    spectrogram = np.mean(spectrogram, axis = 1)
    
    #compute chroma energy
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    #temporally average chroma
    chroma = np.mean(chroma, axis = 1)
    
    #compute spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = np.mean(contrast, axis= 1)
    
    return [harmonic, mfcc, spectrogram, chroma, contrast]

In [9]:
path = r'D:\reposetory\Save_Transport\DATA\cut\44.1K\4_01.wav'
feature = feature_extract(path)
print(feature[2])

[5.07904887e-02 2.20128596e-01 1.38528854e-01 4.58961874e-02
 2.08877940e-02 2.37714685e-02 1.76975820e-02 8.16782843e-03
 7.61732738e-03 1.14493985e-02 7.42848031e-03 3.20760347e-03
 1.39338542e-02 1.40447263e-02 1.95032749e-02 1.16606606e-02
 1.30419442e-02 1.06023801e-02 7.26874545e-03 3.76877002e-03
 3.33984103e-03 2.56086653e-03 3.46203707e-03 2.50261510e-03
 3.83925717e-03 2.96931411e-03 3.40772234e-03 5.81459096e-03
 9.30679776e-03 6.03048131e-03 2.26576719e-03 1.29700801e-03
 1.00522686e-03 8.43924470e-04 7.92967505e-04 8.16548360e-04
 1.55140774e-03 1.89333514e-03 1.53205777e-03 1.17532851e-03
 1.04081479e-03 7.92911567e-04 6.45688444e-04 8.49166419e-04
 8.53000558e-04 1.20406435e-03 8.86943249e-04 5.45135525e-04
 9.62853199e-04 1.68143190e-03 1.39013364e-03 1.68582215e-03
 1.73219061e-03 2.48981197e-03 2.40032747e-03 2.46487232e-03
 1.89907092e-03 1.37262745e-03 2.48721684e-03 3.04060895e-03
 1.97749445e-03 1.51754799e-03 1.31367869e-03 1.36673171e-03
 1.25765521e-03 1.452703

In [1]:
def create_spectrogram(filename, path):
    plt.interactive(False)
    clip, sample_rate = librosa.load(filename, sr=None)
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    
    filename = path
    plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del filename,clip,sample_rate,fig,ax,S

In [2]:
#r"D:\reposetory\Save_Transport\dataset\train\one_sec_cut\*"
#r"D:\reposetory\Save_Transport\dataset\train\one_sec_noise\*"
#r"D:\reposetory\Save_Transport\dataset\test\one_sec_cut\*"
#r"D:\reposetory\Save_Transport\dataset\test\one_sec_noise\*"

Data_dir=np.array(glob(r"D:\reposetory\Save_Transport\dataset\test\one_sec_noise\*"))
#%load_ext memory_profiler
#%memit 

i=0
for file in Data_dir[i:i+2000]:
    #Define the filename as is, "name" refers to the JPG, and is split off into the number itself. 
    filename,name = file,file.split('\\')[-1].split('.')[0]
    path = r'D:\reposetory\Save_Transport\dataset\test\one_sec_noise.jpg\\' + name + '.jpg'
    create_spectrogram(filename, path)
gc.collect()

NameError: name 'np' is not defined

In [5]:
import pyaudio
import wave

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("* done recording")
print(data)
print(frames.append(data))

stream.stop_stream()
stream.close()
p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()

* recording
* done recording
b'Z\x15Y\x15-\x11,\x11\xc4\x0b\xc5\x0b\x05\x08\x04\x08{\x07{\x07\xac\t\xab\t\xd9\x0c\xd9\x0c\xad\x0e\xad\x0e\xa0\r\xa0\r\xc8\t\xc7\tO\x04P\x04\x0c\xff\x0c\xff\x7f\xfb~\xfb/\xfa.\xfa\xf9\xfa\xf9\xfa\xf4\xfc\xf4\xfc\xe7\xfe\xe7\xfe\xc6\xff\xc5\xff\x02\xff\x02\xff\xee\xfc\xef\xfc\x94\xfa\x95\xfa<\xf9<\xf9\x12\xfa\x12\xfaR\xfdR\xfd\x00\x02\xff\x01\x8b\x06\x8c\x06g\ti\t\xdf\t\xdf\t\x8f\x08\x8e\x08\xbb\x06\xba\x06\xe6\x05\xe6\x05/\x07/\x07X\nW\nC\x0eC\x0eN\x11O\x11\xd5\x11\xd6\x11[\x0f[\x0f\xac\n\xac\n\xa6\x05\xa7\x05\xa5\x02\xa5\x02\xdd\x02\xdd\x02\xb1\x05\xb1\x05\xe4\x08\xe4\x08\xc4\t\xc4\t\xc0\x06\xc0\x06\x87\x00\x87\x00\xda\xf9\xd9\xf9\xe0\xf5\xdf\xf5X\xf6Y\xf6\x9c\xfa\x9c\xfa\x02\x00\x01\x00\x96\x03\x97\x03\xd7\x03\xd7\x03E\x01E\x01&\xfe&\xfe\xea\xfc\xea\xfc\xb9\xfe\xbb\xfe\xf3\x02\xf3\x02\xe1\x07\xe0\x07\xa8\x0b\xa8\x0b<\r<\r\xba\x0c\xbb\x0c\xe9\n\xea\n\xd5\x08\xd6\x08_\x07^\x07\xca\x06\xca\x06:\x07;\x07\x91\x08\x91\x08,\n,\n\x13\x0b\x13\x0b\x14\n\x14\n~\x0

In [21]:
import numpy as np
import pyaudio
import time
import librosa

class AudioHandler(object):
    def __init__(self):
        self.FORMAT = pyaudio.paFloat32
        self.CHANNELS = 1
        self.RATE = 44100
        self.CHUNK = 1024 * 2
        self.p = None
        self.stream = None

    def start(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.FORMAT,
                                  channels=self.CHANNELS,
                                  rate=self.RATE,
                                  input=True,
                                  output=False,
                                  stream_callback=self.callback,
                                  frames_per_buffer=self.CHUNK)

    def stop(self):
        self.stream.close()
        self.p.terminate()

    def callback(self, in_data, frame_count, time_info, flag):
        numpy_array = np.frombuffer(in_data, dtype=np.float32)
        librosa.feature.mfcc(numpy_array)
        return None, pyaudio.paContinue

    def mainloop(self):
        while (self.stream.is_active()): # if using button you can set self.stream to 0 (self.stream = 0), otherwise you can use a stop condition
            data = stream.read(self.CHUNK)
            data_float = np.fromstring(data , dtype=np.float32)
            data_np = np.array(data_float , dtype='d')
            # data in 1D array
            mfcc = librosa.feature.mfcc(data_np.flatten() , self.RATE)
            print(mfcc)
            time.sleep(2.0)


audio = AudioHandler()
audio.start()     # open the the stream
audio.mainloop()  # main operations with librosa
audio.stop()

OSError: [Errno -9988] Stream closed