In [None]:
import numpy as np
import pyaudio
import time
import librosa

class AudioHandler(object):
    def __init__(self):
        self.FORMAT = pyaudio.paFloat32
        self.CHANNELS = 1
        self.RATE = 48000
        self.CHUNK = 1024 * 2
        self.p = None
        self.stream = None

    def start(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.FORMAT,
                                  channels=self.CHANNELS,
                                  rate=self.RATE,
                                  input=True,
                                  output=False,
                                  stream_callback=self.callback,
                                  frames_per_buffer=self.CHUNK)

    def stop(self):
        self.stream.close()
        self.p.terminate()

    def callback(self, in_data, frame_count, time_info, flag):
        numpy_array = np.frombuffer(in_data, dtype=np.float32)
        librosa.feature.mfcc(numpy_array)
        return None, pyaudio.paContinue

    def mainloop(self):
        while (self.stream.is_active()): # if using button you can set self.stream to 0 (self.stream = 0), otherwise you can use a stop condition
            #data = stream.read(self.CHUNK)
            #data_float = np.fromstring(data , dtype=np.float32)
            #data_np = np.array(data_float , dtype='d')
            #data in 1D array
            mfcc = librosa.feature.mfcc(data_np.flatten() , self.RATE)
            #self.callback(self.stream, 10, 10, 10)
            #print(mfcc)
            time.sleep(2.0)


audio = AudioHandler()
audio.start()     # open the the stream
audio.mainloop()  # main operations with librosa
audio.stop()

In [59]:
import numpy as np
import librosa

def feature_extract(y, sr):
    """
    Define function that takes in a file an returns features in an array
    """
    
    #get wave representation
    #y, sr = librosa.load(file)
    #print(y)    
    #determine if instruemnt is harmonic or percussive by comparing means
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    if np.mean(y_harmonic)>np.mean(y_percussive):
        harmonic=1
    else:
        harmonic=0
        
    #Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    #temporal averaging
    mfcc=np.mean(mfcc,axis=1)
    
    #get the mel-scaled spectrogram
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  
    #temporally average spectrogram
    spectrogram = np.mean(spectrogram, axis = 1)
    
    #compute chroma energy
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    #temporally average chroma
    chroma = np.mean(chroma, axis = 1)
    
    #compute spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = np.mean(contrast, axis= 1)
    
    return [harmonic, mfcc, spectrogram, chroma, contrast]

In [61]:
import pyaudio
import wave

CHUNK = 1024
FORMAT = pyaudio.paFloat32
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 1
#WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("* done recording")
#print(frames)
numpy_array = np.frombuffer(frames[0], dtype=np.float32)
#print(librosa.feature.mfcc(numpy_array))
#print(numpy_array)
feature = feature_extract(numpy_array, RATE)
print(feature[2])

stream.stop_stream()
stream.close()
p.terminate()


# wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
# wf.setnchannels(CHANNELS)
# wf.setsampwidth(p.get_sample_size(FORMAT))
# wf.setframerate(RATE)
# wf.writeframes(b''.join(frames))
# wf.close()


* recording
* done recording
[-0.00796509 -0.00799561 -0.00369263 ...  0.01696777  0.01873779
  0.01873779]
[2.47981381e+00 3.15622449e+00 3.65620494e+00 4.43671370e+00
 4.15277195e+00 1.46498239e+00 2.10968778e-01 2.93316990e-02
 1.95600428e-02 6.44073039e-02 3.99517715e-02 7.31767807e-03
 2.61063538e-02 3.19470838e-02 4.27818000e-02 6.02941290e-02
 9.71844643e-02 1.06356725e-01 1.55595928e-01 1.37487322e-01
 4.06509750e-02 1.29109221e-02 1.04808034e-02 1.47091877e-02
 3.22658420e-02 9.18382853e-02 1.24567464e-01 8.78031105e-02
 5.24471588e-02 1.75702751e-01 2.82501400e-01 2.18037248e-01
 7.12314397e-02 2.61709746e-02 1.60300769e-02 3.03600524e-02
 3.22234742e-02 1.24426913e-02 1.58767588e-02 1.62400343e-02
 1.30844591e-02 7.77086150e-03 8.03453382e-03 5.71911875e-03
 5.71739580e-03 5.17965341e-03 1.67519599e-02 2.36804355e-02
 1.14268940e-02 1.06411334e-02 2.12813132e-02 3.98887508e-02
 1.76461097e-02 3.65832751e-03 6.62364066e-03 5.87891322e-03
 3.34858196e-03 1.97961205e-03 1.66982

In [None]:
path = r'D:\reposetory\Save_Transport\DATA\cut\44.1K\4_01.wav'
feature = feature_extract(path)
#print(feature[2])