In [1]:
import numpy as np
import pyaudio
import time
import librosa
import matplotlib.pyplot as plt
import librosa.display

In [14]:
class AudioHandler(object):
    def __init__(self):
        self.FORMAT = pyaudio.paFloat32
        self.CHANNELS = 1
        self.RATE = 48000
        self.CHUNK = 1024 * 2
        self.p = None
        self.stream = None

    def start(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=self.FORMAT,
                                  channels=self.CHANNELS,
                                  rate=self.RATE,
                                  input=True,
                                  output=False,
                                  stream_callback=self.callback,
                                  frames_per_buffer=self.CHUNK)

    def stop(self):
        self.stream.close()
        self.p.terminate()

    def callback(self, in_data, frame_count, time_info, flag):
        numpy_array = np.frombuffer(in_data, dtype=np.float32)
        librosa.feature.mfcc(numpy_array)
        return None, pyaudio.paContinue

    def mainloop(self):
        while (self.stream.is_active()): # if using button you can set self.stream to 0 (self.stream = 0), otherwise you can use a stop condition
            #data = stream.read(self.CHUNK)
            #data_float = np.fromstring(data , dtype=np.float32)
            #data_np = np.array(data_float , dtype='d')
            #data in 1D array
            #mfcc = librosa.feature.mfcc(data_np.flatten() , self.RATE)
            #self.callback(self.stream, 10, 10, 10)
            #print(mfcc)
            time.sleep(2.0)


audio = AudioHandler()
audio.start()     # open the the stream
#audio.mainloop()  # main operations with librosa
audio.stop()

In [2]:
def create_spectrogram(y, sample_rate):
    
    plt.interactive(False)
    #clip, sample_rate = librosa.load(filename, sr=None)
        
    fig = plt.figure(figsize=[0.72,0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(spectrogram, ref=np.max))

    
    #filename = path
    #plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
    plt.close()    
    fig.clf()
    plt.close(fig)
    plt.close('all')
    
    return spectrogram
    del filename,clip,sample_rate,fig,ax,S

In [3]:
import numpy as np
import librosa

def feature_extract(y, sr):
    """
    Define function that takes in a file an returns features in an array
    """
    
    #get wave representation
    #y, sr = librosa.load(file)
    #print(y)    
    #determine if instruemnt is harmonic or percussive by comparing means
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    if np.mean(y_harmonic)>np.mean(y_percussive):
        harmonic=1
    else:
        harmonic=0
        
    #Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    #temporal averaging
    mfcc=np.mean(mfcc,axis=1)
    
    #get the mel-scaled spectrogram
    spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)  
    #temporally average spectrogram
    spectrogram = np.mean(spectrogram, axis = 1)
    
    #compute chroma energy
    chroma = librosa.feature.chroma_cens(y=y, sr=sr)
    #temporally average chroma
    chroma = np.mean(chroma, axis = 1)
    
    #compute spectral contrast
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    contrast = np.mean(contrast, axis= 1)
    
    return [harmonic, mfcc, spectrogram, chroma, contrast]

In [4]:
import pyaudio
import wave

CHUNK = 1024
FORMAT = pyaudio.paFloat32
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 1
#WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

print("* done recording")
# print(type(frames))
# print(type(frames[0]))
# print(frames[0])


all_frames = b''.join(frames)
print(type(all_frames))

# byte_sample = []
# for chunk in frames: 
#     byte_sample.append(chunk)

# print(type(byte_sample[0]))
# print(byte_sample[0])

numpy_array = np.frombuffer(all_frames, dtype=np.float32)
#print(librosa.feature.mfcc(numpy_array))
#print(numpy_array)
feature = feature_extract(numpy_array, RATE)
#print(feature)
sp = create_spectrogram(numpy_array, RATE)


# import matplotlib.pyplot as plt
# fig, ax = plt.subplots()
# img = librosa.display.specshow(feature[0], x_axis='time', ax=ax)
# fig.colorbar(img, ax=ax)
# ax.set(title='MFCC')

stream.stop_stream()
stream.close()
p.terminate()


# wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
# wf.setnchannels(CHANNELS)
# wf.setsampwidth(p.get_sample_size(FORMAT))
# wf.setframerate(RATE)
# wf.writeframes(b''.join(frames))
# wf.close()


* recording
* done recording
<class 'bytes'>




In [5]:
print(sp)

[[3.9706931e-09 2.4142215e-09 1.3809435e-08 ... 1.5074582e-04
  4.2552149e-04 4.2727668e-04]
 [1.3827980e-09 3.1443972e-09 1.6662636e-08 ... 4.3864486e-05
  1.6806768e-04 6.3795713e-04]
 [8.4085500e-10 6.0934469e-10 1.6752608e-08 ... 1.5807342e-06
  1.3177765e-04 5.0049485e-04]
 ...
 [5.8839311e-09 5.0183311e-09 1.2089104e-08 ... 7.9766269e-06
  1.5724194e-05 2.0730360e-05]
 [5.8050471e-09 7.0301795e-09 1.1833212e-08 ... 8.1627859e-06
  1.8344264e-05 2.0924334e-05]
 [8.7152143e-09 9.2973949e-09 8.7624397e-09 ... 2.2256952e-06
  3.1861309e-06 3.9871452e-06]]


In [6]:
librosa.display.specshow(librosa.power_to_db(sp, ref=np.max))

<matplotlib.collections.QuadMesh at 0x250d56da2b0>

In [25]:
from tensorflow.keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
#from data_prep import valik
import pandas as pd
from sklearn.metrics import confusion_matrix
from PIL import Image, ImageFont, ImageDraw
import numpy as np

path = load_model(r'D:\save_1.h5')

model = load_model('/home/aigaf/Downloads/Telegram Desktop/save_1.h5')


# path_to_img = 'cut1.jpg'
# img = Image.open(path_to_img)
# img_arr = np.asarray(img)
img_arr = sp
# print(img_arr.shape, img_arr)
img_norm = img_arr/255
img_size = img_norm.resize((150, 150, 3))
img_compl = img_norm.reshape(-1, 150, 150, 3)
# print(img_compl)

#results = model.evaluate(valik)
#print('loss, accuracy =', results)


final_res = model.predict(img_compl)
print(final_res)
# data = {'y_Actual':    [ tuple(i) for i in valik[1][1]],
#         'y_Predicted': [ tuple(i) for i in final_res]
#         }
#
# df = pd.DataFrame(data, columns=['y_Actual','y_Predicted'])
# print (df)

[[0.01024908 0.9900153 ]]
