<a href="https://colab.research.google.com/github/Anubhavrajak/Hackfest2022/blob/main/Water.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import code
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf
!sudo apt-get install libportaudio2
!pip install sounddevice
import sounddevice as sd
import queue
import os

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libportaudio2 is already the newest version (19.6.0-1).
The following packages were automatically installed and are no longer required:
  libnvidia-common-460 nsight-compute-2020.2.0
Use 'sudo apt autoremove' to remove them.
0 upgraded, 0 newly installed, 0 to remove and 42 not upgraded.


In [None]:
# Mount Google Drive into the Colab filesystem so the dataset folder under
# /content/drive is readable by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def extract_feature(file_name=None, feature_sr=8000):
    """Compute five time-averaged spectral feature vectors for one audio clip.

    Parameters
    ----------
    file_name : str or None
        Path of an audio file to read with ``soundfile``. When falsy, audio is
        captured from the default input device instead, until at least
        100000 frames have been collected.
    feature_sr : int, optional
        Sample rate handed to every librosa feature call. Defaults to 8000,
        the value this function has always used.
        NOTE(review): the rate reported by the file / input device is *not*
        used for feature extraction; pass it explicitly via ``feature_sr`` if
        the recordings are not 8 kHz.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``, each the mean over the
        transposed feature matrix's first axis. The callers in this notebook
        concatenate them into a 193-value row.
    """
    if file_name:
        print('Extracting', file_name)
        # sample_rate is read for completeness only; see feature_sr above.
        X, sample_rate = sf.read(file_name, dtype='float32')
    else:
        device_info = sd.query_devices(None, 'input')
        sample_rate = int(device_info['default_samplerate'])
        q = queue.Queue()

        # The stream callback only hands a copy of each input buffer to the queue.
        def callback(i, f, t, s):
            q.put(i.copy())

        data = []
        with sd.InputStream(samplerate=sample_rate, callback=callback):
            # Block on the queue until enough frames have been gathered.
            while len(data) < 100000:
                data.extend(q.get())
        X = np.array(data)

    # Keep only the first channel of multi-channel audio; X is 1-D afterwards
    # (so the former `X = X.T` was a no-op and has been dropped).
    if X.ndim > 1:
        X = X[:, 0]

    # short term fourier transform (magnitude), shared by chroma and contrast
    stft = np.abs(librosa.stft(X))

    # mfcc (mel-frequency cepstrum)
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=feature_sr, n_mfcc=40, fmin=30).T, axis=0)

    # chroma
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=feature_sr).T, axis=0)

    # melspectrogram -- the signal must be passed as `y=`; newer librosa
    # releases reject it as a positional argument.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=feature_sr, fmin=30).T, axis=0)

    # spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=feature_sr, fmin=30).T, axis=0)

    # tonnetz, computed on the harmonic component of the signal
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=feature_sr, fmin=30).T,
        axis=0,
    )
    return mfccs, chroma, mel, contrast, tonnetz


In [None]:
def parse_audio_files(parent_dir,file_ext='*.wav'):
    """Extract features and integer labels for every audio file under ``parent_dir``.

    Each sub-directory of ``parent_dir`` is treated as one class. The label of a
    class is the position of its directory name in the sorted listing of
    ``parent_dir`` (non-directory entries still occupy a position, so labels
    are not guaranteed to be contiguous).

    Parameters
    ----------
    parent_dir : str
        Directory whose sub-directories contain the audio files.
    file_ext : str, optional
        Glob pattern applied inside each sub-directory (default ``'*.wav'``).

    Returns
    -------
    features : numpy.ndarray
        Array of shape ``(n_files, 193)``; ``(0, 193)`` when nothing was found.
    labels : numpy.ndarray
        Integer array of shape ``(n_files,)`` with the class index of each row.

    Raises
    ------
    ValueError
        If an extracted feature row does not contain exactly 193 values.

    Files for which ``extract_feature`` raises are reported and skipped.
    """
    n_features = 193
    sub_dirs = sorted(os.listdir(parent_dir))
    # Collect rows in Python lists and stack once at the end instead of
    # re-allocating the whole matrix for every file.
    rows, labels = [], []
    for label, sub_dir in enumerate(sub_dirs):
        class_dir = os.path.join(parent_dir, sub_dir)
        if not os.path.isdir(class_dir):
            continue
        for fn in glob.glob(os.path.join(class_dir, file_ext)):
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                print("[Error] extract feature error in %s. %s" % (fn,e))
                continue
            ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
            # Fail fast on a malformed row rather than after the full scan.
            if ext_features.shape != (n_features,):
                raise ValueError(
                    "expected %d features for %s, got shape %s"
                    % (n_features, fn, ext_features.shape)
                )
            rows.append(ext_features)
            labels.append(label)
        print("extract %s features done" % (sub_dir))
    features = np.vstack([np.empty((0, n_features))] + rows)
    # Builtin int replaces the np.int alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    return features, np.array(labels, dtype=int)

In [None]:
def parse_predict_files(parent_dir,file_ext='*.wav'):
    """Build the feature matrix for unlabeled audio files located directly in ``parent_dir``.

    Every path matching ``file_ext`` inside ``parent_dir`` is run through
    ``extract_feature`` and its five feature vectors are concatenated into one
    row.

    Returns a pair ``(features, filenames)``: an array with 193 columns and one
    row per file, and an array of the matching file paths in the same order.
    """
    search_pattern = os.path.join(parent_dir, file_ext)
    feature_table = np.empty((0,193))
    processed_paths = []
    for wav_path in glob.glob(search_pattern):
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(wav_path)
        # One flat row per file, appended below the rows gathered so far.
        row = np.hstack((mfccs, chroma, mel, contrast, tonnetz))
        feature_table = np.vstack((feature_table, row))
        processed_paths.append(wav_path)
        print("extract %s features done" % wav_path)
    return np.array(feature_table), np.array(processed_paths)


In [None]:
# Walk the class sub-folders of the dataset directory on Drive and extract one
# feature row plus one integer label per audio file.
features, labels = parse_audio_files('/content/drive/MyDrive/Water dataset')
# Persist both arrays (relative to the current working directory) so the
# training cells can reload them without repeating the extraction.
np.save('input.npy', features)
np.save('target.npy', labels)


Extracting /content/drive/MyDrive/Water dataset/Leak/008_12854-20180811.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/004_07573-20180817.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/006_07573-20180831.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/007_07573-20180905.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/005_07573-20180826.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/009_05494-20180810.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/003_05593-20180927.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/012_16174-20171130.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/011_05576-20171123.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/015_05557-20190321.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/016_12835-20180523.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/018_05569-20181002.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/019_05500-20180809.wav
Extracting /



Extracting /content/drive/MyDrive/Water dataset/Leak/1.2.04.0415.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.3.01.0330.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.3.03.0400.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.3.02.0345.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.3.04.0415.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.4.02.0345.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.4.01.0330.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.4.03.0400.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.4.04.0415.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.5.02.0345.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.5.03.0400.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.5.01.0330.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.5.04.0415.wav
Extracting /content/drive/MyDrive/Water dataset/Leak/1.6.01.0330.wav
Extracting /content/drive/MyDrive/

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


In [None]:
import numpy as np
import sklearn
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Load the feature matrix and label vector from the .npy files.
# The same cwd-relative names are used here as in the np.save('input.npy', ...)
# / np.save('target.npy', ...) calls, so the round trip does not depend on the
# working directory happening to be /content.
X =  np.load('input.npy')
y =  np.load('target.npy').ravel()
X


array([[-3.18695770e+02,  2.83387890e+01, -1.77697983e+02, ...,
        -7.61210736e-02,  5.94400751e-02, -3.05595007e-02],
       [-2.16284351e+01,  1.71329403e+01, -3.45241776e+01, ...,
         1.01562794e-01,  4.85737379e-02,  2.55348789e-02],
       [ 6.95381451e+00, -1.09342461e+01, -2.20756149e+01, ...,
        -1.04761159e-02, -9.46627314e-04, -5.16663350e-03],
       ...,
       [-2.20766357e+02,  1.73623352e+02, -4.67773666e+01, ...,
        -7.99292337e-02,  4.22067423e-02,  1.18409494e-02],
       [-4.36167450e+02,  1.43225372e+02,  2.67685375e+01, ...,
        -3.74222683e-01, -8.13948942e-02, -9.98220378e-02],
       [-1.29109589e+02,  1.21003771e+01, -3.20790100e+00, ...,
         4.57616869e-03, -1.81263589e-02, -1.62245667e-02]])

In [None]:
# Split data into training and test subsets
# (30% of the rows are held out for testing; random_state=0 fixes the shuffle
# so the split is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Simple SVM
print('SVM model has started fitting ')
# C and gamma are set by hand; the kernel is left at scikit-learn's default.
# verbose=1 enables the solver's own log output.
clf = SVC(C=5.0, gamma=0.0001,verbose = 1)
# Alternative configuration kept for reference (linear kernel):
# clf = SVC(kernel='linear', random_state=32)
clf.fit(X_train, y_train)
# Score on the held-out split only, and report it as "acc".
acc = clf.score(X_test, y_test)
print("acc=%0.4f" % acc)

SVM model has started fitting 
[LibSVM]acc=0.8571
