In [2]:
# Explicit line magic: a bare `cd` only works through IPython's automagic.
%cd Desktop

C:\Users\aliba\Desktop


In [4]:
# Explicit line magic: a bare `cd` only works through IPython's automagic.
# The library modules and the *.wav files are looked up relative to this folder.
%cd pyAudioAnalysis-master

C:\Users\aliba\Desktop\pyAudioAnalysis-master


# Read the audio files


In [None]:
import glob
from scipy.io.wavfile import read

# glob returns matches in arbitrary order, so sort the paths to make the
# position of each recording in `wavs` reproducible from run to run.
# `wav_paths[k]` is the file that produced `wavs[k]`.
wav_paths = sorted(glob.glob('*.wav'))

# Each entry is the (sample_rate, samples) pair returned by scipy's `read`.
wavs = [read(path) for path in wav_paths]

In [12]:
# Number of recordings loaded into `wavs`.
len(wavs)

52

# There are several Python audio analysis libraries. Some people consider pyAudioAnalysis one of the best of them because it can:

 * Extract audio *features* and representations
 * *Classify* unknown sounds
 * *Train*, parameter tune and *evaluate* classifiers of audio segments
 * *Detect* audio events and exclude silence periods from long recordings
 * Perform *supervised segmentation* 
 * Perform *unsupervised segmentation* 
 * Apply dimensionality reduction to *visualize* audio data and content similarities
 
 source: https://github.com/tyiannak/pyAudioAnalysis/blob/master/README.md


# First, we import audioBasicIO and audioFeatureExtraction from this library to see how they work on an example wav file. The `readAudioFile` function returns the sample rate and the audio data:

In [9]:
import audioBasicIO
import audioFeatureExtraction
import matplotlib.pyplot as plt

# Load one example recording; readAudioFile hands back the sample rate and the samples.
Fs, x = audioBasicIO.readAudioFile("CF1E0001_part01.wav")

# Short-term features for that recording.
# NOTE(review): the last two arguments look like a 50 ms window and a 25 ms
# step converted to samples via Fs - confirm against the library docs.
F = audioFeatureExtraction.stFeatureExtraction(
    x,
    Fs,
    0.050 * Fs,
    0.025 * Fs,
)

In [20]:
# Sample rate returned by readAudioFile for the example recording.
Fs

32768

In [21]:
# Sample data returned by readAudioFile for the example recording.
x

array([   437,    430,    285, ...,   -295,   -235, -16192], dtype=int16)

# Now we iterate through all audio files and do some feature extraction. There are a number of features listed in the following link:
https://github.com/tyiannak/pyAudioAnalysis/wiki/3.-Feature-Extraction

# We can select a few of them or work with all of the features listed above.


In [15]:
import numpy as np
import numpy

# Run short-term feature extraction on every loaded recording.
# NOTE(review): the last two arguments look like a 50 ms window and a 25 ms
# step converted to samples via the file's own sample rate - confirm against
# the library docs.
flat_features = []
for rate, signal in wavs:
    file_features = audioFeatureExtraction.stFeatureExtraction(
        signal, rate, 0.050 * rate, 0.025 * rate
    )
    # Flatten each result, matching what numpy.append does without `axis`.
    flat_features.append(numpy.ravel(file_features))

# Concatenate once at the end: calling numpy.append inside the loop copies the
# whole accumulated array on every iteration. The leading empty float array
# keeps the float64 result of the original `F = []` seed and makes `F` a real
# (empty) ndarray even when no recordings were loaded.
# NOTE(review): flattening discards whatever per-file / per-feature layout the
# extractor returns; the 1-D result is kept for compatibility.
F = numpy.concatenate([numpy.empty(0)] + flat_features)

In [17]:
# Shape of the accumulated feature array.
F.shape

(20809768L,)

# Now let's see how we can detect an "event" with this library:
The function silenceRemoval() from audioSegmentation.py takes an uninterrupted audio recording as input and returns segment endpoints that correspond to individual audio events. In this way, all "silent" areas of the signal are removed. This is achieved through a semi-supervised approach: first, an SVM model is trained to distinguish between high-energy and low-energy short-term frames. To this end, the 10% of frames with the highest energy, along with the 10% with the lowest, are used. Then the SVM is applied (with a probabilistic output) to the whole recording, and dynamic thresholding is used to detect the active segments.
source : https://github.com/tyiannak/pyAudioAnalysis/wiki/5.-Segmentation

# Now we iterate over the audio files and use this library to find the events in each of them:

In [18]:
import audioBasicIO as aIO
import audioSegmentation as aS

# Only the first N_SEGMENTED_FILES recordings are segmented. Slicing (rather
# than indexing with a fixed range) avoids an IndexError when fewer files were
# loaded.
# NOTE(review): 52 files were loaded earlier, so the last one is skipped here -
# confirm this selection is intentional.
N_SEGMENTED_FILES = 51

# G[k] holds the segments silenceRemoval returns for the k-th recording.
# NOTE(review): the two 0.020 arguments are presumably the short-term window
# and step in seconds - confirm against the library docs.
G = [
    aS.silenceRemoval(signal, rate, 0.020, 0.020, smoothWindow = 1, Weight = 1, plot = False)
    for rate, signal in wavs[:N_SEGMENTED_FILES]
]

# The list "G" stores all of the events. It contains the "start" and "end" times of the events for each audio file:

In [19]:
# Show the segments returned by silenceRemoval for every processed recording.
G

[[[25.640000000000001, 26.100000000000001],
  [31.640000000000001, 32.079999999999998],
  [75.239999999999995, 75.519999999999996],
  [117.7, 118.18000000000001],
  [151.92000000000002, 152.62],
  [156.74000000000001, 157.30000000000001],
  [160.14000000000001, 161.53999999999999],
  [162.66, 162.97999999999999],
  [166.59999999999999, 167.44],
  [176.20000000000002, 177.5],
  [204.90000000000001, 205.28],
  [211.44, 211.81999999999999],
  [216.53999999999999, 216.86000000000001],
  [227.46000000000001, 227.88],
  [266.38, 267.78000000000003],
  [277.75999999999999, 278.40000000000003],
  [295.57999999999998, 296.40000000000003]],
 [[25.900000000000002, 26.5],
  [58.359999999999999, 59.160000000000004],
  [60.380000000000003, 61.120000000000005],
  [96.299999999999997, 96.600000000000009],
  [115.92, 116.52],
  [131.74000000000001, 132.03999999999999],
  [134.03999999999999, 134.38],
  [138.47999999999999, 139.08000000000001],
  [147.88, 148.30000000000001],
  [157.59999999999999, 157.

# Let's count the total number of events. In total, there are 1003 events in the 51 selected audio files:

In [25]:
# Total number of detected segments across all entries of G.
ss = sum(len(file_segments) for file_segments in G)
ss

1003

# Let's see the start and end times of the events in one of the audio files:


In [26]:
# Segments detected for the recording at index 1.
G[1]

[[25.900000000000002, 26.5],
 [58.359999999999999, 59.160000000000004],
 [60.380000000000003, 61.120000000000005],
 [96.299999999999997, 96.600000000000009],
 [115.92, 116.52],
 [131.74000000000001, 132.03999999999999],
 [134.03999999999999, 134.38],
 [138.47999999999999, 139.08000000000001],
 [147.88, 148.30000000000001],
 [157.59999999999999, 157.92000000000002],
 [170.02000000000001, 170.74000000000001],
 [173.68000000000001, 174.18000000000001],
 [186.78, 187.0],
 [193.84, 194.58000000000001],
 [196.56, 197.38],
 [263.39999999999998, 263.60000000000002],
 [266.39999999999998, 266.68000000000001],
 [287.84000000000003, 288.06],
 [290.34000000000003, 290.98000000000002],
 [294.25999999999999, 294.68000000000001]]

In [27]:
# Number of segments detected for the recording at index 1.
len(G[1])

20