In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory and report how many files it contains. The joined
# path used to be computed and then discarded, so the loop had no visible
# effect; a one-line summary confirms the data is mounted without printing
# every file name.
input_file_count = 0
for dirname, _, filenames in os.walk('/kaggle/input'):
    input_file_count += len(filenames)
print("{} files found under /kaggle/input".format(input_file_count))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Libraries

In [None]:
# important packages
	
import pandas as pd					# data manipulation using dataframes
import numpy as np					# data statistical analysis

import seaborn as sns				# Statistical data visualization
import matplotlib.pyplot as plt		# data visualisation
%matplotlib inline

In [None]:
import librosa							# package for music and audio analysis
import librosa.display

In [None]:
import IPython.display as ipd			# public api for display tool in ipython

# Importing dataset

In [None]:
# Read the UrbanSound8K metadata table, then continue on a copy so the frame
# exactly as loaded from disk stays available in `raw_df`.
metadata_csv = "../input/urbansound8k/UrbanSound8K.csv"
raw_df = pd.read_csv(metadata_csv)
df = raw_df.copy()

# Data preprocessing

## Feature Engineering

## MFCC

One popular audio feature extraction method is Mel-frequency cepstral coefficients (MFCC). In the classic speech-recognition setup an MFCC feature vector has 39 features; this notebook instead extracts 40 coefficients per frame (`n_mfcc=40`). The feature count is small enough to force the model to learn the information of the audio. 12 parameters are related to the amplitude of frequencies. The extraction flow of MFCC features is depicted below:

- Framing and Windowing: The continuous speech signal is blocked into frames of N samples, with adjacent frames being separated by M. The result after this step is called spectrum.

- Mel Frequency Warping: For each tone with a frequency f, a pitch is measured on the Mel scale. This scale uses a linear spacing for frequencies below 1000 Hz and transforms frequencies above 1000 Hz by using a logarithmic function.

- Cepstrum: Converting the log-mel spectrum back to the time domain. This provides a good representation of a signal’s local spectral properties, with the result being the MFCC features.

![](https://miro.medium.com/max/577/1*M3Fq-ltf5dkLW85xc2T6YA.png)

The MFCC features can be extracted using the Librosa Python library imported earlier:
`librosa.feature.mfcc(y=x, sr=sr)`

Where x = time domain NumPy series and sr = sampling rate

## Feature extraction of audio sample using mfcc

Here we will use Mel-Frequency Cepstral Coefficients (MFCC) extracted from the audio samples. The MFCC summarises the frequency distribution across the window size, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

In [None]:
import os
import random

# Pick one clip from fold 1 at random to illustrate MFCC extraction.
# `fold_dir` replaces the former `dir` name, which shadowed the builtin dir().
fold_dir = "../input/urbansound8k/fold1/"
# Only .wav entries are candidates, so a stray non-audio file in the folder can
# never be chosen and break the audio-loading cells that follow. Sorting makes
# the candidate order independent of os.listdir's arbitrary ordering.
wav_names = sorted(
    name for name in os.listdir(fold_dir) if name.lower().endswith(".wav")
)
filename = random.choice(wav_names)
audio_file = os.path.join(fold_dir, filename)
print(audio_file)

In [None]:
# Decode the randomly chosen clip; librosa.load returns the samples together
# with the sampling rate they are expressed in.
data,sample_rate = librosa.load(audio_file)

In [None]:
# Show an inline audio player for the chosen clip so it can be listened to.
ipd.Audio(audio_file)

In [None]:
# Compute 40 MFCCs for the sample clip and print the shape of the result.
mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
print(mfccs.shape)

In [None]:
# Display the raw MFCC array computed for the sample clip.
mfccs

In [None]:
# Average the transposed MFCC array along its first axis, which collapses the
# sample clip into a single fixed-length vector, then print that vector's shape.
mfccs_transposed = mfccs.T
mfccs_scaled = mfccs_transposed.mean(axis=0)
print(mfccs_scaled.shape)

In [None]:
# Display the averaged MFCC vector of the sample clip.
mfccs_scaled

## Feature Extraction of all audio files

In [None]:
def feature_extractor(audio_file, n_mfcc=40, res_type='kaiser_fast'):
    """Load an audio file and compute its MFCCs.

    Parameters
    ----------
    audio_file : str or path-like
        Path of the audio file; passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCCs to compute. Defaults to 40, the value that was
        previously hard-coded.
    res_type : str, optional
        Resampling method handed to ``librosa.load``. Defaults to
        ``'kaiser_fast'``, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The output of ``librosa.feature.mfcc`` for the clip. No averaging
        over frames is done here; callers aggregate as needed.
    """
    # No `sr` is passed, so librosa.load applies its own default sampling rate.
    samples, sample_rate = librosa.load(audio_file, res_type=res_type)
    return librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=n_mfcc)

In [None]:
import os

from tqdm import tqdm

# Root folder of the dataset; each clip lives in a "fold<N>" subfolder.
# Named `dataset_dir` rather than `dir` so the builtin dir() is not shadowed.
dataset_dir = "../input/urbansound8k/"

# Build one [averaged MFCC vector, class label] record per metadata row.
mfccs_audio_data = []
# df.iterrows() is a generator with no length, so tqdm needs `total` to be able
# to draw a progress bar and estimate the remaining time.
for index, row in tqdm(df.iterrows(), total=len(df)):
    audio_path = os.path.join(
        dataset_dir, 'fold' + str(row['fold']), row['slice_file_name']
    )
    class_label = row['class']
    mfccs_audio = feature_extractor(audio_path)
    # Average over the first axis of the transposed array so every clip yields
    # one fixed-length vector regardless of its duration.
    mfccs_audio = np.mean(mfccs_audio.T, axis=0)
    mfccs_audio_data.append([mfccs_audio, class_label])

In [None]:
# One row per clip: the averaged MFCC vector in 'audio', its label in 'class'.
df1 =pd.DataFrame(mfccs_audio_data,columns=['audio','class'])

In [None]:
# Preview the first rows of the extracted-features frame.
df1.head()

In [None]:
# Pull both columns out as plain Python lists, then turn each into a NumPy
# array: X holds the per-clip feature vectors, y the matching class labels.
audio_vectors = df1['audio'].tolist()
class_labels = df1['class'].tolist()
X = np.array(audio_vectors)
y = np.array(class_labels)

In [None]:
# Print the shapes of the feature array and the label array.
print(X.shape, y.shape)

In [None]:
# Wrap the feature array and the labels in separate frames; the feature frame
# gets default integer column names, the label frame a single 'class' column.
df_feature = pd.DataFrame(X)
df_target = pd.DataFrame(y, columns = ['class'])

In [None]:
# Put the feature columns and the 'class' column side by side in one frame.
df2 = pd.concat([df_feature, df_target], axis = 1)

In [None]:
# Preview the first rows of the combined features + label frame.
df2.head()

In [None]:
# Show the (rows, columns) size of the combined frame.
df2.shape

In [None]:
# Persist the combined features + label frame to the working directory.
# NOTE(review): with to_csv's defaults the row index is also written as an
# unnamed first column; pass index=False if downstream readers do not expect
# it — confirm against whatever consumes this file before changing.
df2.to_csv("./modified_URBANSOUND8K.csv")