# Environmental Sound Classification using Deep Learning
## >> Data Preprocessing

You may need to install librosa using pip as follows:

> **!pip install librosa==0.8.0**


In [1]:
import os

import cv2 as cv
import librosa
import numpy as np
import pandas as pd

from tqdm import tqdm

---

## 0. Download and extract audio data


In [2]:
USE_GOOGLE_COLAB = True
ROOT_FOLDER_NAME = 'DeepSound'

if USE_GOOGLE_COLAB:
    
    from google.colab import drive 
    drive.mount('/content/drive')

   
    %cd /content/drive/'My Drive'

%mkdir $ROOT_FOLDER_NAME
%cd $ROOT_FOLDER_NAME

Mounted at /content/drive
/content/drive/My Drive
/content/drive/My Drive/DeepSound


In [3]:
# UrbanSound8K verisi olmayanlar için. Zip dosyasında gerekli ses dosyaları olacağı için kodu çalıştırmaya gerek yoktur. 

DOWNLOAD_DATASET = True
EXTRACT_DATASET = True

DATASET_URL = "https://goo.gl/8hY5ER"

if DOWNLOAD_DATASET:
    !wget $DATASET_URL

if EXTRACT_DATASET:
    !tar xf 8hY5ER

--2022-10-07 13:48:03--  https://goo.gl/8hY5ER
Resolving goo.gl (goo.gl)... 173.194.214.139, 173.194.214.101, 173.194.214.138, ...
Connecting to goo.gl (goo.gl)|173.194.214.139|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz [following]
--2022-10-07 13:48:03--  https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz
Resolving zenodo.org (zenodo.org)... 188.184.117.155
Connecting to zenodo.org (zenodo.org)|188.184.117.155|:443... connected.
HTTP request sent, awaiting response... ^C
tar: 8hY5ER: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now


---

In [4]:
# Absolute locations of the UrbanSound8K audio folders and of the metadata CSV.
# NOTE(review): these point at MyDrive/UrbanSound8K, while the download cell
# extracts the archive into the current working directory -- confirm that the
# dataset really lives at this location.
US8K_AUDIO_PATH, US8K_METADATA_PATH = (
    os.path.abspath(dataset_location)
    for dataset_location in (
        '/content/drive/MyDrive/UrbanSound8K/audio',
        '/content/drive/MyDrive/UrbanSound8K/metadata/UrbanSound8K.csv',
    )
)

In [5]:

# Read only the columns used later in the notebook: the clip file name, the
# fold it belongs to and its numeric class id. Both numeric columns are read
# as uint8.
metadata_columns = ["slice_file_name", "fold", "classID"]
metadata_dtypes = {"fold": "uint8", "classID" : "uint8"}

us8k_metadata_df = pd.read_csv(
    US8K_METADATA_PATH,
    usecols=metadata_columns,
    dtype=metadata_dtypes,
)

# Keep the frame as the last expression so the notebook renders it.
us8k_metadata_df

Unnamed: 0,slice_file_name,fold,classID
0,100032-3-0-0.wav,5,3
1,100263-2-0-117.wav,5,2
2,100263-2-0-121.wav,5,2
3,100263-2-0-126.wav,5,2
4,100263-2-0-137.wav,5,2
...,...,...,...
8727,99812-1-2-0.wav,7,1
8728,99812-1-3-0.wav,7,1
8729,99812-1-4-0.wav,7,1
8730,99812-1-5-0.wav,7,1


---

## 1. Feature Extraction


###### Extract a log-mel spectrogram for each audio file in the dataset and store it in a pandas DataFrame along with its class label and fold number.

In [6]:
# Settings forwarded to librosa.feature.melspectrogram by
# compute_melspectrogram_with_fixed_length.
HOP_LENGTH = 512       # passed as `hop_length`
WINDOW_LENGTH = 512    # passed as `win_length`
N_MEL = 128            # passed as `n_mels`


def compute_melspectrogram_with_fixed_length(audio, sampling_rate, num_of_samples=128):
    """Compute a dB-scaled mel spectrogram whose axis 1 has a fixed size.

    The mel spectrogram is computed with the notebook-level HOP_LENGTH,
    WINDOW_LENGTH and N_MEL settings, converted to decibels relative to its
    own maximum, and then brought to exactly ``num_of_samples`` entries along
    axis 1.

    Parameters
    ----------
    audio
        Audio signal, handed to librosa as ``y`` (presumably the 1-D array
        returned by ``librosa.load`` -- confirm against the caller).
    sampling_rate
        Sampling rate of ``audio``, handed to librosa as ``sr``.
    num_of_samples
        Required size of axis 1 of the returned spectrogram.

    Returns
    -------
    The dB-scaled mel spectrogram, or ``None`` if any step raised. In that
    case the error is printed instead of being propagated.
    """
    try:
        mel_power = librosa.feature.melspectrogram(
            y=audio,
            sr=sampling_rate,
            hop_length=HOP_LENGTH,
            win_length=WINDOW_LENGTH,
            n_mels=N_MEL,
        )

        # ref=np.max expresses every value in dB relative to this clip's peak.
        mel_db = librosa.power_to_db(mel_power, ref=np.max)

        # Already the requested size along axis 1: nothing to adjust.
        if mel_db.shape[1] == num_of_samples:
            return mel_db

        # Otherwise force axis 1 to the requested size. constant_values is
        # forwarded to the padding step (presumably so that padded entries
        # are filled with -80.0 dB -- confirm against the librosa docs).
        return librosa.util.fix_length(
            mel_db,
            size=num_of_samples,
            axis=1,
            constant_values=(0, -80.0),
        )
    except Exception as error:
        print("\nError encountered while parsing files\n>>", error)
        return None

In [7]:
def compute_with_grayscale(melspectrogram):
    """Pass a single-channel image through OpenCV's GRAY->BGR->GRAY conversion.

    Parameters
    ----------
    melspectrogram
        Image to convert (presumably the 2-D array produced by
        ``compute_melspectrogram_with_fixed_length`` -- confirm against the
        caller).

    Returns
    -------
    The result of the second conversion, or ``None`` if OpenCV raised. In
    that case the error is printed instead of being propagated.

    NOTE(review): expanding to three channels and collapsing them again looks
    like it should leave the values essentially unchanged -- confirm that this
    step is actually needed.
    """
    try:
        three_channel = cv.cvtColor(melspectrogram, cv.COLOR_GRAY2BGR)
        return cv.cvtColor(three_channel, cv.COLOR_BGR2GRAY)
    except Exception as error:
        print("\nError encountered while parsing files\n>>", error)
        return None

In [8]:
# Passed as `duration` to librosa.load, so at most this much of each clip is read.
SOUND_DURATION = 2.95

# One [melspectrogram, label, fold] entry per row of the metadata table.
features = []
total_clips = len(us8k_metadata_df)

for row_index, row in tqdm(us8k_metadata_df.iterrows(), total=total_clips):
    fold = row["fold"]
    label = row["classID"]

    # Clips are stored in one sub-folder per fold: <audio root>/fold<N>/<file name>.
    file_path = f'{US8K_AUDIO_PATH}/fold{row["fold"]}/{row["slice_file_name"]}'
    audio, sample_rate = librosa.load(
        file_path,
        duration=SOUND_DURATION,
        res_type='kaiser_fast',
    )

    # Both helpers return None on failure, so a failed clip is stored as None
    # rather than stopping the loop.
    melspectrogram = compute_with_grayscale(
        compute_melspectrogram_with_fixed_length(audio, sample_rate)
    )

    features.append([melspectrogram, label, fold])

us8k_df = pd.DataFrame(features, columns=["melspectrogram", "label", "fold"])

  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
100%|██████████| 8732/8732 [40:44<00:00,  3.57it/s]


---

### Store the data

In [9]:

# Set to False to skip writing the extracted features to disk.
WRITE_DATA = True

# File the feature DataFrame is pickled to, relative to the current directory.
features_pickle_name = "us8k_df.pkl"

if WRITE_DATA:
    us8k_df.to_pickle(features_pickle_name)