In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import cv2

import tensorflow as tf
from sklearn.model_selection import train_test_split

## Preprocessing Mel Spectrograms

The following cells convert the Mel spectrogram images to grayscale, resize them to 64×64 pixels, and min-max normalize them to the 0–255 range. The resulting arrays are used to create the train, test, and validation datasets.

In [4]:
# Load one spectrogram image and turn it into a small, normalized grayscale array.
def image_processor(image_file, size=(64, 64)):
    """Read a spectrogram image and return it resized, in grayscale, and min-max normalized.

    Parameters
    ----------
    image_file : str or path-like
        Location of the image file; converted with ``str()`` before being
        handed to ``cv2.imread``.
    size : tuple of int, optional
        Target size forwarded to ``cv2.resize`` as its ``dsize`` argument,
        i.e. ``(width, height)``. Defaults to ``(64, 64)``.

    Returns
    -------
    numpy.ndarray
        The grayscale image, resized to ``size`` and rescaled with
        ``cv2.NORM_MINMAX`` so that its values span 0 to 255.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot read or decode ``image_file``.
    """
    # Read the created spectrogram. cv2.imread signals failure by returning
    # None instead of raising, so check explicitly to get a readable error.
    img = cv2.imread(str(image_file))
    if img is None:
        raise ValueError(f"Could not read image file: {image_file!r}")

    # Grayscaling (cv2.imread loads colour images in BGR channel order):
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Resizing:
    gray_img_small = cv2.resize(gray_img, size)

    # Normalize: passing None lets OpenCV allocate the destination itself,
    # so no dummy buffer is needed.
    return cv2.normalize(gray_img_small, None, 0, 255, cv2.NORM_MINMAX)

In [5]:
# The UrbanSound8K metadata file maps every audio slice file name to its class id.
# Load it into a pandas DataFrame.
# The dataset root can be overridden with the URBANSOUND8K_DIR environment
# variable; when it is unset the original location is used.
URBANSOUND8K_DIR = os.environ.get("URBANSOUND8K_DIR", r"C:\Users\kemal\Desktop\UrbanSound8K")
metaDF = pd.read_csv(os.path.join(URBANSOUND8K_DIR, "metadata", "UrbanSound8K.csv"))

In [9]:
# Peek at ten randomly chosen metadata rows.
metaDF.sample(10)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
7318,6902-2-0-26.wav,6902,13.0,17.0,1,7,2,children_playing
6772,54898-8-0-4.wav,54898,48.992301,52.992301,2,3,8,siren
6409,42117-8-0-0.wav,42117,88.660252,92.660252,2,3,8,siren
3970,177756-2-0-12.wav,177756,6.0,10.0,1,4,2,children_playing
8365,89443-9-0-48.wav,89443,24.0,28.0,1,7,9,street_music
4433,182800-2-0-0.wav,182800,1.405558,5.405558,1,1,2,children_playing
4095,178686-0-0-6.wav,178686,3.0,7.0,2,5,0,air_conditioner
4509,184805-0-0-99.wav,184805,49.5,53.5,2,6,0,air_conditioner
5961,28808-1-0-9.wav,28808,4.5,8.5,1,10,1,car_horn
8039,77774-4-0-2.wav,77774,1.0,5.0,1,5,4,drilling


In [77]:
# Finding the class of an image using its file name
def class_finder(image_file, metadata=None):
    """Return the class id of the audio slice a spectrogram image belongs to.

    The image's base name (everything before the first dot of the file name)
    plus ``".wav"`` is looked up in the ``slice_file_name`` column of the
    metadata table, and the matching ``classID`` value is returned.

    Parameters
    ----------
    image_file : str or path-like
        File name or path of the spectrogram image. Both ``\\`` and ``/``
        are accepted as path separators.
    metadata : pandas.DataFrame, optional
        Table with ``slice_file_name`` and ``classID`` columns. Defaults to
        the notebook-level ``metaDF``.

    Returns
    -------
    int
        The class id of the matching metadata row.

    Raises
    ------
    KeyError
        If no metadata row matches the derived ``.wav`` file name.
    ValueError
        If more than one metadata row matches.
    """
    if metadata is None:
        metadata = metaDF

    # Isolate the file name before stripping the extension, so that dots in
    # directory names cannot truncate the path.
    base_name = str(image_file).replace("\\", "/").rsplit("/", 1)[-1]
    sound_name = base_name.split(".")[0] + ".wav"

    matches = metadata.loc[metadata["slice_file_name"] == sound_name, "classID"]
    if matches.empty:
        raise KeyError(f"No metadata entry for {sound_name!r} (image: {image_file!r})")
    if len(matches) > 1:
        raise ValueError(f"{len(matches)} metadata entries match {sound_name!r}")

    # .iloc[0] extracts the scalar explicitly; calling int() on a Series is deprecated.
    return int(matches.iloc[0])

In [22]:
# Start with two empty lists; data_creator appends to them.
#   X -> features, i.e., image file pixels
#   y -> target, i.e., class_id
X, y = [], []

In [23]:
# Process one image and record it in the X / y lists
def data_creator(image_file):
    """Append the processed image and its class id to the notebook-level X and y.

    ``image_processor`` supplies the pixel array and ``class_finder`` the
    label. Both are computed before either list is touched, then the pixel
    array is appended to ``X`` and the label to ``y``. ``X`` and ``y`` must
    still support ``.append`` when this is called.

    Returns the tuple ``(X, y)``.
    """
    pixels, label = image_processor(image_file), class_finder(image_file)

    X.append(pixels)  # features
    y.append(label)   # target

    return X, y


In [23]:
# Test: look up the class id for a single spectrogram file name and print it.
# Only the file name matters here; class_finder never opens the image itself.
classID = class_finder("7061-6-0-0.png")

print(classID)

6


## Creating the dataset and splitting it into train, test, and validation sets

In [72]:
# Folder that holds the spectrogram sub-folders.
# The dataset root can be overridden with the URBANSOUND8K_DIR environment
# variable; when it is unset the original location is used.
spect_path = os.path.join(
    os.environ.get("URBANSOUND8K_DIR", r"C:\Users\kemal\Desktop\UrbanSound8K"),
    "data",
)

# os.listdir returns entries in arbitrary order; sort them so the folder
# order is the same on every run and every machine.
folder_names = sorted(os.listdir(spect_path))

In [73]:
# Build the full path of every sub-folder found under spect_path.
spect_folder_paths = [os.path.join(spect_path, folder) for folder in folder_names]

print(spect_folder_paths)

['C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\0', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\1', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\2', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\3', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\4', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\5', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\6', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\7', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\8', 'C:\\Users\\kemal\\Desktop\\UrbanSound8K\\data\\9']


In [74]:
# Reset both containers so the dataset is built from scratch.
X, y = [], []

In [78]:
# Walk every spectrogram folder and add each file to X / y through data_creator.
# os.listdir returns entries in arbitrary order, so both levels are sorted:
# the samples then land in X and y in the same order on every run, which is
# required for a seeded train/test split to be reproducible.
for folders in sorted(spect_folder_paths):
    file_list = sorted(os.listdir(folders))

    for file in file_list:
        file_path = os.path.join(folders, file)
        data_creator(file_path)

In [79]:
# Turn the collected lists into NumPy arrays for splitting and saving.
X, y = np.array(X), np.array(y)

In [83]:
# Creating test, train, and validation datasets: 80% train, then the remaining
# 20% is split in half into test and validation.
# A fixed seed makes the split, and therefore the saved .npy files, the same
# on every run.
# NOTE(review): the metadata has a `fold` column, which this random split
# ignores - confirm that is intended.
SPLIT_SEED = 35
X_train, X_tv, y_train, y_tv = train_test_split(X, y, train_size=0.8, random_state=SPLIT_SEED)
X_test, X_val, y_test, y_val = train_test_split(X_tv, y_tv, test_size=0.5, random_state=SPLIT_SEED)

In [84]:
# Print out the lengths of the sets (the last label now names y_val, the array
# it actually measures, instead of repeating y_test):
print(
    f"Length of X_train: {len(X_train)} \nLength of y_train: {len(y_train)}\n"
    f"Length of X_test: {len(X_test)} \nLength of y_test: {len(y_test)}\n"
    f"Length of X_val: {len(X_val)} \nLength of y_val: {len(y_val)}"
)

Length of X_train: 6985 
Length of y_train: 6985
Length of X_test: 873 
Length of y_test: 873
Length of X_val: 874 
Length of y_test: 874


In [88]:
# Persist the test features; the .npy files are the input for the CNNs.
np.save(file='X_test.npy', arr=X_test, allow_pickle=True)

In [89]:
# Persist the remaining splits, one .npy file per array.
for npy_path, split_array in (
    ('X_train.npy', X_train),
    ('X_val.npy', X_val),
    ('y_train.npy', y_train),
    ('y_test.npy', y_test),
    ('y_val.npy', y_val),
):
    np.save(npy_path, split_array, allow_pickle=True)