In [None]:
import numpy as np

In [None]:
class FCLayer:
    """Fully connected (dense) layer computing ``input @ weights + bias``.

    Attributes:
        input_size: Number of input features per sample.
        output_size: Number of output features per sample.
        weights: Array of shape ``(input_size, output_size)``.
        bias: Array of shape ``(1, output_size)``; broadcast over the rows of
            the input in ``forward``.
        input: The array most recently passed to ``forward`` (kept for the
            gradient computation in ``backward``).
    """

    def __init__(self, input_size, output_size):
        """Create the layer with random weights and bias.

        Both parameters are standard-normal draws divided by
        ``sqrt(input_size + output_size)``.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) / np.sqrt(input_size + output_size)
        self.bias = np.random.randn(1, output_size) / np.sqrt(input_size + output_size)

    def forward(self, input):
        """Return ``input @ weights + bias`` and remember ``input``.

        Args:
            input: 2-D array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)``.
        """
        self.input = input
        return np.dot(input, self.weights) + self.bias

    def backward(self, output_error, learning_rate):
        """Apply one gradient-descent step and propagate the error backwards.

        Args:
            output_error: Gradient of the loss w.r.t. this layer's output,
                shape ``(n_samples, output_size)``.
            learning_rate: Step size used for the parameter update.

        Returns:
            Gradient of the loss w.r.t. this layer's input, shape
            ``(n_samples, input_size)``.
        """
        # Must be computed with the weights as they were during forward(),
        # i.e. before the update below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row of the batch, so its gradient is the
        # column-wise sum of the output error. For a single-row batch this is
        # output_error itself; for larger batches it keeps the (1, output_size)
        # shape instead of failing on the in-place update.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [None]:
class ActivationLayer:
    """Parameter-free layer that applies a user-supplied activation function.

    Attributes:
        activation: Callable applied to the input in ``forward``.
        activation_prime: Callable giving the derivative of ``activation``;
            evaluated on the stored input in ``backward``.
        input: The value most recently passed to ``forward``.
    """

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Store ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(input)
        return activated

    def backward(self, output_error, learning_rate):
        """Return ``output_error`` scaled by the activation's derivative.

        ``learning_rate`` is accepted for a uniform layer interface but is not
        used, since this layer has nothing to update.
        """
        local_slope = self.activation_prime(self.input)
        return output_error * local_slope

In [None]:
# bonus
class FlattenLayer:
    """Reshapes its input into a single row and restores the shape on the way back.

    Attributes:
        input_shape: Shape that ``backward`` reshapes the incoming error to.
    """

    def __init__(self, input_shape):
        self.input_shape = input_shape

    def forward(self, input):
        """Return ``input`` reshaped to ``(1, total_number_of_elements)``."""
        single_row = (1, -1)
        return np.reshape(input, single_row)

    def backward(self, output_error, learning_rate):
        """Return ``output_error`` reshaped to ``input_shape``.

        ``learning_rate`` is unused; the layer has no parameters.
        """
        restored = np.reshape(output_error, self.input_shape)
        return restored

In [None]:
# bonus
class SoftmaxLayer:
    """Softmax output layer: turns each row of scores into a probability vector.

    Attributes:
        input_size: Number of classes (length of each score row).
        input: The array most recently passed to ``forward``.
        output: The softmax probabilities produced by the last ``forward``.
    """

    def __init__(self, input_size):
        self.input_size = input_size

    def forward(self, input):
        """Return the row-wise softmax of ``input``.

        Args:
            input: Scores of shape ``(n_samples, input_size)``.

        Returns:
            Array of the same shape whose rows are non-negative and sum to 1.
        """
        self.input = input
        scores = np.asarray(input)
        # Subtracting the row maximum leaves the softmax value unchanged but
        # keeps exp() from overflowing to inf (which would give inf/inf = NaN).
        shifted = scores - np.max(scores, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        # Normalise per row so a multi-row batch yields one distribution per row.
        self.output = exps / np.sum(exps, axis=-1, keepdims=True)
        return self.output

    def backward(self, output_error, learning_rate):
        """Propagate the error through the softmax.

        Computes ``s_j * (e_j - sum_i e_i * s_i)`` for every row, where ``s``
        is the stored softmax output and ``e`` is ``output_error``. This is the
        product of ``e`` with the softmax Jacobian, without materialising the
        ``input_size x input_size`` matrix.

        Args:
            output_error: Gradient of the loss w.r.t. this layer's output.
            learning_rate: Unused; the layer has no parameters.

        Returns:
            Gradient of the loss w.r.t. this layer's input.
        """
        weighted_error = np.sum(output_error * self.output, axis=-1, keepdims=True)
        return self.output * (output_error - weighted_error)

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(x):
    """Derivative of the logistic function, applied element-wise.

    The derivative ``exp(-x) / (1 + exp(-x))**2`` is an even function of ``x``,
    so it is evaluated at ``|x|``. That keeps the exponent non-positive, which
    avoids the ``inf / inf = NaN`` the direct formula yields for large
    negative ``x``.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2

def tanh(x):
    """Hyperbolic tangent of ``x`` (thin wrapper around ``np.tanh``)."""
    squashed = np.tanh(x)
    return squashed

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)**2``, applied element-wise."""
    tanh_squared = np.tanh(x) ** 2
    return 1 - tanh_squared

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    floor = 0
    return np.maximum(x, floor)

def relu_prime(x):
    """Derivative of ReLU as integers: 1 where ``x >= 0`` (including 0), else 0."""
    non_negative = np.array(x >= 0)
    return non_negative.astype('int')

In [None]:
def mse(y_true, y_pred):
    """Mean squared error: the mean of the squared element-wise differences."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of ``mse`` w.r.t. ``y_pred``: ``2 * (y_pred - y_true) / y_pred.size``."""
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_pred.size

def sse(y_true, y_pred):
    """Half the sum of squared element-wise differences between the two arrays."""
    residual = y_true - y_pred
    total = np.sum(np.power(residual, 2))
    return 0.5 * total

def sse_prime(y_true, y_pred):
    """Gradient of ``sse`` w.r.t. ``y_pred``, i.e. prediction minus target."""
    residual = y_pred - y_true
    return residual

# Add our data (5 classes)


In [None]:
!pip install wavio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wavio
  Downloading wavio-0.0.4-py2.py3-none-any.whl (9.0 kB)
Installing collected packages: wavio
Successfully installed wavio-0.0.4


In [None]:
import matplotlib.pyplot as plt
from scipy.io import wavfile
import argparse
import os
from glob import glob
import numpy as np
import pandas as pd
from librosa.core import resample, to_mono
from tqdm import tqdm
import wavio
import math

In [None]:
import seaborn as sns
import librosa
import librosa.display
import IPython.display as ipd

from itertools import cycle

# Apply seaborn's "white" style; done first because the two lines below read
# matplotlib's rcParams after the theme has been set.
sns.set_theme(style="white", palette=None)
# List of colours taken from matplotlib's current property cycle.
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]
# Endless iterator over the colours of that same property cycle.
color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])

In [None]:
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display
%matplotlib inline

# Audio Classification Data Preprocessing

In [None]:
#### Extracting MFCCs for every audio file
import pandas as pd
import os
import librosa

# Directory containing the .wav files (hard-coded path; presumably a mounted
# Google Drive folder -- adjust when running elsewhere).
audio_dataset_path='/content/drive/MyDrive/project/fold1'
# Spreadsheet with one row per recording; the columns used later are
# "audio_file_name" (file to load) and "class" (label).
metadata=pd.read_excel('/content/drive/MyDrive/project/audio_labelled_data_fold.xlsx')
# Display up to the first 1000 rows of the table.
metadata.head(1000)

Unnamed: 0,index,audio_file,class,fold,audio_file_name
0,0,New_AS_001,AS,1,New_AS_001.wav
1,1,New_AS_002,AS,1,New_AS_002.wav
2,2,New_AS_003,AS,1,New_AS_003.wav
3,3,New_AS_004,AS,1,New_AS_004.wav
4,4,New_AS_005,AS,1,New_AS_005.wav
...,...,...,...,...,...
995,995,New_N_196,NM,5,New_N_196.wav
996,996,New_N_197,NM,5,New_N_197.wav
997,997,New_N_198,NM,5,New_N_198.wav
998,998,New_N_199,NM,5,New_N_199.wav


In [None]:
def feature_extractor(file):
  """Return the time-averaged MFCC vector of one audio file.

  Args:
    file: Path of the audio file, passed to ``librosa.load``.

  Returns:
    1-D array of 40 values: each of the 40 MFCC coefficients averaged over
    all frames of the recording.
  """
  # Load from the ``file`` argument. The earlier version read a global named
  # ``filename`` instead, so it only worked when the caller happened to have
  # set that global to the same path.
  audio, sample_rate=librosa.load(file,res_type="kaiser_fast")
  mfcc_feature=librosa.feature.mfcc(y=audio, sr=sample_rate,n_mfcc=40)
  # Transpose so frames run along axis 0, then average them away.
  mfcc_scaled_features=np.mean(mfcc_feature.T, axis=0)
  return mfcc_scaled_features


In [None]:
import numpy as np
from tqdm import tqdm
# Collects one [mfcc_vector, class_label] pair per metadata row.
extracted_fe=[]
for index_num, row in tqdm(metadata.iterrows()):

  # filename=os.path.join(os.path.abspath(audio_dataset_path),"AS_New"+str(row["fold"])+'/',str(row["audio_file_name"]))
  # filename=os.path.join(os.path.abspath(audio_dataset_path),"AS_New"+'/',str(row["audio_file_name"]))
  # Absolute path of this row's audio file inside audio_dataset_path.
  filename = os.path.join(os.path.abspath(audio_dataset_path)+'/',str(row["audio_file_name"]))
  final_class_label=row["class"]
  # 40-value averaged MFCC vector for the file.
  data=feature_extractor(filename)
  extracted_fe.append([data,final_class_label])



1000it [00:36, 27.77it/s]


In [None]:
import tensorflow as tf
# Show the GPU device name reported by TensorFlow.
tf.test.gpu_device_name()

'/device:GPU:0'

In [None]:
# Put the extracted [feature, class] pairs into a pandas DataFrame
# (one row per audio file) and preview the first rows.
extracted_feature_df=pd.DataFrame(extracted_fe,columns=['feature', 'class'])
extracted_feature_df.head()

Unnamed: 0,feature,class
0,"[-395.81818, 102.45611, 60.57757, 43.659904, 3...",AS
1,"[-393.5559, 102.957634, 56.635506, 42.53774, 3...",AS
2,"[-396.97412, 104.47233, 57.448124, 43.25401, 3...",AS
3,"[-396.30777, 102.98417, 57.65973, 43.38382, 32...",AS
4,"[-398.91776, 103.66111, 58.0433, 43.699608, 32...",AS


In [None]:
# Number of samples per class, to check how balanced the data set is.
extracted_feature_df.groupby(["class"]).count()

Unnamed: 0_level_0,feature
class,Unnamed: 1_level_1
AS,200
MR,200
MS,200
MVP,200
NM,200


In [None]:
# now we have total data to serve in model , so we will split the data in independent and dependent dat
X=np.array(extracted_feature_df['feature'].tolist())
Y=np.array(extracted_feature_df['class'].tolist())


In [None]:
# Sanity check: one row per audio file, one column per MFCC coefficient.
X.shape

(1000, 40)

In [None]:
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf

# Map the class strings to integer ids, then expand the ids to one-hot rows.
# Y is overwritten here, so this cell is not safe to run twice in a row.
labelencoder=LabelEncoder()
Y=tf.keras.utils.to_categorical(labelencoder.fit_transform(Y))

In [None]:
# Sanity check: one one-hot row per audio file, one column per class.
Y.shape

(1000, 5)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the samples for testing; shuffled with a fixed random_state
# so the same split is produced on every run.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.15, random_state=4,shuffle=True)

In [None]:
# Inspect the raw training features.
X_train

array([[-513.8989    ,  125.70631   ,   82.28639   , ...,   -2.0780911 ,
          -2.7304258 ,   -3.1017184 ],
       [-426.68353   ,   84.629456  ,   34.08255   , ...,   -1.5761619 ,
          -1.0742233 ,   -0.53614676],
       [-429.70834   ,  112.560684  ,   72.218315  , ...,   -0.65758103,
          -1.0516802 ,   -1.617686  ],
       ...,
       [-452.10544   ,   86.217674  ,   63.76915   , ...,   -2.0065622 ,
          -2.4034054 ,   -2.4995425 ],
       [-363.56665   ,  109.84329   ,   32.45372   , ...,   -1.1279881 ,
          -1.7781953 ,   -1.7657309 ],
       [-515.31226   ,  103.10976   ,   60.88379   , ...,   -1.2050952 ,
          -1.7588692 ,   -1.8759154 ]], dtype=float32)

In [None]:
# Inspect the one-hot training labels.
Y_train

array([[0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       ...,
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.]], dtype=float32)

In [None]:
# Shape of the training labels: (n_train_samples, n_classes).
Y_train.shape

(850, 5)

In [None]:
# Shape of the training features: (n_train_samples, n_mfcc_coefficients).
X_train.shape

(850, 40)

In [None]:
# Shape of the held-out test labels: (n_test_samples, n_classes).
Y_test.shape


(150, 5)

In [None]:
# from keras.datasets import mnist
# from keras.utils import np_utils

# (x_train, y_train), (x_test, y_test) = mnist.load_data()

# x_train = x_train.astype('float32')
# x_train /= 255
# y_train = np_utils.to_categorical(y_train)
# x_train = x_train[0:1000]
# y_train = y_train[0:1000]

# x_test = x_test.astype('float32')
# x_test /= 255
# y_test = np_utils.to_categorical(y_test)

In [None]:
# y_train.shape

In [None]:
# x_train.shape

(1000, 28, 28)

In [None]:
# unlike the Medium article, I am not encapsulating this process in a separate class
# I think it is nice just like this

# Seed NumPy's global RNG before the layers are built: FCLayer draws its
# initial weights from np.random, so without a seed every Restart & Run All
# trains a different network and the reported numbers cannot be reproduced.
np.random.seed(4)

# 40 MFCC inputs -> three ReLU hidden layers -> 5-way softmax output.
network = [
    FlattenLayer(input_shape=(1, 40)),
    FCLayer(40,250),
    ActivationLayer(relu, relu_prime),
    FCLayer(250,280),
    ActivationLayer(relu, relu_prime),
    FCLayer(280,120),
    ActivationLayer(relu, relu_prime),
    FCLayer(120, 5),
    SoftmaxLayer(5)
]

epochs = 280
learning_rate = 1e-3  # same value as the former 10e-4, written unambiguously

# training: plain stochastic gradient descent, one sample per update
for epoch in range(epochs):
    error = 0
    for x, y_true in zip(X_train, Y_train):
        # forward
        output = x
        for layer in network:
            output = layer.forward(output)

        # error (display purpose only)
        error += mse(y_true, output)

        # backward: push the loss gradient through the layers in reverse order;
        # each layer updates its own parameters and returns the gradient for
        # the layer before it
        output_error = mse_prime(y_true, output)
        for layer in reversed(network):
            output_error = layer.backward(output_error, learning_rate)

    # average per-sample error over the epoch
    error /= len(X_train)
    print('%d/%d, error=%f' % (epoch + 1, epochs, error))

1/280, error=0.286680
2/280, error=0.170261
3/280, error=0.139168
4/280, error=0.124142
5/280, error=0.112150
6/280, error=0.103697
7/280, error=0.095957
8/280, error=0.089852
9/280, error=0.085503
10/280, error=0.081784
11/280, error=0.078969
12/280, error=0.076320
13/280, error=0.074110
14/280, error=0.072162
15/280, error=0.070371
16/280, error=0.068676
17/280, error=0.067283
18/280, error=0.065779
19/280, error=0.064268
20/280, error=0.063154
21/280, error=0.062055
22/280, error=0.061242
23/280, error=0.059945
24/280, error=0.058875
25/280, error=0.057602
26/280, error=0.056692
27/280, error=0.055556
28/280, error=0.055277
29/280, error=0.054468
30/280, error=0.053438
31/280, error=0.053009
32/280, error=0.052085
33/280, error=0.051467
34/280, error=0.051173
35/280, error=0.051199
36/280, error=0.050521
37/280, error=0.049601
38/280, error=0.049515
39/280, error=0.049333
40/280, error=0.048822
41/280, error=0.047922
42/280, error=0.048052
43/280, error=0.047199
44/280, error=0.0466

In [None]:
# Number of training samples (the divisor used for the per-epoch error above).
len(X_train)

850

In [None]:
def predict(network, input):
    """Feed ``input`` through each layer of ``network`` in order.

    Args:
        network: Iterable of layer objects exposing ``forward(value)``.
        input: Value handed to the first layer.

    Returns:
        The output of the last layer, or ``input`` itself if ``network`` is empty.
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal

# Run the forward pass once per test sample and reuse the outputs for both
# metrics, instead of predicting every sample twice.
test_outputs = [predict(network, x) for x in X_test]

# Fraction of test samples whose highest-scoring class matches the label.
accuray_test = sum([np.argmax(y) == np.argmax(out) for y, out in zip(Y_test, test_outputs)]) / len(X_test)
# Mean squared error averaged over the test samples.
error = sum([mse(y, out) for y, out in zip(Y_test, test_outputs)]) / len(X_test)
print('accuray_test: %.4f' % accuray_test)
print('mse: %.4f' % error)

accuray_test: 0.9667
mse: 0.0123


In [None]:
import matplotlib.pyplot as plt
# NOTE(review): `count` is assigned but never used in this cell.
count=0
# Upper bound on how many test samples to print; slicing simply stops at the
# end of X_test when fewer samples are available.
samples = 205
for test, true in zip(X_test[:samples], Y_test[:samples]):
    # image = np.reshape(test, (28, 28))
    # plt.imshow(image, cmap='binary')
    # plt.show()
    # The network output has a single row; take it as the class-probability vector.
    pred = predict(network, test)[0]
    # Predicted class = index of the largest probability.
    idx = np.argmax(pred)
    # True class = position of the 1 in the one-hot label.
    idx_true = np.argmax(true)
    print('pred: %s, prob: %.2f, true: %d' % (idx, pred[idx], idx_true))

pred: 3, prob: 0.82, true: 3
pred: 4, prob: 0.61, true: 2
pred: 3, prob: 0.99, true: 3
pred: 3, prob: 0.96, true: 3
pred: 2, prob: 0.96, true: 2
pred: 4, prob: 1.00, true: 4
pred: 4, prob: 0.97, true: 4
pred: 4, prob: 1.00, true: 4
pred: 4, prob: 0.96, true: 4
pred: 3, prob: 0.64, true: 0
pred: 0, prob: 0.75, true: 2
pred: 1, prob: 0.98, true: 1
pred: 4, prob: 1.00, true: 4
pred: 4, prob: 0.98, true: 1
pred: 0, prob: 0.84, true: 0
pred: 4, prob: 1.00, true: 4
pred: 2, prob: 0.98, true: 2
pred: 3, prob: 0.50, true: 3
pred: 1, prob: 0.99, true: 1
pred: 4, prob: 0.99, true: 4
pred: 3, prob: 1.00, true: 3
pred: 3, prob: 0.99, true: 3
pred: 1, prob: 0.94, true: 1
pred: 2, prob: 0.99, true: 2
pred: 0, prob: 0.99, true: 0
pred: 3, prob: 0.63, true: 0
pred: 3, prob: 0.78, true: 3
pred: 0, prob: 0.86, true: 0
pred: 4, prob: 1.00, true: 4
pred: 2, prob: 0.99, true: 2
pred: 2, prob: 0.99, true: 2
pred: 0, prob: 0.88, true: 0
pred: 3, prob: 1.00, true: 3
pred: 1, prob: 0.95, true: 1
pred: 1, prob: