In [1]:
import numpy as np

In [None]:
import pandas as pd

In [143]:
import matplotlib.pyplot as plt

In [144]:
# Load the voice dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('voice.csv')

In [145]:
# Encode the target as integers: "female" -> 1, every other value -> 0.
# The numeric-dtype guard keeps this cell idempotent: once the column holds
# 0/1, re-running the comparison against "female" would match nothing and
# silently overwrite every label with 0.
if not pd.api.types.is_numeric_dtype(data["label"]):
    data["label"] = (data["label"] == "female").astype("int64")
data.label.values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [146]:
# Features are every column except the target; the target is the encoded
# label column as a NumPy array.
x_data = data.drop(columns = ['label'])
y = data['label'].values

In [147]:
# Min-max normalization, applied per feature (column):
#   (x - min(x)) / (max(x) - min(x))
# The DataFrame's own .min()/.max() always reduce column-wise. np.min/np.max
# on a DataFrame forward axis=None, which pandas 2.0+ interprets as "reduce
# over the whole frame", so every column would share one global min and range.
col_min = x_data.min()
col_range = x_data.max() - col_min
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than producing 0/0 = NaN.
x = (x_data - col_min) / col_range.replace(0, 1)

In [148]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size = 0.2,
    random_state = 42,
)

In [149]:
# Sanity check: (rows, columns) of the training features.
print(x_train.shape)

(2534, 20)


In [150]:
# Transpose the feature frames so that each column is one sample; this is the
# layout the network's `W @ X` products expect. The labels are 1-D, so .T
# leaves them unchanged.
train_voices, test_voices = x_train.T, x_test.T
train_labels, test_labels = y_train.T, y_test.T

print("train_voices shape:", train_voices.shape)
print("test_voices shape:", test_voices.shape)
print("train_labels shape:", train_labels.shape)
print("test_labels shape:", test_labels.shape)

train_voices shape: (20, 2534)
test_voices shape: (20, 634)
train_labels shape: (2534,)
test_labels shape: (634,)


In [151]:
# Convert the transposed feature frames to plain NumPy arrays.
# np.asarray is used instead of `.values` so the cell can be re-run safely:
# after the first run these names are already ndarrays, which have no
# `.values` attribute.
train_voices = np.asarray(train_voices)
test_voices = np.asarray(test_voices)

In [152]:
# Confirm that both the labels and the features are plain NumPy arrays.
print(type(train_labels))
print(type(train_voices))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [153]:
def softmax(x):
    """Softmax along axis 0 (each column is normalised independently).

    The maximum along axis 0 is subtracted before exponentiating, which
    leaves the result unchanged but keeps np.exp from overflowing on large
    inputs.

    Parameters
    ----------
    x : array-like
        Scores; for a 2-D input each column is one sample.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries sum to 1 along axis 0.
    """
    shifted = x - np.max(x, axis = 0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis = 0)

In [154]:
def one_hot_encoding(y_label, num_classes = None):
    """One-hot encode integer class labels, one column per sample.

    Parameters
    ----------
    y_label : array-like of int
        1-D sequence of class ids.
    num_classes : int, optional
        Number of rows (classes) in the result. When omitted it is inferred
        as ``max(y_label) + 1``, which under-counts if the highest class
        happens to be absent from ``y_label``; pass it explicitly when the
        class count is known.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, y_label.size)`` with a 1 at
        ``[y_label[i], i]`` and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``y_label`` is empty and ``num_classes`` is not given (the class
        count cannot be inferred from an empty array).
    """
    # Accept lists/tuples as well as ndarrays.
    y_label = np.asarray(y_label)
    if num_classes is None:
        C = int(y_label.max() + 1)
    else:
        C = int(num_classes)
    enc = np.zeros((C, y_label.size))
    enc[y_label.astype(int), np.arange(y_label.size)] = 1
    return enc

In [155]:
# Small constant added inside np.log when computing the cross-entropy loss,
# so that a predicted probability of exactly 0 does not produce log(0).
eps = 1e-10

In [156]:
class NN:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Data is laid out column-wise: ``X`` has shape ``(n_inputs, n_samples)``
    and ``y_label`` is a 1-D NumPy array of integer class ids. The layers
    have no bias terms. Weights are updated with momentum:
    ``m = beta * m - alpha * grad`` followed by ``W += m``. Gradients are
    summed (not averaged) over the samples of a batch, so a suitable
    ``alpha`` depends on the batch size.
    """

    def __init__(self, sizes, seed = None):
        """Create randomly initialised weights and zeroed momentum buffers.

        Parameters
        ----------
        sizes : sequence of three ints
            ``[n_inputs, n_hidden, n_classes]``.
        seed : int, optional
            When given, weight initialisation and mini-batch shuffling draw
            from a private ``np.random.RandomState(seed)`` so runs are
            reproducible. When omitted, NumPy's global random state is used.
        """
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.W0 = self._rng.randn(sizes[1], sizes[0])
        self.W1 = self._rng.randn(sizes[2], sizes[1])
        self.mW0 = np.zeros_like(self.W0)
        self.mW1 = np.zeros_like(self.W1)

    def _targets(self, y_label):
        """One-hot targets with exactly as many rows as the output layer."""
        enc = one_hot_encoding(y_label)
        # one_hot_encoding infers the class count from max(label) + 1. If the
        # highest class is absent from this batch the matrix is too short and
        # would silently broadcast against the softmax output, so pad it with
        # all-zero rows for the missing classes.
        missing = self.W1.shape[0] - enc.shape[0]
        if missing > 0:
            enc = np.vstack([enc, np.zeros((missing, enc.shape[1]))])
        return enc

    def _forward(self, X):
        """Forward pass; returns (hidden pre-activation, hidden activation, class probabilities)."""
        a0 = self.W0 @ X
        z0 = np.maximum(a0, 0)
        a1 = self.W1 @ z0
        return a0, z0, softmax(a1)

    def _metrics(self, y, y_one_hot, y_label):
        """Mean cross-entropy loss and accuracy (fraction in [0, 1]) for predictions ``y``."""
        loss = - (np.log(y + eps) * y_one_hot).sum() / y_label.size
        acc = (np.argmax(y, axis = 0) == y_label).sum() / y_label.size
        return loss, acc

    def _update(self, X, y_one_hot, a0, z0, y, alpha, beta):
        """Backpropagate one batch and apply a momentum update to both layers."""
        # Gradient of softmax + cross-entropy w.r.t. the output pre-activation.
        d_a_1 = y - y_one_hot
        gradW1 = d_a_1 @ z0.T
        # Backpropagate through W1, then through the ReLU: units whose
        # pre-activation was negative pass no gradient.
        d_a_0 = self.W1.T @ d_a_1
        d_a_0[a0 < 0] = 0
        gradW0 = d_a_0 @ X.T

        self.mW0 = beta * self.mW0 - alpha * gradW0
        self.mW1 = beta * self.mW1 - alpha * gradW1

        self.W0 += self.mW0
        self.W1 += self.mW1

    def train(self, X, y_label, epochs = 10, alpha = 0.1, beta = 0.8):
        """Full-batch training: one momentum update per epoch on all of ``X``.

        Prints the loss and accuracy measured on the forward pass of each
        epoch, i.e. before that epoch's weight update.

        Parameters
        ----------
        X : numpy.ndarray, shape (n_inputs, n_samples)
        y_label : numpy.ndarray, shape (n_samples,)
            Integer class ids.
        epochs : int
            Number of updates.
        alpha : float
            Learning rate.
        beta : float
            Momentum coefficient.
        """
        y_one_hot = self._targets(y_label)
        for _ in range(epochs):
            a0, z0, y = self._forward(X)
            loss, acc = self._metrics(y, y_one_hot, y_label)
            self._update(X, y_one_hot, a0, z0, y, alpha, beta)
            print("loss: ", loss, " acc: ", acc * 100, "%")

    def stoch_train(self, X, y_label, epochs = 10, alpha = 0.1, beta = 0.8, mbSize = 100):
        """Mini-batch training with momentum.

        Every epoch reshuffles the samples and performs one update per
        mini-batch of ``mbSize`` columns. The final, possibly smaller, batch
        is included, so no sample is skipped and ``mbSize > n_samples`` still
        performs one update per epoch. After each epoch the loss and accuracy
        on the full ``X`` are printed.

        Parameters
        ----------
        X : numpy.ndarray, shape (n_inputs, n_samples)
        y_label : numpy.ndarray, shape (n_samples,)
            Integer class ids.
        epochs : int
            Number of passes over the data.
        alpha : float
            Learning rate.
        beta : float
            Momentum coefficient.
        mbSize : int
            Mini-batch size; must be at least 1.

        Raises
        ------
        ValueError
            If ``mbSize`` is smaller than 1.
        """
        if mbSize < 1:
            raise ValueError("mbSize must be a positive integer")
        y_one_hot = self._targets(y_label)
        n_samples = y_label.size
        indices = np.arange(n_samples)
        for _ in range(epochs):
            self._rng.shuffle(indices)
            X_sh = X[:, indices]
            y_one_sh = y_one_hot[:, indices]

            # Stepping by mbSize up to n_samples also yields the trailing
            # partial batch; slicing past the end simply truncates.
            for start in range(0, n_samples, mbSize):
                X_iter = X_sh[:, start:start + mbSize]
                y_one_iter = y_one_sh[:, start:start + mbSize]

                a0, z0, y = self._forward(X_iter)
                self._update(X_iter, y_one_iter, a0, z0, y, alpha, beta)

            loss, acc = self.evaluate(X, y_label)
            print("loss: ", loss, " acc: ", acc * 100, "%")

    def evaluate(self, X, y_label):
        """Return ``(loss, acc)`` for ``X`` against ``y_label``.

        ``loss`` is the mean cross-entropy and ``acc`` the fraction of
        correctly classified samples (in [0, 1]). The weights are not
        modified.
        """
        y_one_hot = self._targets(y_label)
        _, _, y = self._forward(X)
        return self._metrics(y, y_one_hot, y_label)

In [174]:
# Layer sizes are [inputs, hidden units, classes]: 20 input features,
# 70 hidden units, 2 output classes.
voice_net = NN([20, 70, 2])

In [175]:
# Full-batch training: each epoch is a single momentum update computed on the
# whole training set.
voice_net.train(train_voices, train_labels, beta = 0.9, alpha = 1e-6, epochs = 20)

loss:  11.557710370200141  acc:  49.21073401736385 %
loss:  11.514598586984087  acc:  49.21073401736385 %
loss:  11.401698716994071  acc:  49.21073401736385 %
loss:  11.14492517142731  acc:  49.21073401736385 %
loss:  10.598946265984614  acc:  49.21073401736385 %
loss:  9.576335131286248  acc:  49.13180741910024 %
loss:  8.010281738360646  acc:  49.09234411996843 %
loss:  6.092933865155335  acc:  48.34254143646409 %
loss:  4.146662978982139  acc:  44.317284925019734 %
loss:  2.7643928778503137  acc:  37.72691397000789 %
loss:  2.688532007492353  acc:  41.75217048145225 %
loss:  3.5882641711017134  acc:  50.0394632991318 %
loss:  4.580847070562602  acc:  51.53906866614049 %
loss:  5.300035989404633  acc:  51.42067876874506 %
loss:  5.678841924082444  acc:  51.34175217048145 %
loss:  5.730951543299147  acc:  51.34175217048145 %
loss:  5.488884276783666  acc:  51.46014206787688 %
loss:  4.9920503522963156  acc:  52.05209155485399 %
loss:  4.2944697292293315  acc:  52.1310181531176 %
loss:

In [176]:
# Mini-batch training on the same network instance, so it continues from the
# weights and momentum buffers left by the full-batch run.
voice_net.stoch_train(train_voices, train_labels, alpha = 1e-3, epochs = 20, beta = 0.9, mbSize = 100)

loss:  0.22630269827254051  acc:  92.38358326756116 %
loss:  0.11459997086907478  acc:  96.13259668508287 %
loss:  0.10174352893781655  acc:  97.00078926598263 %
loss:  0.09761086128771324  acc:  97.43488555643252 %
loss:  0.09677611429700117  acc:  97.39542225730071 %
loss:  0.08663537954397375  acc:  97.63220205209156 %
loss:  0.08307151048229325  acc:  96.96132596685084 %
loss:  0.08019438986485637  acc:  97.19810576164167 %
loss:  0.07840529137965324  acc:  97.07971586424625 %
loss:  0.07317925908913643  acc:  97.79005524861878 %
loss:  0.06980792336756103  acc:  98.02683504340963 %
loss:  0.06800243790175664  acc:  98.06629834254143 %
loss:  0.06766959300220997  acc:  98.14522494080505 %
loss:  0.06434749857887273  acc:  98.10576164167324 %
loss:  0.06930509512786359  acc:  97.43488555643252 %
loss:  0.06623029923431699  acc:  97.86898184688239 %
loss:  0.06391637634786887  acc:  97.86898184688239 %
loss:  0.07555009006189357  acc:  97.90844514601422 %
loss:  0.06409175740240346  

In [177]:
# Loss and accuracy on the held-out test split.
loss, acc = voice_net.evaluate(test_voices, test_labels)
print("loss: ", loss, " acc: ", acc * 100, "%")

loss:  0.06950761038732944  acc:  98.26498422712933 %
