In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
# Load the dataset from 'voice.csv' (path is relative to the working directory).
# Later cells expect a 'label' column holding the string "female" for one class.
data = pd.read_csv('voice.csv')

In [5]:
# Encode the target as integers: "female" -> 1, any other value -> 0.
# The guard keeps this cell safe to re-run: once the column is numeric there is
# no "female" string left, so encoding a second time would set every label to 0.
if not pd.api.types.is_numeric_dtype(data["label"]):
    data["label"] = (data["label"] == "female").astype("int64")
data["label"].values

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [6]:
# Target vector: the encoded 'label' column as a NumPy array.
y = data["label"].to_numpy()
# Feature table: every remaining column.
x_data = data.drop(columns=["label"])

In [7]:
# Min-max normalization, applied independently to each feature column:
#     (x - min(x)) / (max(x) - min(x))
# DataFrame.min()/max() are called explicitly (column-wise by default) instead
# of np.min/np.max: the NumPy wrappers pass axis=None, which pandas >= 2.0
# interprets as "reduce over both axes" and returns a single scalar.
# NOTE(review): min/max are taken over the whole dataset, i.e. before the
# train/test split below, so test rows influence the scaling.
col_min = x_data.min()
col_range = x_data.max() - col_min
# A constant column has range 0; divide by 1 there so it maps to 0 instead of NaN.
x = (x_data - col_min) / col_range.replace(0, 1)

In [8]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# partition repeatable between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Sanity check: (number of training rows, number of feature columns).
print(x_train.shape)

(2534, 20)


In [10]:
# Re-orient the feature tables so each column is one sample and each row one
# feature; this is the layout the network multiplies against (W @ X).
train_voices, test_voices = x_train.T, x_test.T
# The label vectors are 1-D (see the printed shapes), so .T leaves them as-is;
# the assignment only gives them names that match the feature matrices.
train_labels, test_labels = y_train.T, y_test.T

print("train_voices shape:", train_voices.shape)
print("test_voices shape:", test_voices.shape)
print("train_labels shape:", train_labels.shape)
print("test_labels shape:", test_labels.shape)

train_voices shape: (20, 2534)
test_voices shape: (20, 634)
train_labels shape: (2534,)
test_labels shape: (634,)


In [11]:
# Convert the transposed DataFrames to plain NumPy arrays for the network.
# np.asarray is used instead of `.values` so the cell can be re-run safely:
# an ndarray has no `.values` attribute, while np.asarray on an array that is
# already an ndarray simply returns it.
train_voices = np.asarray(train_voices)
test_voices = np.asarray(test_voices)

In [12]:
# Confirm that both the labels and the features are NumPy arrays at this point.
print(type(train_labels))
print(type(train_voices))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [13]:
def softmax(x):
    """Softmax along axis 0.

    For a 2-D input every column is normalized independently, so each column
    of the result sums to 1. The maximum along axis 0 is subtracted before
    exponentiating, which leaves the result unchanged mathematically but keeps
    ``np.exp`` from overflowing on large inputs.

    x: NumPy array of scores.
    Returns an array with the same shape as ``x``.
    """
    shifted = x - np.max(x, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [14]:
def one_hot_encoding(y_label, num_classes=None):
    """Build a one-hot matrix with one column per sample.

    y_label: array-like of class ids; values are cast to int and flattened.
    num_classes: number of rows in the result. When omitted it is inferred as
        ``max(y_label) + 1`` (0 for empty input). Pass it explicitly when the
        label array may not contain the highest class, e.g. a small batch.

    Returns a float array of shape ``(num_classes, n_samples)`` in which
    ``enc[c, i] == 1`` exactly when sample ``i`` has class ``c``.

    Raises ValueError if a label is negative or is not below ``num_classes``.
    """
    labels = np.asarray(y_label).astype(int).ravel()

    if labels.size and labels.min() < 0:
        # Negative ids would silently index from the end of the class axis.
        raise ValueError("class labels must be non-negative")

    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    else:
        num_classes = int(num_classes)
        if labels.size and labels.max() >= num_classes:
            raise ValueError(
                "label %d is out of range for %d classes"
                % (int(labels.max()), num_classes)
            )

    enc = np.zeros((num_classes, labels.size))
    # Fancy indexing sets one (row=class, column=sample) entry per sample.
    enc[labels, np.arange(labels.size)] = 1
    return enc

In [15]:
# Small constant added to the softmax output inside np.log in the NN loss
# computation, so that a probability of exactly 0 does not produce log(0).
eps = 1e-10

In [16]:
class NN:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    The layers have weights only (no bias terms). Training minimizes the
    cross-entropy loss with momentum gradient descent. Gradients are summed,
    not averaged, over the samples of a batch, so a suitable ``alpha`` depends
    on how many samples are in each batch.

    Data layout used by every method:
      X        -- array of shape (sizes[0], n_samples), one column per sample.
      y_label  -- 1-D NumPy array of integer class ids in [0, sizes[2]).

    Relies on the module-level ``softmax``, ``one_hot_encoding`` and ``eps``.
    """

    def __init__(self, sizes, seed=None):
        """Create the network with standard-normal random weights.

        sizes: sequence whose first three entries are the number of inputs,
            hidden units and output classes (further entries are ignored).
        seed: optional seed. When given, weight initialization and mini-batch
            shuffling draw from a private ``np.random.RandomState(seed)``;
            when omitted, the global ``np.random`` state is used.
        """
        self._rng = None if seed is None else np.random.RandomState(seed)
        rng = self._random()
        self.W0 = rng.randn(sizes[1], sizes[0])
        self.W1 = rng.randn(sizes[2], sizes[1])
        # Momentum ("velocity") buffers, one per weight matrix.
        self.mW0 = np.zeros_like(self.W0)
        self.mW1 = np.zeros_like(self.W1)

    def _random(self):
        """Return the random source: the private generator, else ``np.random``."""
        rng = getattr(self, "_rng", None)
        return np.random if rng is None else rng

    def _encode_labels(self, y_label):
        """One-hot encode ``y_label`` with exactly one row per output unit.

        ``one_hot_encoding`` infers the class count from the labels it sees,
        so when the highest class is absent the result has too few rows; the
        missing rows are added as zeros. Without this, NumPy broadcasting
        could silently combine a 1-row target with the network output.
        """
        y_one_hot = one_hot_encoding(y_label)
        missing = self.W1.shape[0] - y_one_hot.shape[0]
        if missing < 0:
            raise ValueError(
                "labels imply %d classes but the network has %d outputs"
                % (y_one_hot.shape[0], self.W1.shape[0])
            )
        if missing:
            padding = np.zeros((missing, y_one_hot.shape[1]))
            y_one_hot = np.vstack([y_one_hot, padding])
        return y_one_hot

    def _forward(self, X):
        """Run the forward pass.

        Returns ``(a0, z0, y)``: hidden pre-activations, hidden ReLU
        activations and the softmax class probabilities (one column per sample).
        """
        a0 = self.W0 @ X
        z0 = np.maximum(a0, 0)
        a1 = self.W1 @ z0
        return a0, z0, softmax(a1)

    @staticmethod
    def _metrics(y, y_one_hot, y_label):
        """Return ``(loss, acc)``: mean cross-entropy and fraction correct."""
        loss = - (np.log(y + eps) * y_one_hot).sum() / y_label.size
        acc = (np.argmax(y, axis = 0) == y_label).sum() / y_label.size
        return loss, acc

    def _momentum_step(self, X, y_one_hot, a0, z0, y, alpha, beta):
        """Back-propagate one batch and apply a momentum update in place."""
        # Gradient of softmax + cross-entropy w.r.t. the output pre-activations.
        d_a_1 = y - y_one_hot
        gradW1 = d_a_1 @ z0.T
        # Propagate through W1, then through the ReLU: units whose
        # pre-activation was negative pass no gradient.
        d_a_0 = self.W1.T @ d_a_1
        d_a_0[a0 < 0] = 0
        gradW0 = d_a_0 @ X.T

        self.mW0 = beta * self.mW0 - alpha * gradW0
        self.mW1 = beta * self.mW1 - alpha * gradW1

        self.W0 += self.mW0
        self.W1 += self.mW1

    def train(self, X, y_label, epochs = 10, alpha = 0.1, beta = 0.8):
        """Full-batch training: one momentum update per epoch on all samples.

        alpha: learning rate; beta: momentum coefficient.
        After each epoch prints the loss and accuracy measured on the forward
        pass made *before* that epoch's weight update.
        """
        y_one_hot = self._encode_labels(y_label)
        for _ in range(epochs):
            a0, z0, y = self._forward(X)
            loss, acc = self._metrics(y, y_one_hot, y_label)
            self._momentum_step(X, y_one_hot, a0, z0, y, alpha, beta)
            print("loss: ", loss, " acc: ", acc * 100, "%")

    def stoch_train(self, X, y_label, epochs = 10, alpha = 0.1, beta = 0.8, mbSize = 100):
        """Mini-batch training with a fresh shuffle of the samples every epoch.

        mbSize: number of samples per mini-batch. Every sample is visited once
        per epoch; when ``mbSize`` does not divide the sample count the last
        batch of the epoch is simply smaller (this also covers
        ``mbSize > n_samples``, which yields a single batch).
        After each epoch prints loss and accuracy on the full ``X``/``y_label``.
        """
        y_one_hot = self._encode_labels(y_label)
        n_samples = y_label.size
        # Ceiling division, so the trailing partial batch is not dropped.
        iters_per_epoch = -(-n_samples // mbSize)
        indices = np.arange(n_samples)
        for _ in range(epochs):
            self._random().shuffle(indices)
            X_sh = X[:, indices]
            y_one_sh = y_one_hot[:, indices]

            for it in range(iters_per_epoch):
                batch = slice(it * mbSize, (it + 1) * mbSize)
                X_iter = X_sh[:, batch]
                y_one_iter = y_one_sh[:, batch]

                a0, z0, y = self._forward(X_iter)
                self._momentum_step(X_iter, y_one_iter, a0, z0, y, alpha, beta)

            loss, acc = self.evaluate(X, y_label)
            print("loss: ", loss, " acc: ", acc * 100, "%")

    def evaluate(self, X, y_label):
        """Return ``(loss, acc)`` for ``X``/``y_label`` without changing weights."""
        y_one_hot = self._encode_labels(y_label)
        _, _, y = self._forward(X)
        return self._metrics(y, y_one_hot, y_label)

In [17]:
# Layer sizes passed to NN: 20 inputs (one per feature column), 70 hidden
# units, 2 output classes.
voice_net = NN([20, 70, 2])

In [18]:
# Full-batch momentum training on the training split. NN.train sums the
# gradient over every sample rather than averaging it, which is presumably
# why the learning rate (alpha) is set so small here.
voice_net.train(train_voices, train_labels, beta = 0.9, alpha = 1e-6, epochs = 20)

loss:  3.1518889073553455  acc:  51.499605367008684 %
loss:  2.8814925418336603  acc:  51.223362273086025 %
loss:  2.4097241329520047  acc:  50.94711917916338 %
loss:  1.8740865027810825  acc:  52.05209155485399 %
loss:  1.504813549257675  acc:  56.55090765588003 %
loss:  1.4782029768690204  acc:  61.2075769534333 %
loss:  1.6918428005574349  acc:  59.94475138121547 %
loss:  1.9174373543864598  acc:  57.14285714285714 %
loss:  2.015552818396742  acc:  56.03788476716653 %
loss:  1.9456343579726945  acc:  56.3930544593528 %
loss:  1.734650729486629  acc:  58.445146014206784 %
loss:  1.4578876240420473  acc:  62.70718232044199 %
loss:  1.2208920147251718  acc:  67.20599842146804 %
loss:  1.1150993812002745  acc:  66.4561957379637 %
loss:  1.1502475538323864  acc:  63.733228097868974 %
loss:  1.2426042294424464  acc:  61.523283346487766 %
loss:  1.2937542112648848  acc:  60.339384372533544 %
loss:  1.257234410756644  acc:  61.12865035516969 %
loss:  1.1454955034093686  acc:  64.04893449092

In [19]:
# Continue training the same network object (weights and momentum buffers from
# the previous cell are kept), now with shuffled mini-batches of 100 samples.
voice_net.stoch_train(train_voices, train_labels, alpha = 1e-3, epochs = 20, beta = 0.9, mbSize = 100)

loss:  0.17676745361494206  acc:  96.32991318074191 %
loss:  0.13755657500939633  acc:  96.32991318074191 %
loss:  0.11609258621577659  acc:  96.40883977900553 %
loss:  0.0942777943125705  acc:  97.31649565903709 %
loss:  0.0932497492217442  acc:  97.47434885556433 %
loss:  0.10241117713907182  acc:  97.47434885556433 %
loss:  0.08121791637168671  acc:  97.51381215469614 %
loss:  0.0839508307740768  acc:  97.86898184688239 %
loss:  0.07869941723211259  acc:  97.47434885556433 %
loss:  0.11510926694309137  acc:  95.26440410418311 %
loss:  0.07590898166243018  acc:  97.86898184688239 %
loss:  0.07203036044614956  acc:  97.79005524861878 %
loss:  0.07481121196904743  acc:  97.55327545382794 %
loss:  0.08050269645286295  acc:  96.92186266771901 %
loss:  0.07894991972789789  acc:  97.94790844514601 %
loss:  0.07165310372810221  acc:  97.39542225730071 %
loss:  0.08430531044922306  acc:  96.48776637726914 %
loss:  0.07336946828106833  acc:  98.10576164167324 %
loss:  0.07181813356165788  acc

In [20]:
# Measure loss and accuracy on the held-out test split; evaluate() only runs
# the forward pass and does not modify the weights.
loss, acc = voice_net.evaluate(test_voices, test_labels)
print("loss: ", loss, " acc: ", acc * 100, "%")

loss:  0.06133162137526981  acc:  97.79179810725552 %
