In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import random

In [2]:
# load dataset
data = pd.read_csv("/Users/firdause/Downloads/Gender Voice Recognition/voice.csv")

In [3]:
data.head()

Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,12.863462,274.402906,0.893369,0.491918,...,0.059781,0.084279,0.015702,0.275862,0.007812,0.007812,0.007812,0.0,0.0,male
1,0.066009,0.06731,0.040229,0.019414,0.092666,0.073252,22.423285,634.613855,0.892193,0.513724,...,0.066009,0.107937,0.015826,0.25,0.009014,0.007812,0.054688,0.046875,0.052632,male
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,30.757155,1024.927705,0.846389,0.478905,...,0.077316,0.098706,0.015656,0.271186,0.00799,0.007812,0.015625,0.007812,0.046512,male
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,1.232831,4.177296,0.963322,0.727232,...,0.151228,0.088965,0.017798,0.25,0.201497,0.007812,0.5625,0.554688,0.247119,male
4,0.13512,0.079146,0.124656,0.07872,0.206045,0.127325,1.101174,4.333713,0.971955,0.783568,...,0.13512,0.106398,0.016931,0.266667,0.712812,0.007812,5.484375,5.476562,0.208274,male


In [4]:
# # create more data
# for i in range(6):
#     copy = voice
#     copy['meanfreq']=copy['meanfreq']+random.gauss(.0001,.001) # add noice to mean freq var
#     voice=voice.append(copy,ignore_index=True) # make voice df 2x as big
#     print("shape of df after {0}th intertion of this loop is {1}".format(i,voice.shape))

In [5]:
# select our target class
label = data.pop("label")

# converts features from dataframe to np array
features = data.values

# convert train labels to one hots
one_hot_labels = pd.get_dummies(label)

# make np array
np_one_hot_labels = one_hot_labels.values

# split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(features, np_one_hot_labels, test_size=0.3)

# convert the training and test set into numpy array
# Tensorflow requires data in the form of numpy array
# numpy array training set
np_X_train = np.array(X_train,dtype='float32')
np_y_train = np.array(y_train,dtype='float32')

# numpy array testing set
np_X_test = np.array(X_test,dtype='float32')
np_y_test = np.array(y_test,dtype='float32')

In [6]:
print(np_one_hot_labels)

[[0 1]
 [0 1]
 [0 1]
 ..., 
 [1 0]
 [1 0]
 [1 0]]


In [7]:
print("Training set shape: ", np_X_train.shape)
print("Testing set shape: ", np_X_test.shape)

Training set shape:  (2217, 20)
Testing set shape:  (951, 20)


# Construct Neural Network (MLP)

In [8]:
# parameters
total_input = X_train.shape[1]
total_output = y_train.shape[1]

learning_rate = 0.001
total_epochs = 20000

In [9]:
# place holder for inputs and outputs
x = tf.placeholder("float", [None, total_input])
y = tf.placeholder("float", [None, total_output])

In [10]:
# weight and bias updates
w1 = tf.Variable(tf.random_normal([20, 10], stddev=.5, name='w1'))
b1 = tf.Variable(tf.random_normal([10]))

w2 = tf.Variable(tf.random_normal([10, 10], stddev=.5, name='w2'))
b2 = tf.Variable(tf.random_normal([10]))

w3 = tf.Variable(tf.random_normal([10, 10], stddev=.5, name='w3'))
b3 = tf.Variable(tf.random_normal([10]))

w4 = tf.Variable(tf.random_normal([10, 2], stddev=.5, name='w4'))
b4 = tf.Variable(tf.random_normal([2]))

In [11]:
# hidden layers and RELU activation functions
layer_1 = tf.add(tf.matmul(x, w1), b1)
layer_1 = tf.nn.relu(layer_1)

layer_2 = tf.add(tf.matmul(layer_1, w2), b2)
layer_2 = tf.nn.relu(layer_2)

layer_3 = tf.add(tf.matmul(layer_2, w3), b3)
layer_3 = tf.nn.relu(layer_3)

# final layer does not have activation function!
output_layer = tf.add(tf.matmul(layer_3, w4), b4)

y_ = tf.nn.softmax(output_layer)

In [12]:
# cost function
loss = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(y_, y, name='cross_entropy')))

In [13]:
# back-propagation via Adam optimizer
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

# train step which minimizes the weight and bias variables
train_step = opt.minimize(loss, var_list=[w1, b1, w2, b2, w3, b3, w4, b4])

In [14]:
# accuracy metric
tf_correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

In [15]:
# obtain mini batch
def get_mini_batch(x,y):
    rows=np.random.choice(x.shape[0], 100)
    return x[rows], y[rows]

In [16]:
with tf.Session() as sess:    
    # init all variables
    init = tf.global_variables_initializer()
    sess.run(init)
    
    for i in range(total_epochs):
        # get mini batch
        a, b = get_mini_batch(X_train, y_train)

        # run train step, feeding arrays of 100 rows each time
        _, cost = sess.run([train_step, loss], feed_dict={x: a, y: b})

        if i % 100 == 0:
            print("Epoch: {0} and Loss: {1}".format(i, cost))
    
    # benchmark neural network performance
    result = sess.run(tf_accuracy, feed_dict={x: X_test, y: y_test})
    print()
    print("Test accuracy: {}".format(result))

Epoch: 0 and Loss: 83.37107849121094
Epoch: 100 and Loss: 68.99403381347656
Epoch: 200 and Loss: 61.23981475830078
Epoch: 300 and Loss: 54.65131759643555
Epoch: 400 and Loss: 60.84671401977539
Epoch: 500 and Loss: 50.829612731933594
Epoch: 600 and Loss: 50.65763854980469
Epoch: 700 and Loss: 57.19831848144531
Epoch: 800 and Loss: 50.58240509033203
Epoch: 900 and Loss: 47.05940628051758
Epoch: 1000 and Loss: 46.87858963012695
Epoch: 1100 and Loss: 42.5968132019043
Epoch: 1200 and Loss: 45.7943115234375
Epoch: 1300 and Loss: 42.34330749511719
Epoch: 1400 and Loss: 43.73741912841797
Epoch: 1500 and Loss: 43.693450927734375
Epoch: 1600 and Loss: 50.16815185546875
Epoch: 1700 and Loss: 44.69630432128906
Epoch: 1800 and Loss: 42.6500244140625
Epoch: 1900 and Loss: 45.26355743408203
Epoch: 2000 and Loss: 41.60751724243164
Epoch: 2100 and Loss: 39.9048957824707
Epoch: 2200 and Loss: 40.03375244140625
Epoch: 2300 and Loss: 39.78605651855469
Epoch: 2400 and Loss: 44.78984832763672
Epoch: 2500 an