# Model Iteration - One Hot Encoder for Gender

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

from numpy import mean, std, asarray

from sklearn.model_selection import RepeatedKFold
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

print(tf.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


1.14.0


# Setting the data

For this second iteration, I used the results from the audiogram test to define the settings of the devices and added the remaining features (gender, age, comp_speed, experience), with gender encoded using a one-hot encoding technique.

In [2]:
# Load the raw feature table.
inputs = pd.read_csv('input.csv')

# One-hot encode gender into `gender_<value>` indicator columns.
# The dtype is pinned to uint8 (the historical pandas default) because newer
# pandas releases return bool dummies, and bool columns mixed with numeric
# ones turn the later `.values` call into an object array, which Keras
# typically rejects.
gender = pd.get_dummies(inputs.gender, prefix='gender', dtype='uint8')
inputs = inputs.join(gender)

In [3]:
# Feature columns fed to the network, in a fixed order. Defined once so the
# training and hold-out matrices cannot drift apart.
FEATURE_COLUMNS = ['age', 'comp_speed', 'experience', 'gender_1', 'gender_2',
                   'audr_500', 'audr_1k', 'audr_2k', 'audr_3k', 'audr_4k',
                   'audr_6k', 'audr_8k']
# Number of trailing rows held back for the manual prediction check.
N_HOLDOUT = 10

features = inputs[FEATURE_COLUMNS].values
X = features[:-N_HOLDOUT]
x_test = features[-N_HOLDOUT:]

# Keep only as many target rows as there are input rows, so X and y stay
# aligned row-for-row even if the input file changes length.
outputs = pd.read_csv('output.csv').iloc[:len(inputs)]
y = outputs.values[:-N_HOLDOUT]
y_test = outputs.values[-N_HOLDOUT:]

assert len(X) == len(y), 'feature and target row counts differ'
assert len(x_test) == len(y_test), 'hold-out feature and target row counts differ'

print(X.shape, y.shape)

(2989, 12) (2989, 7)


In [4]:
def evaluate_model(X, y, n_splits=10, n_repeats=3, epochs=100, random_state=1):
    """Score `base_model` with repeated k-fold cross-validation.

    A fresh model is built and trained for every fold, then evaluated on the
    rows left out of that fold. Each fold's score is printed as it completes.

    Args:
        X: 2-D feature array; its column count sets the model's input size.
        y: 2-D target array; its column count sets the model's output size.
        n_splits: Number of folds per repeat.
        n_repeats: Number of times the k-fold split is repeated.
        epochs: Training epochs for each fold's model.
        random_state: Seed passed to `RepeatedKFold`. It only controls how
            rows are assigned to folds, not the model's weight initialisation.

    Returns:
        A list with one value per fold (`n_splits * n_repeats` entries), each
        being whatever `model.evaluate` returns for the held-out rows. For a
        model compiled with only a loss, as `base_model` is, that is the loss.
    """
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

    results = []
    for train_ix, val_ix in cv.split(X):
        # A new model per fold so no weights carry over between folds.
        model = base_model(n_inputs, n_outputs)
        model.fit(X[train_ix], y[train_ix], verbose=0, epochs=epochs)

        # Score on the rows this fold did not train on.
        mae = model.evaluate(X[val_ix], y[val_ix], verbose=0)
        print('>%.3f' % mae)
        results.append(mae)
    return results

In [5]:
def base_model(n_inputs, n_outputs):
    """Build and compile the baseline network.

    The network has a single hidden Dense layer of 10 ReLU units
    (he_uniform initialisation) followed by a linear Dense output layer.
    It is compiled with the 'mae' loss and the 'adam' optimizer.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of values predicted per sample.

    Returns:
        The compiled Sequential model.
    """
    # Hidden layer; it also declares the input dimensionality
    hidden = Dense(10, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu')
    # Linear output layer, one unit per target
    output = Dense(n_outputs, activation='linear')

    network = Sequential([hidden, output])
    network.compile(optimizer='adam', loss='mae')
    return network

In [6]:
# Run the repeated k-fold evaluation and keep the per-fold scores
results = evaluate_model(X, y)
# Report the mean score with its standard deviation in parentheses
mae_mean, mae_std = mean(results), std(results)
print('MAE: %.3f (%.3f)' % (mae_mean, mae_std))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
>0.891
>0.911
>0.877
>0.888
>0.938
>0.947
>0.897
>0.958
>0.973
>0.976
>0.964
>0.862
>0.933
>0.884
>0.915
>0.805
>0.872
>0.920
>0.905
>1.055
>0.904
>0.960
>0.969
>1.193
>0.796
>0.866
>0.849
>1.099
>0.882
>0.999
MAE: 0.930 (0.080)


## Training the model on all the data as the final model for this baseline

This latest model can be used to test additional data if needed, or could be used by an audiologist.

In [7]:
# Layer sizes follow the column counts of the feature and target matrices
n_inputs = X.shape[1]
n_outputs = y.shape[1]

model = base_model(n_inputs, n_outputs)

# Train on the whole X / y arrays (no fold split); the trailing `;` keeps the
# fit call's return value out of the notebook output
model.fit(X, y, epochs=100, verbose=0);

In [8]:
# Compare the model's predictions with the true targets for each held-out row.
# All rows are predicted in one batched call rather than one call per row.
predictions = model.predict(x_test)

for yhat, real in zip(predictions, y_test):
    # np.round replaces the np.round_ alias, which was removed in NumPy 2.0.
    predicted_rounded = np.round(yhat, decimals=2)
    real_rounded = np.round(real, decimals=2)

    print('Predicted:', predicted_rounded)
    print('Real value:', real_rounded)
    # The difference is taken between the rounded values, then rounded again
    # to strip floating-point noise from the subtraction.
    print('Diff:', np.round(predicted_rounded - real_rounded, decimals=2))

    print('\n')

Predicted: [ 1.79  4.79  3.92  9.16 24.29 24.16 24.35]
Real value: [ 2.    6.13  4.88  9.82 22.13 21.7  20.95]
Diff: [-0.21 -1.34 -0.96 -0.66  2.16  2.46  3.4 ]


Predicted: [ 1.96  4.53  5.17  7.82 15.72 15.76 15.95]
Real value: [ 2.    6.28  5.03  7.47 14.95 15.52 16.08]
Diff: [-0.04 -1.75  0.14  0.35  0.77  0.24 -0.13]


Predicted: [1.89 1.93 1.41 1.91 3.7  3.78 3.85]
Real value: [2.   2.   2.   2.   3.   2.82 2.64]
Diff: [-0.11 -0.07 -0.59 -0.09  0.7   0.96  1.21]


Predicted: [1.55 1.31 3.38 3.92 5.69 5.68 5.67]
Real value: [2.   2.   3.51 4.15 5.51 5.73 6.09]
Diff: [-0.45 -0.69 -0.13 -0.23  0.18 -0.05 -0.42]


Predicted: [2.22 2.23 3.09 3.9  6.68 6.39 6.21]
Real value: [2.   2.   3.33 4.28 7.38 7.32 7.18]
Diff: [ 0.22  0.23 -0.24 -0.38 -0.7  -0.93 -0.97]


Predicted: [1.81 2.07 2.04 3.59 8.72 8.71 8.72]
Real value: [2.   2.   2.   3.77 9.37 9.52 9.65]
Diff: [-0.19  0.07  0.04 -0.18 -0.65 -0.81 -0.93]


Predicted: [2.47 3.68 2.52 3.35 6.23 6.2  6.23]
Real value: [2.59 3.87 2.   2.