# Model Iteration - Adding demographics

In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from numpy import mean, std, asarray

from sklearn.model_selection import RepeatedKFold
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

# Print the TensorFlow version in use so the recorded outputs can be tied to it.
print(tf.__version__)

# Setting the data

For this first iteration, I used the results from the audiogram test and added the remaining features (gender, age, comp_speed, experience) without any transformation. 

In [2]:
# Model inputs: the four demographic/experience columns followed by the
# audiogram columns. Declared once so the training and hold-out slices
# always select exactly the same features.
FEATURE_COLUMNS = [
    'gender', 'age', 'comp_speed', 'experience',
    'audr_500', 'audr_1k', 'audr_2k', 'audr_3k', 'audr_4k', 'audr_6k', 'audr_8k',
]
# Number of trailing rows kept out of training for the manual check at the end.
N_HOLDOUT = 10

inputs = pd.read_csv('input.csv')
features = inputs[FEATURE_COLUMNS].values
X = features[:-N_HOLDOUT]
x_test = features[-N_HOLDOUT:]

# Keep only as many target rows as there are input rows, so that row i of the
# targets pairs with row i of the features (replaces a hard-coded row count).
outputs = pd.read_csv('output.csv').iloc[:len(inputs)]
if len(outputs) != len(inputs):
    raise ValueError(
        'input.csv has %d rows but output.csv only provides %d'
        % (len(inputs), len(outputs))
    )
y = outputs.values[:-N_HOLDOUT]
y_test = outputs.values[-N_HOLDOUT:]

print(X.shape, y.shape)

(2989, 11) (2989, 7)


In [3]:
def evaluate_model(X, y, n_splits=10, n_repeats=3, epochs=100, random_state=1):
    """Score the baseline model with repeated k-fold cross-validation.

    For every fold a fresh model is built with ``base_model``, fitted on the
    training indices and evaluated on the remaining indices. The value
    returned by ``model.evaluate`` (the compiled loss) is printed and
    collected.

    Parameters
    ----------
    X : array of shape (n_samples, n_inputs)
        Feature matrix; must support integer-array indexing.
    y : array of shape (n_samples, n_outputs)
        Target matrix with one row per row of ``X``.
    n_splits : int, default 10
        Number of folds per repetition.
    n_repeats : int, default 3
        Number of times the k-fold split is repeated.
    epochs : int, default 100
        Training epochs for each fold's model.
    random_state : int, default 1
        Seed for the fold splitter. It only fixes how rows are assigned to
        folds; nothing here seeds the model itself.

    Returns
    -------
    list
        One score per fold, ``n_splits * n_repeats`` entries in total.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    if len(X) != len(y):
        raise ValueError(
            'X and y must have the same number of rows, got %d and %d'
            % (len(X), len(y))
        )

    n_inputs, n_outputs = X.shape[1], y.shape[1]

    # define evaluation procedure
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

    results = []
    for train_ix, test_ix in cv.split(X):
        # a new, untrained model per fold so no weights carry over between folds
        model = base_model(n_inputs, n_outputs)
        model.fit(X[train_ix], y[train_ix], verbose=0, epochs=epochs)

        # score on the rows this fold held out
        mae = model.evaluate(X[test_ix], y[test_ix], verbose=0)

        print('>%.3f' % mae)
        results.append(mae)

    return results

In [6]:
def base_model(n_inputs, n_outputs):
    """Build and compile the baseline regression network.

    The network is a single hidden ``Dense`` layer of 10 ReLU units
    (He-uniform initialised) followed by a linear ``Dense`` output layer,
    compiled with the MAE loss and the Adam optimizer.

    Parameters
    ----------
    n_inputs : int
        Number of input features.
    n_outputs : int
        Number of values predicted per sample.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    hidden_layer = Dense(
        10,
        input_dim=n_inputs,
        kernel_initializer='he_uniform',
        activation='relu',
    )
    output_layer = Dense(n_outputs, activation='linear')

    network = Sequential([hidden_layer, output_layer])
    network.compile(loss='mae', optimizer='adam')
    return network

In [7]:
# Cross-validate the baseline model on the training portion of the data.
results = evaluate_model(X, y)

# Report the mean score across all folds, with its standard deviation in parentheses.
print('MAE: %.3f (%.3f)' % (np.mean(results), np.std(results)))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
>0.865
>0.965
>0.815
>0.941
>0.881
>0.929
>0.919
>0.965
>0.973
>0.917
>0.994
>0.945
>1.022
>0.969
>0.968
>0.860
>0.931
>1.055
>0.940
>1.032
>0.906
>1.004
>0.959
>1.143
>0.953
>0.895
>0.875
>0.889
>0.964
>0.932
MAE: 0.947 (0.064)


## Training the model on all the data as the final model for this baseline

This latest model can be used to test additional data if needed - or could be used by an audiologist.

In [8]:
# Layer sizes come straight from the data: one input per feature column and
# one output per target column.
n_inputs = X.shape[1]
n_outputs = y.shape[1]

# Fresh baseline model fitted on all of X and y in one go (no cross-validation
# split). X and y do not include the final rows set aside as x_test / y_test.
model = base_model(n_inputs, n_outputs)
model.fit(X, y, epochs=100, verbose=0);

In [9]:
# Compare the final model's predictions with the held-out targets, row by row.
# All hold-out rows are predicted in one batched call instead of one call per row.
predictions = model.predict(x_test)

for predicted, actual in zip(predictions, y_test):
    # Round both sides to 2 decimals first so the printed difference matches
    # the printed values. np.round replaces the deprecated np.round_ alias.
    predicted_2dp = np.round(predicted, decimals=2)
    actual_2dp = np.round(actual, decimals=2)

    print('Predicted:', predicted_2dp)
    print('Real value:', actual_2dp)
    print('Diff:', np.round(predicted_2dp - actual_2dp, decimals=2))

    print('\n')

Predicted: [ 1.36  5.47  5.    9.91 24.21 24.78 25.24]
Real value: [ 2.    6.13  4.88  9.82 22.13 21.7  20.95]
Diff: [-0.64 -0.66  0.12  0.09  2.08  3.08  4.29]


Predicted: [ 2.08  4.74  5.38  7.87 15.31 15.42 15.65]
Real value: [ 2.    6.28  5.03  7.47 14.95 15.52 16.08]
Diff: [ 0.08 -1.54  0.35  0.4   0.36 -0.1  -0.43]


Predicted: [1.79 1.78 0.86 1.35 3.58 3.62 3.64]
Real value: [2.   2.   2.   2.   3.   2.82 2.64]
Diff: [-0.21 -0.22 -1.14 -0.65  0.58  0.8   1.  ]


Predicted: [1.44 1.32 3.39 3.83 5.69 5.8  5.84]
Real value: [2.   2.   3.51 4.15 5.51 5.73 6.09]
Diff: [-0.56 -0.68 -0.12 -0.32  0.18  0.07 -0.25]


Predicted: [1.92 2.45 3.05 3.8  6.35 6.3  6.22]
Real value: [2.   2.   3.33 4.28 7.38 7.32 7.18]
Diff: [-0.08  0.45 -0.28 -0.48 -1.03 -1.02 -0.96]


Predicted: [1.22 1.04 2.23 3.68 8.54 8.69 8.71]
Real value: [2.   2.   2.   3.77 9.37 9.52 9.65]
Diff: [-0.78 -0.96  0.23 -0.09 -0.83 -0.83 -0.94]


Predicted: [2.59 3.67 2.34 3.46 6.57 6.26 6.01]
Real value: [2.59 3.87 2.   2.