# Creating a baseline model

In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from numpy import mean, std, asarray
from sklearn.model_selection import RepeatedKFold

# Record the library versions this notebook was executed with
for label, module in (
    ('Tensorflow Version:', tf),
    ('Numpy Version:', np),
    ('Pandas Version', pd),
):
    print(label, module.__version__)

Tensorflow Version: 1.14.0
Numpy Version: 1.20.3
Pandas Version 1.3.2


# Setting the data

For this baseline, I used only the results from the audiogram test to define the settings of the devices. 

In [3]:
# Audiogram columns used as the model inputs (listed once so the train and
# hold-out slices can never drift apart)
AUDIOGRAM_COLS = ['audr_500', 'audr_1k', 'audr_2k', 'audr_3k', 'audr_4k', 'audr_6k', 'audr_8k']

# Number of trailing rows kept out of training for the manual spot check
N_HOLDOUT = 2

inputs = pd.read_csv('input.csv')
features = inputs[AUDIOGRAM_COLS].values
X = features[:-N_HOLDOUT]
x_test = features[-N_HOLDOUT:]

# Keep only as many target rows as there are input rows so that row i of the
# targets lines up with row i of the inputs (2999 rows in the recorded run)
outputs = pd.read_csv('output.csv').iloc[:len(inputs)]
targets = outputs.values
y = targets[:-N_HOLDOUT]
y_test = targets[-N_HOLDOUT:]

# Fail early if the two files do not line up row for row
assert len(X) == len(y), 'input.csv and output.csv yield a different number of rows'
assert len(x_test) == len(y_test) == N_HOLDOUT, 'hold-out slices have unexpected sizes'

# Checking that shapes make sense
print(X.shape, y.shape)

(2997, 7) (2997, 7)


In [4]:
def evaluate_model(X, y, n_splits=10, n_repeats=3, epochs=100, random_state=1,
                   clear_session=True):
    """Score the baseline network with repeated k-fold cross-validation.

    For every fold a fresh model is built with ``baseline_model``, fitted on
    the training indices and evaluated on the held-out indices. The per-fold
    score is printed as it becomes available.

    Parameters
    ----------
    X : 2-D array, shape (n_samples, n_inputs)
        Input features; indexed by row with the fold indices.
    y : 2-D array, shape (n_samples, n_outputs)
        Targets, row-aligned with ``X``.
    n_splits : int, default 10
        Number of folds per repetition (passed to ``RepeatedKFold``).
    n_repeats : int, default 3
        Number of times the k-fold split is repeated.
    epochs : int, default 100
        Training epochs for each fold's model.
    random_state : int, default 1
        Seed for the fold assignment only; model weight initialisation is
        not seeded here.
    clear_session : bool, default True
        Reset the Keras global state before building each fold's model so
        that the models of earlier folds do not pile up in the same graph.
        NOTE: this also invalidates any Keras model created *before* this
        function is called; pass ``False`` to keep such models usable.

    Returns
    -------
    list
        One entry per fold (``n_splits * n_repeats`` in total): the value
        returned by ``model.evaluate`` on the held-out part, i.e. the loss
        the model was compiled with.
    """
    n_inputs, n_outputs = X.shape[1], y.shape[1]

    # define evaluation procedure
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

    results = []
    for train_ix, test_ix in cv.split(X):
        if clear_session:
            # Drop the graph/session state left behind by the previous fold
            tf.keras.backend.clear_session()

        # Fresh, untrained model for this fold
        model = baseline_model(n_inputs, n_outputs)
        model.fit(X[train_ix], y[train_ix], verbose=0, epochs=epochs)

        # Score on the rows that were not used for fitting
        mae = model.evaluate(X[test_ix], y[test_ix], verbose=0)

        print('>%.3f' % mae)
        results.append(mae)

    return results

In [8]:
""" 
Baseline model with inputs layer, 1 hidden layer with 10 nodes and 1 output layer
"""
def baseline_model(n_inputs, n_outputs, hidden_units=10):
    """Build and compile the baseline feed-forward network.

    Architecture: an input of ``n_inputs`` features, one hidden ``Dense``
    layer (ReLU activation, ``he_uniform`` initialisation) and a linear
    ``Dense`` output layer with ``n_outputs`` units. The model is compiled
    with the ``'mae'`` loss and the ``'adam'`` optimizer.

    Parameters
    ----------
    n_inputs : int
        Number of input features per sample.
    n_outputs : int
        Number of values predicted per sample.
    hidden_units : int, default 10
        Width of the single hidden layer.

    Returns
    -------
    A compiled, untrained ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='linear'))
    model.compile(loss='mae', optimizer='adam')
    return model

In [9]:
# Cross-validate the baseline architecture on the training data
results = evaluate_model(X, y)

# Report the mean score over all folds, with the standard deviation in parentheses
mae_mean, mae_std = mean(results), std(results)
print('MAE: %.3f (%.3f)' % (mae_mean, mae_std))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
>0.983
>1.014
>0.986
>0.971
>0.916
>1.099
>1.007
>1.018
>1.004
>0.984
>0.835
>0.904
>0.950
>0.990
>1.018
>0.967
>1.019
>0.919
>1.042
>1.070
>1.018
>1.018
>1.015
>1.019
>0.931
>0.880
>0.900
>0.963
>0.967
>1.041
MAE: 0.982 (0.057)


This baseline model obtained an average MAE of 0.982 (standard deviation 0.057) over the 30 cross-validation folds (10 folds × 3 repeats).

## Training the model on all the data as the final model for this baseline

This final model can be used to make predictions on additional data if needed, or could be used by an audiologist.

In [11]:
# Train the final model on the whole training set, i.e. every row except the
# held-out rows kept in x_test / y_test
n_inputs, n_outputs = X.shape[1], y.shape[1]

model = baseline_model(n_inputs, n_outputs)

# fit the model on all training rows
model.fit(X, y, verbose=0, epochs=100);

# Spot check on the held-out rows: predict them in one batch, then print each
# prediction next to the real value
predictions = model.predict(asarray(x_test))
for yhat, y_true in zip(predictions, y_test):
    pred_rounded = np.round(yhat, decimals=2)
    real_rounded = np.round(y_true, decimals=2)

    print('Predicted:', pred_rounded)
    print('Real value:', real_rounded)
    # The difference is taken between the rounded values so that it agrees
    # with the two rows printed just above
    print('Diff:', np.round(pred_rounded - real_rounded, decimals=2))

    print('\n')

Predicted: [2.23 4.04 2.36 4.09 9.09 9.54 9.75]
Real value: [ 2.    4.36  2.87  4.3   8.76  9.67 10.72]
Diff: [ 0.23 -0.32 -0.51 -0.21  0.33 -0.13 -0.97]


Predicted: [ 3.45  8.16  5.9   7.31 12.69 13.62 14.14]
Real value: [ 2.93  8.28  4.98  6.51 12.09 12.51 13.02]
Diff: [ 0.52 -0.12  0.92  0.8   0.6   1.11  1.12]


