In [None]:
import numpy
import pandas
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split
from IPython.core.debugger import Tracer
import matplotlib.pyplot as plt

In [None]:
# Column layout of the feature table.
n_features = 24  # the leading n_features columns are the model inputs
n_target = 26  # index of the target column -- 26: cadence, 27: step length, 28: speed

# Read the comma-separated feature table and work on its raw array of values.
dataframe = pandas.read_csv("featuresToUseAll.csv", delimiter=",")
dataset = dataframe.values

# Inputs (X) are the leading columns; the output (Y) is the single selected target column.
X = dataset[:, :n_features]
Y = dataset[:, n_target]

In [None]:
# Data normalization: standardize each feature column to zero mean and unit
# standard deviation. The statistics are taken per column (axis=0); without the
# axis argument numpy reduces the whole matrix to one scalar, which shifts and
# scales all features by the same amount and leaves their relative scales unchanged.
# NOTE(review): the statistics are computed on every row, before the train/test
# split, so held-out rows influence the scaling -- confirm this is acceptable.
mean = numpy.mean(X, axis=0)
std = numpy.std(X, axis=0)
# A constant column has std == 0; divide by 1 there so it maps to 0 instead of NaN/inf.
std = numpy.where(std == 0, 1.0, std)
X = (X - mean) / std

In [None]:
# Hold out part of the samples for testing; the fixed random_state pins the shuffle.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    Y,
    random_state=42,
)

In [None]:
# One seed value drives both NumPy's global generator and the fold shuffling.
seed = 7
numpy.random.seed(seed)

# 4 shuffled folds for the cross-validation loop.
kfolds = KFold(
    n_splits=4,
    shuffle=True,
    random_state=seed,
)

In [None]:
# custom R^2 metric
def coeff_determination(y_true, y_pred):
    """Return ``1 - SS_res / SS_tot`` for the given targets and predictions.

    ``SS_res`` is the sum of squared differences between ``y_true`` and
    ``y_pred``; ``SS_tot`` is the sum of squared deviations of ``y_true`` from
    its own mean. The backend epsilon is added to ``SS_tot`` so the
    denominator is never exactly zero.
    """
    from keras import backend as K

    residual_ss = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered_truth))
    return 1 - residual_ss / (total_ss + K.epsilon())

In [None]:
# Cross-validated network: for every epoch budget, fit a fresh network on each
# fold's training part and average the held-out R^2 and MSE over the folds.
nNeuronList = [50]
epochs = [10, 100]  # we used [100, 1000, 1500, 3000,5000,10000]

# Result tables: column 0 holds the epoch count, column 1 the fold-averaged metric.
r2testCV = numpy.zeros([len(epochs), 2])
mseTestCV = numpy.zeros([len(epochs), 2])

noCVscore = numpy.zeros([len(epochs), 2])
noCVscores = []

# Positions in the list returned by evaluate(): the loss comes first, followed by
# the metrics in the order passed to compile() ('mae', 'mse', coeff_determination).
MSE_INDEX = 2
R2_INDEX = 3

for ind, i in enumerate(epochs):
    # Per-fold scores for this epoch budget; reset for every budget.
    r2CV = []
    mseCV = []
    for train, test in kfolds.split(X, Y):
        model1 = Sequential()
        # Only the first layer declares the input size; the following layers
        # take their input size from the layer before them.
        model1.add(Dense(50, input_dim=n_features, kernel_initializer='normal', activation='tanh'))  # https://keras.io/activations/
        model1.add(Dense(40, kernel_initializer='normal', activation='tanh'))
        model1.add(Dense(30, kernel_initializer='normal', activation='tanh'))
        model1.add(Dense(1, kernel_initializer='normal'))
        # Compile model
        model1.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse', coeff_determination])
        model1.fit(X[train], Y[train], batch_size=10, epochs=i, verbose=0)
        score = model1.evaluate(X[test], Y[test], verbose=0)
        r2CV.append(score[R2_INDEX])
        mseCV.append(score[MSE_INDEX])
    # Mean over the folds, stored next to the epoch count it belongs to.
    r2testCV[ind, :] = i, numpy.mean(r2CV)
    mseTestCV[ind, :] = i, numpy.mean(mseCV)


In [None]:
# Fit the same architecture on the training split (Xtrain/Ytrain) for each epoch
# budget and record its R^2 and MSE on that same training split.
r2train = numpy.zeros([len(epochs), 2])  # columns: epoch count, training R^2
mseTrain = numpy.zeros([len(epochs), 2])  # columns: epoch count, training MSE

for ind, i in enumerate(epochs):
    model = Sequential()
    # Only the first layer declares the input size; the following layers take
    # their input size from the layer before them.
    model.add(Dense(50, input_dim=n_features, kernel_initializer='normal', activation='tanh'))  # https://keras.io/activations/
    model.add(Dense(40, kernel_initializer='normal', activation='tanh'))
    model.add(Dense(30, kernel_initializer='normal', activation='tanh'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse', coeff_determination])
    model.fit(Xtrain, Ytrain, batch_size=10, epochs=i, verbose=0)
    # `score` and `scoreTrain` are overwritten on every pass, so after the loop
    # they hold the values for the last entry of `epochs` only.
    score = model.evaluate(Xtest, Ytest, verbose=0)
    scoreTrain = model.evaluate(Xtrain, Ytrain, verbose=0)
    # evaluate() returns the loss followed by the compiled metrics:
    # index 2 is 'mse', index 3 is coeff_determination.
    r2train[ind, :] = i, scoreTrain[3]
    mseTrain[ind, :] = i, scoreTrain[2]


In [None]:
# Report the metrics of `model`, the network that produced `scoreTrain` and
# `score`. The names are read from that same model so labels and values cannot
# drift apart. Index 0 is the loss and is skipped; 1..3 are the compiled metrics.
# Train metrics
for idx in (1, 2, 3):
    print("\n Train %s: %.2f" % (model.metrics_names[idx], scoreTrain[idx]))
# Test metrics
for idx in (1, 2, 3):
    print("\n Test %s: %.2f" % (model.metrics_names[idx], score[idx]))

In [None]:
# Plot Ytest against the predictions of `model`, the network fitted on Xtrain
# and scored on Xtest. `model1` is only the network from the last
# cross-validation fold and was fitted on rows of the full X, so it is not the
# model the reported test scores belong to.
plt.figure()
plt.scatter(Ytest, model.predict(Xtest), alpha=0.4)
plt.ylabel('Ytest prediction')
plt.xlabel('Ytest')
plt.title('Property Prediction')
plt.axis([0, 1.5, 0, 1.5])
plt.gca().set_aspect('equal', adjustable='box')
# Identity line: a perfect prediction lies exactly on it.
plt.plot(numpy.linspace(0, 1.5, 100), numpy.linspace(0, 1.5, 100), 'r')

plt.show()

In [None]:
# Plot Ytrain against the predictions of `model`, the network that was fitted
# on Xtrain. `model1` is only the network from the last cross-validation fold,
# which was fitted on a different subset of the data.
plt.figure()
plt.scatter(Ytrain, model.predict(Xtrain), alpha=0.4)
plt.ylabel('Ytrain prediction')
plt.xlabel('Ytrain')
plt.title('Property Prediction')
plt.xlim([0, 1.5])
plt.ylim([0, 1.5])
plt.gca().set_aspect('equal', adjustable='box')
# Identity line: a perfect prediction lies exactly on it.
plt.plot(numpy.linspace(0, 1.5, 100), numpy.linspace(0, 1.5, 100), 'r')

plt.show()

In [None]:
# R^2 against the number of epochs: cross-validated test folds vs. training split.
import matplotlib

fig1 = plt.figure(figsize=(8, 6), dpi=100, facecolor='w', edgecolor='k')
matplotlib.rcParams.update({'font.size': 16})

# Column 0 of each table is the epoch count, column 1 the R^2 value.
for table, series_label in ((r2testCV, 'CV Testing'), (r2train, 'Training')):
    plt.scatter(table[:, 0], table[:, 1], alpha=0.4, label=series_label)

plt.title(r'$R^2$ - Property Prediction')
plt.xlabel('epochs number')
plt.ylabel(r'$R^2$')
plt.legend()
# Uncomment to write the figure to disk:
# fig1.savefig('NN_cadence_r2.png')
plt.show()

In [None]:
# MSE against the number of epochs: training split vs. cross-validated test folds.
fig2 = plt.figure(num=None, figsize=(8, 6), dpi=100, facecolor='w', edgecolor='k')
# plt.rcParams is the same settings object as matplotlib.rcParams; using it
# here means this cell does not rely on another cell having imported matplotlib.
plt.rcParams.update({'font.size': 16})
# Column 0 of each table is the epoch count, column 1 the MSE value.
plt.scatter(mseTrain[:, 0], mseTrain[:, 1], alpha=0.4, label='Training')
plt.scatter(mseTestCV[:, 0], mseTestCV[:, 1], alpha=0.4, label='CV Testing')
plt.ylabel(r'MSE')
plt.xlabel('Epochs number')
plt.title(r'MSE - Property Prediction')
plt.legend()
plt.show()
# fig2.savefig('NN_speed_mse.png')


In [None]:
# Persist the curves as one array with a row per series:
# epoch counts, training R^2, CV-test R^2, training MSE, CV-test MSE.
rows = (
    r2train[:, 0],
    r2train[:, 1],
    r2testCV[:, 1],
    mseTrain[:, 1],
    mseTestCV[:, 1],
)
data = np.vstack(rows)
np.save('NN_property.npy', data)