In [None]:
from sklearn import linear_model
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.core.debugger import Tracer
from sklearn.metrics import r2_score
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.cross_validation import train_test_split

In [None]:
# load dataset
# Column layout used below: the first `n_features` columns are the model
# inputs and column `n_target` is the regression target.
n_features = 24
n_target = 26  # 26:cadence, 27:step length, 28:speed

dataframe = pd.read_csv("featuresToUseAll.csv", delimiter=',')
dataset = dataframe.values  # plain NumPy view of the table

# split into input (X) and output (Y) variables
X = dataset[:, :n_features]
Y = dataset[:, n_target]

In [None]:
# Data normalization
# Standardise every feature column on its own (zero mean, unit variance).
# Reducing over the whole matrix (no `axis`) would shift/scale all columns by
# one shared scalar and leave the features on different scales, which distorts
# both PCA and the Ridge penalty.
# NOTE(review): the statistics are computed on the full data set, i.e. before
# the train/test split further down, so test rows influence the scaling.
X = np.asarray(X, dtype=float)  # assumes the feature columns are numeric
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
# A constant column has zero spread; divide by 1 so it becomes 0 instead of NaN.
std = np.where(std == 0, 1.0, std)
X = (X - mean) / std

In [None]:
# PCA
# Fit a 6-component projection on X, then project X itself into that space.
# NOTE(review): X_pca is not consumed by the split cell below, which passes
# the un-projected X to train_test_split -- confirm whether that is intended.
pca = PCA(n_components=6).fit(X)
X_pca = pca.transform(X)

In [None]:
# Split data into train and test
# No test_size is passed, so the library's default split fraction applies;
# random_state=42 pins the shuffle so the partition is repeatable.
(Xtrain, Xtest,
 Ytrain, Ytest) = train_test_split(X, Y, random_state=42)

In [None]:
# Train the model
# Sweep the Ridge regularisation strength. For every alpha a fresh model is
# fitted on the training split and scored on both splits.
# Each result array has one row per alpha: column 0 = alpha, column 1 = metric.
alphas=[100,10,1,0.1,0.01]
testR2=np.zeros([len(alphas),2])
trainR2=np.zeros([len(alphas),2])

testMSE=np.zeros([len(alphas),2])
trainMSE=np.zeros([len(alphas),2])
for ind, alph in enumerate(alphas):
    # Use the alpha under test. A hard-coded value here would make every
    # iteration fit the same model and every row of the sweep identical.
    model = linear_model.Ridge(alpha=alph)
    model.fit(Xtrain,Ytrain)
    Ypred=model.predict(Xtest)
    Ypred_train=model.predict(Xtrain)
    # get R2 for the train and test data
    testR2[ind,:]=alph,r2_score(Ytest,Ypred)
    trainR2[ind,:]=alph,r2_score(Ytrain,Ypred_train)

    # get MSE for the train and test data
    testMSE[ind,:]=alph,mean_squared_error(Ytest,Ypred)
    trainMSE[ind,:]=alph,mean_squared_error(Ytrain,Ypred_train)
# After the loop `model`, `Ypred` and `Ypred_train` belong to the last entry
# of `alphas`; any later use of `model` refers to that fit.

In [None]:
# Plot Ytest vs. Ypred
# Scatter of the model's test-split predictions against the test targets;
# the red y = x line marks where prediction equals target.
ax = plt.figure().gca()
ax.scatter(Ytest, model.predict(Xtest), alpha=0.4)
ax.set_ylabel('Ytest prediction')
ax.set_xlabel('Ytest')
ax.set_title('Property Prediction')
# Fix both axes to [0, 1.5] and keep a square aspect before drawing the diagonal.
ax.axis([0, 1.5, 0, 1.5])
ax.set_aspect('equal', adjustable='box')
diag = np.linspace(0, 1.5, 100)
ax.plot(diag, diag, 'r')
plt.show()

In [None]:
# Plot Ytrain vs. Ytrain_pred
# Scatter of the model's training-split predictions against the training
# targets; the red y = x line marks where prediction equals target.
ax = plt.figure().gca()
ax.scatter(Ytrain, model.predict(Xtrain), alpha=0.4)
ax.set_ylabel('Ytrain prediction')
ax.set_xlabel('Ytrain')
ax.set_title('Property Prediction')
# Fix both axes to [0, 1.5] and keep a square aspect before drawing the diagonal.
ax.set_xlim([0, 1.5])
ax.set_ylim([0, 1.5])
ax.set_aspect('equal', adjustable='box')
diag = np.linspace(0, 1.5, 100)
ax.plot(diag, diag, 'r')
plt.show()

In [None]:
import matplotlib
# R^2 versus regularisation strength for the training and test splits.
# Column 0 of trainR2/testR2 is alpha, column 1 is the score.
matplotlib.rcParams.update({'font.size': 16})
# Create exactly one figure. An additional bare plt.figure() call would leave
# an empty figure behind that plt.show() displays as well.
fig1=plt.figure(num=None, figsize=(8, 6), dpi=100, facecolor='w', edgecolor='k')

plt.plot(trainR2[:,0], trainR2[:,1],alpha=0.4, label='Training')
plt.plot(testR2[:,0], testR2[:,1],alpha=0.4, label='Testing')

plt.ylabel(r'$R^2$')
plt.xlabel(r'$\alpha$ Regularization Parameter')
plt.title(r'$R^2$ - Property Prediction')
plt.legend()
# Write the image to disk before showing the figure.
fig1.savefig('Property_r2.png')
plt.show()

In [None]:
import matplotlib
# MSE versus regularisation strength for the training and test splits.
# Column 0 of trainMSE/testMSE is alpha, column 1 is the error.
matplotlib.rcParams.update({'font.size': 16})
# Create exactly one figure. An additional bare plt.figure() call would leave
# an empty figure behind that plt.show() displays as well.
fig2=plt.figure(num=None, figsize=(8, 6), dpi=100, facecolor='w', edgecolor='k')

plt.plot(trainMSE[:,0], trainMSE[:,1],alpha=0.4, label='Training')
plt.plot(testMSE[:,0], testMSE[:,1],alpha=0.4, label='Testing')

# The plotted quantity is the mean squared error, so label the axis as such.
plt.ylabel('MSE')
plt.xlabel(r'$\alpha$ Regularization Parameter')
plt.title(r'MSE - Property Prediction')
plt.legend()

# Save before plt.show(), matching the R^2 figure cell, so the file is written
# while the figure is still guaranteed to be live.
fig2.savefig('Property_mse.png')
plt.show()

In [None]:
# data storage
# Stack the sweep results into one array (one row per quantity, one column
# per alpha) and write it to disk in NumPy's .npy format.
data = np.vstack([
    trainR2[:, 0],   # alpha values
    trainR2[:, 1],   # training R^2
    testR2[:, 1],    # testing R^2
    trainMSE[:, 1],  # training MSE
    testMSE[:, 1],   # testing MSE
])
np.save('Reg_Property.npy', data)