In [1]:
#import Libraries
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

## Data preparation

In [2]:
# Load the DOE table as a structured array (fields named after the CSV header)
# and collapse repeated rows.
df = np.unique(
    np.genfromtxt('doe_data_wise2021.csv', names=True, delimiter=','),
    axis=0,
)

# Full data set: the five factors as columns, plus the measured distance.
namesX = ['FA', 'PE', 'BP', 'RA', 'CE']
Xinit = np.array([df[name] for name in namesX]).transpose()
yinit = df['distance']

# Subset used for the polynomial model: rows with RA == 185 and CE == 300 only.
df = df[(df['RA'] == 185) & (df['CE'] == 300)]
y = df['distance']

# Selected polynomial terms in FA, PE and BP. Labels and columns are kept
# side by side so the two cannot drift apart.
FA, PE, BP = df['FA'], df['PE'], df['BP']
terms = [
    ('FA', FA),
    ('PE', PE),
    ('BP', BP),
    ('FA^2', FA * FA),
    ('FA * PE', FA * PE),
    ('FA^2 * PE', FA * FA * PE),
    ('BP * FA', FA * BP),
    ('BP * FA^2', FA * FA * BP),
    ('BP * PE', PE * BP),
    ('BP * FA * PE', FA * PE * BP),
    ('BP^2', BP * BP),
]
feature_names = [label for label, _ in terms]
X = np.array([column for _, column in terms]).transpose()

# Report the names and shapes of both design matrices.
print("Xinit\n%s\nSize: %s" % (namesX, Xinit.shape))
print("---")
print("X\n%s\nSize: %s" % (feature_names, X.shape))

Xinit
['FA', 'PE', 'BP', 'RA', 'CE']
Size: (332, 5)
---
X
['FA', 'PE', 'BP', 'FA^2', 'FA * PE', 'FA^2 * PE', 'BP * FA', 'BP * FA^2', 'BP * PE', 'BP * FA * PE', 'BP^2']
Size: (147, 11)


**Data preparation finished**

## Fit regression model with selected polynomial coefficients

In [3]:
import statsmodels.api as sm

# Ordinary least squares of the distance on the selected polynomial terms.
# add_constant supplies the intercept column (the `const` row of the summary).
Xsm = sm.add_constant(X)
olsmodel = sm.OLS(endog=y, exog=Xsm)
olsresult = olsmodel.fit()

# NOTE(review): the recorded summary reports "Df Model: 7" although 11 terms
# are passed in, which suggests the design matrix is rank deficient
# (collinear terms) — confirm before interpreting individual coefficients.
print(olsresult.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.994
Model:                            OLS   Adj. R-squared:                  0.994
Method:                 Least Squares   F-statistic:                     3466.
Date:                Thu, 10 Dec 2020   Prob (F-statistic):          1.32e-152
Time:                        11:55:58   Log-Likelihood:                -514.90
No. Observations:                 147   AIC:                             1046.
Df Residuals:                     139   BIC:                             1070.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0078      0.004     -2.051      0.0

## Random Forest 

In [4]:
from sklearn.ensemble import RandomForestRegressor

# Random forest on the full data set (all five raw factors, no polynomial terms).
# random_state pins the randomness of the forest so the score printed here and
# the feature ranking derived from `regr` are the same on every
# "Restart & Run All"; without it each run yields slightly different numbers.
regr = RandomForestRegressor(n_estimators=500, max_depth=50, random_state=0)
regr.fit(Xinit, yinit)

# NOTE: the score is computed on the same rows the forest was trained on, so it
# describes goodness of fit, not performance on unseen settings.
print(regr.score(Xinit, yinit))

0.9757545983962976


In [5]:
# Rank the input factors by the fitted forest's feature_importances_,
# largest value first.
importances = regr.feature_importances_
indices = np.argsort(importances)[::-1]
number_most_important = len(importances)

# Print the feature ranking
print("The %s most important features: " % str(number_most_important))
for rank, idx in enumerate(indices[:number_most_important], start=1):
    print("%s. feature %s \t(%f)" % (rank, namesX[idx], importances[idx]))

The 5 most important features: 
1. feature FA 	(0.285131)
2. feature RA 	(0.229344)
3. feature CE 	(0.219563)
4. feature BP 	(0.155375)
5. feature PE 	(0.110587)


## MLP Regression

In [6]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Standardise the five factors before training the network. `scale` stays
# fitted so the same transformation can be applied to new query points.
scale = StandardScaler()
Xp = scale.fit_transform(Xinit)

# Two hidden layers of 100 ReLU units. random_state pins the random parts of
# training so the score below is reproducible from a fresh kernel.
mlp = MLPRegressor(
    max_iter=5000,
    hidden_layer_sizes=(100, 100),
    activation='relu',
    random_state=0,
)
mlp.fit(Xp, yinit)

# Score on the training data itself (fit quality, not generalisation).
mlp.score(Xp, yinit)

0.9728050886408335

# Comparing results 

In [7]:
# Query point, in the factor order ['FA','PE','BP','RA','CE'].
# dtype=float keeps the product terms below in floating point; with an integer
# row, terms such as FA*FA*PE are integer products that can overflow for
# larger factor settings.
Xt = np.array([[100, 200, 200, 185, 300]], dtype=float)

# Same polynomial terms, in the same order, as the columns of the OLS design
# matrix: FA, PE, BP, FA^2, FA*PE, FA^2*PE, BP*FA, BP*FA^2, BP*PE, BP*FA*PE, BP^2.
fa, pe, bp = Xt[0, 0], Xt[0, 1], Xt[0, 2]
Xtest = [[fa, pe, bp,
          fa * fa, fa * pe, fa * fa * pe,
          fa * bp, fa * fa * bp,
          pe * bp, fa * pe * bp,
          bp * bp]]

# Each predict() call returns a one-element array. Take the scalar explicitly
# rather than relying on implicit array-to-scalar conversion inside "%"
# formatting (deprecated since NumPy 1.25), and let "%.0f" do the only
# rounding step: rounding to one decimal first could flip the last digit.
ols_pred = np.ravel(olsresult.predict(np.append(1, Xtest).reshape(1, -1)))[0]
rf_pred = np.ravel(regr.predict(Xt))[0]
mlp_pred = np.ravel(mlp.predict(scale.transform(Xt)))[0]

print("Distance predicted by Linear Regression OLS: \t\t%.0f" % ols_pred)
print("Distance predicted by Random Forest: \t\t\t%.0f" % rf_pred)
print("Distance predicted by MLP: \t\t\t\t%.0f" % mlp_pred)

Distance predicted by Linear Regression OLS: 		474
Distance predicted by Random Forest: 			457
Distance predicted by MLP: 				428
