In [1]:
import pyDOE
from scipy import stats
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu
import sys
from sklearn.preprocessing import MinMaxScaler
import scipy.stats.distributions as dist
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.gaussian_process.kernels import DotProduct
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from collections import namedtuple
from sklearn.linear_model import ElasticNet
import sklearn
from sklearn.preprocessing import PolynomialFeatures

## Surrogate Models

In [2]:
''' Elastic Net Regression '''
def elastic_net(train_data, test_data, alpha=10102, l1_ratio=1.0, max_iter=3000):
    """Fit an Elastic Net regressor on min-max scaled features and predict.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training set. Every column except the last is used as a feature;
        the last column is used as the response.
    test_data : pandas.DataFrame or array-like
        Feature values to predict for (no response column), with the same
        number and order of columns as the training features.
    alpha : float, default 10102
        Regularisation strength forwarded to ``ElasticNet``.
    l1_ratio : float, default 1.0
        L1/L2 mixing parameter forwarded to ``ElasticNet``.
    max_iter : int, default 3000
        Iteration cap forwarded to ``ElasticNet``.

    Returns
    -------
    tuple
        ``(regr, pred)``: the fitted ``ElasticNet`` estimator and its
        predictions for ``test_data``.
    """
    features = train_data.iloc[:, :-1].values
    response = train_data.iloc[:, -1].values

    # The scaler is fitted on the training features only and then reused
    # for the test features.
    scaler = MinMaxScaler().fit(features)

    regr = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        fit_intercept=True,
        max_iter=max_iter,
        selection='random',
        random_state=0,
    ).fit(scaler.transform(features), response)

    # Plain arrays are used for both fit and transform so the scaler never
    # sees a mix of named (DataFrame) and unnamed (ndarray) inputs.
    pred = regr.predict(scaler.transform(np.asarray(test_data)))
    return regr, pred


''' Kriging'''
def kriging(train_data, test_data, kernel=None, n_restarts_optimizer=15):
    """Fit a Gaussian-process (Kriging) regressor on min-max scaled features.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training set. Every column except the last is used as a feature;
        the last column is used as the response.
    test_data : pandas.DataFrame or array-like
        Feature values to predict for (no response column).
    kernel : sklearn kernel object, optional
        Covariance kernel. When omitted a default ``RBF()`` kernel is used.
    n_restarts_optimizer : int, default 15
        Number of optimizer restarts forwarded to
        ``GaussianProcessRegressor``.

    Returns
    -------
    tuple
        ``(gpr, pred)``: the fitted ``GaussianProcessRegressor`` and its
        predictions for ``test_data``.
    """
    if kernel is None:
        kernel = RBF()

    features = train_data.iloc[:, :-1].values
    response = train_data.iloc[:, -1].values

    # The scaler is fitted on the training features only and then reused
    # for the test features.
    scaler = MinMaxScaler().fit(features)

    gpr = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
        random_state=0,
        normalize_y=True,
    ).fit(scaler.transform(features), response)

    # Plain arrays are used for both fit and transform so the scaler never
    # sees a mix of named (DataFrame) and unnamed (ndarray) inputs.
    pred = gpr.predict(scaler.transform(np.asarray(test_data)))
    return gpr, pred

''' KNN Regression Implementation'''
def KNN(train_data, test_data, n_neighbors=12):
    """Fit a distance-weighted k-nearest-neighbours regressor and predict.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training set. Every column except the last is used as a feature;
        the last column is used as the response.
    test_data : pandas.DataFrame or array-like
        Feature values to predict for (no response column).
    n_neighbors : int, default 12
        Number of neighbours forwarded to ``KNeighborsRegressor``.

    Returns
    -------
    tuple
        ``(regr, pred)``: the fitted ``KNeighborsRegressor`` and its
        predictions for ``test_data``.
    """
    features = train_data.iloc[:, :-1].values
    response = train_data.iloc[:, -1].values

    # The scaler is fitted on the training features only and then reused
    # for the test features.
    scaler = MinMaxScaler().fit(features)

    regr = KNeighborsRegressor(
        n_neighbors=n_neighbors,
        weights='distance',
        algorithm='brute',
        p=2,
    ).fit(scaler.transform(features), response)

    # Plain arrays are used for both fit and transform so the scaler never
    # sees a mix of named (DataFrame) and unnamed (ndarray) inputs.
    pred = regr.predict(scaler.transform(np.asarray(test_data)))
    return regr, pred


''' Support Vector Regression'''
def _SVR(train_data, test_data, gamma=37.21, C=1000.0, max_iter=1500):
    """Fit an RBF-kernel support vector regressor on min-max scaled features.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training set. Every column except the last is used as a feature;
        the last column is used as the response.
    test_data : pandas.DataFrame or array-like
        Feature values to predict for (no response column).
    gamma : float, default 37.21
        RBF kernel coefficient forwarded to ``SVR``.
    C : float, default 1000.0
        Regularisation parameter forwarded to ``SVR``. Inside this function
        the name shadows the module-level ``ConstantKernel as C`` alias,
        which is not used here.
    max_iter : int, default 1500
        Solver iteration cap forwarded to ``SVR``.

    Returns
    -------
    tuple
        ``(svr, pred)``: the fitted ``SVR`` estimator and its predictions
        for ``test_data``.
    """
    features = train_data.iloc[:, :-1].values
    response = train_data.iloc[:, -1].values

    # The scaler is fitted on the training features only and then reused
    # for the test features.
    scaler = MinMaxScaler().fit(features)

    svr = SVR(kernel='rbf', gamma=gamma, C=C, max_iter=max_iter).fit(
        scaler.transform(features), response
    )

    # Plain arrays are used for both fit and transform so the scaler never
    # sees a mix of named (DataFrame) and unnamed (ndarray) inputs.
    pred = svr.predict(scaler.transform(np.asarray(test_data)))
    return svr, pred

""" Generating Polynomial Features i.e., Function Basis """
def quadratic_polynomial(df):
    """Expand the columns of ``df`` into degree-2 polynomial features.

    The expansion is produced by ``PolynomialFeatures(degree=2)`` and is
    wrapped in a new ``pandas.DataFrame``; the result therefore carries
    default integer row and column labels rather than those of ``df``.
    """
    expander = PolynomialFeatures(degree=2)
    expanded = expander.fit_transform(df)
    return pd.DataFrame(expanded)

""" Quadratic Regression with Elastic Net Penalty"""
def polynomial(tr, te):
    """Quadratic regression with an Elastic Net penalty.

    The feature columns of both frames are expanded into degree-2
    polynomial features and the expanded training set is handed to
    ``elastic_net``.

    Parameters
    ----------
    tr : pandas.DataFrame
        Training set. Every column except the last is used as a feature;
        the last column is used as the response.
    te : pandas.DataFrame
        Test set with the same layout; its last column is ignored here.

    Returns
    -------
    tuple
        ``(model, pred)`` exactly as returned by ``elastic_net``.
    """
    # Work on the arguments, not on the notebook-level ``train``/``test``
    # frames, so the function gives the right answer for any input.
    train_basis = quadratic_polynomial(tr.iloc[:, :-1])
    test_basis = quadratic_polynomial(te.iloc[:, :-1])

    # Attach the response by position: the expanded frame has a fresh
    # 0..n-1 index, so an index-aligned assignment could misplace values
    # (or insert NaN) whenever ``tr`` carries a different index.
    train_basis['Y'] = tr.iloc[:, -1].values

    return elastic_net(train_basis, test_basis)

## Load the Initially Generated Training and Test Data Sets

In [3]:
# Relative locations of the pre-generated data sets. Forward slashes are
# accepted by Windows as well as POSIX systems, and the test path is spelled
# out instead of being derived by slicing a fixed number of characters off
# the training path.
path = "Training_Data_Sets/train_2_1000Samples.csv"
test_path = "Test_Data_Sets/test_2_200Samples.csv"

# The first CSV column is dropped from both frames (presumably a saved
# index column - confirm against the files).
train = pd.read_csv(path).iloc[:, 1:]
test = pd.read_csv(test_path).iloc[:, 1:]

# True responses of the test set, used as the reference for every model.
true = np.array(test['Y'])

## Modeling Accuracy

## KNN

In [4]:
# Benchmark one complete fit-and-predict call of the KNN surrogate.
%timeit model_knn ,pred_knn = KNN(train,test.iloc[:,:-1])

8.21 ms ± 178 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
# Fit the KNN surrogate on the training set and predict the test inputs
# (all test columns except the last one).
model_knn, pred_knn = KNN(train, test.iloc[:, :-1])
# Mean absolute percentage error of the predictions, in percent.
np.mean(100 * (abs(true - pred_knn) / abs(true)))

33.26969639116717

## SVR

In [6]:
# Benchmark one complete fit-and-predict call of the SVR surrogate.
%timeit model_svr , pred_svr = _SVR(train,test.iloc[:,:-1])

118 ms ± 18.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
# Fit the SVR surrogate on the training set and predict the test inputs
# (all test columns except the last one).
model_svr, pred_svr = _SVR(train, test.iloc[:, :-1])
# Mean absolute percentage error of the predictions, in percent.
np.mean(100 * (abs(true - pred_svr) / abs(true)))

45.660280039508386

## Kriging

In [8]:
# Benchmark one complete fit-and-predict call of the Kriging surrogate.
%timeit model_kri , pred_kri = kriging(train,test.iloc[:,:-1])

10.1 s ± 1.54 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# Fit the Kriging surrogate on the training set and predict the test inputs
# (all test columns except the last one).
model_kri, pred_kri = kriging(train, test.iloc[:, :-1])
# Mean absolute percentage error of the predictions, in percent.
np.mean(100 * (abs(true - pred_kri) / abs(true)))

17.330186043699765

## ELN

In [10]:
# Benchmark one complete call of the quadratic Elastic Net surrogate
# (polynomial expansion, fit and predict).
%timeit polynomial (train,test)

244 ms ± 13.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# Fit the quadratic Elastic Net surrogate; ``polynomial`` receives the full
# frames and separates features from the response itself.
model_eln, pred_eln = polynomial(train, test)
# Mean absolute percentage error of the predictions, in percent.
np.mean(100 * (abs(true - pred_eln) / abs(true)))

1.8067936058606264

## Save the Results

In [12]:
# Stack the true responses and each surrogate's predictions as rows, then
# transpose so every test sample becomes one row with one column per series.
F2_accuracy = pd.DataFrame([true, pred_eln, pred_knn, pred_kri, pred_svr]).T
F2_accuracy.columns = ['True', 'ELN', 'KNN', 'Kri', 'SVR']
# A forward slash keeps the output path valid on Windows and POSIX alike;
# a backslash would become part of the file name on POSIX systems.
F2_accuracy.to_csv('Results/F2_Accuracy.csv')