In [1]:
import sys
import pyDOE
import pandas as pd
import numpy as np
import scipy.stats.distributions as dist
from sklearn.svm import SVR
from collections import namedtuple
from sklearn.preprocessing import MinMaxScaler
import sklearn
from collections import namedtuple

## Helper Functions

In [2]:
ValueRange = namedtuple('ValueRange', ['min', 'max'])

def determinerange(values):
    """Determine the range of values in each dimension"""
    return ValueRange(np.min(values, axis=0), np.max(values, axis=0))


def linearscaletransform(values, *, range_in=None, range_out=ValueRange(0, 1), scale_only=False):
    """Perform a scale transformation of `values`: [range_in] --> [range_out]"""

    if range_in is None:
        range_in = determinerange(values)
    elif not isinstance(range_in, ValueRange):
        range_in = ValueRange(*range_in)

    if not isinstance(range_out, ValueRange):
        range_out = ValueRange(*range_out)

    scale_out = range_out.max - range_out.min
    scale_in = range_in.max - range_in.min

    if scale_only:
        scaled_values = (values / scale_in) * scale_out
    else:
        scaled_values = (values - range_in.min) / scale_in
        scaled_values = (scaled_values * scale_out) + range_out.min

    return scaled_values

''' Support Vector Regression'''
def _SVR(train_data,test_data , Hyper):
    gam, reg= Hyper
    scaler = sklearn.preprocessing.MinMaxScaler().fit(np.r_[train_data.iloc[:,:-1].values, test_data.values])
    gpr = sklearn.svm.SVR(kernel='rbf', C=reg ,max_iter=1500).fit( scaler.transform(train_data.iloc[:,:-1]), train_data.iloc[:,-1])
    pred = gpr.predict(scaler.transform(test_data))
    return gpr,pred

''' Latin HyperCube Sampling Design of Experiment for Hyper_Parameters Optimization'''
def DOE_Hyper():
    np.random.seed(0)
    lhd = pyDOE.lhs(n=2, samples=100, criterion='m')
    X1, X2 = lhd[:,0], lhd[:,1] 
    X1 = linearscaletransform(X1,range_out=(0.00001,1e2))
    X2 = linearscaletransform(X2,range_out=(0.0001 , 1000))
    Hyper_Parameters = pd.DataFrame()
    Hyper_Parameters['Gamma'] = pd.Series(X1)
    Hyper_Parameters['Regularization'] = pd.Series(X2)
    return Hyper_Parameters

## Load Training and Test Data Set initially Generated

In [3]:
path = "Training_Data_Sets\\train_2_1000Samples.csv"
train = pd.read_csv(path).iloc[:,1:]
test = pd.read_csv(path[:-42]+str('Test_Data_Sets\\test_2_200Samples.csv')).iloc[:,1:]
true = np.array(test['Y'])

## SVR

In [4]:
Hyper_Parameters = DOE_Hyper()
mean_abs_error = np.zeros(Hyper_Parameters.shape[0])
for i in range(Hyper_Parameters.shape[0]):
    temp = np.array(Hyper_Parameters.iloc[i,:])
    model,pred_k_m = _SVR(train,test.iloc[:,:-1],temp)
    mean_abs_error[i] = np.mean((abs(true-pred_k_m) / abs(true) ) * 100)
Hyper_Parameters ['Mean_Error'] = pd.Series(mean_abs_error)

In [5]:
Hyper_Parameters.iloc[Hyper_Parameters.Mean_Error.idxmin(),:-1]

Gamma               37.213462
Regularization    1000.000000
Name: 92, dtype: float64