In [1]:
# importing libraries
from k_fold import *
from splitData import *
from algos.polynomial_regression import *

In [2]:
# data extraction
data = []
with open('temp_pre.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        data.append([int(row["temperature"]), float(row["pressure"])])

In [3]:
# data normalization
# Z-value transformer
def Zvalue(x,mean,std):
    return (x - mean)/std

def dataNormalize(data):
    X = np.asarray(data)
    mean, std = [], []
    for i in range(X.shape[1]):
        mean.append(np.mean(X[:,i]))
        std.append(np.std(X[:,i]))
    for i in range(X.shape[0]):
        for j in range(0,X.shape[1]):
            X[i][j] = Zvalue(X[i][j],mean[j],std[j])
    return X

# data normalization
data = dataNormalize(data)
print(data)

[[-1.63835604 -0.56760068]
 [-1.44560827 -0.56759411]
 [-1.2528605  -0.5675626 ]
 [-1.06011273 -0.56740506]
 [-0.86736496 -0.56701119]
 [-0.67461719 -0.5658296 ]
 [-0.48186942 -0.56267868]
 [-0.28912165 -0.55545782]
 [-0.09637388 -0.54003145]
 [ 0.09637388 -0.50983515]
 [ 0.28912165 -0.45403764]
 [ 0.48186942 -0.35688432]
 [ 0.67461719 -0.19343043]
 [ 0.86736496  0.06258169]
 [ 1.06011273  0.4630109 ]
 [ 1.2528605   1.0538081 ]
 [ 1.44560827  1.90061742]
 [ 1.63835604  3.09534064]]


In [4]:
# split data into 80% training and 20% validation sets
train, valid = split(data)
print("training set:\n",train)
print("validation set\n", valid)
print()

training set:
 [[-1.63835604 -0.56760068]
 [-1.44560827 -0.56759411]
 [-1.2528605  -0.5675626 ]
 [-1.06011273 -0.56740506]
 [-0.67461719 -0.5658296 ]
 [-0.28912165 -0.55545782]
 [-0.09637388 -0.54003145]
 [ 0.09637388 -0.50983515]
 [ 0.28912165 -0.45403764]
 [ 0.67461719 -0.19343043]
 [ 0.86736496  0.06258169]
 [ 1.06011273  0.4630109 ]
 [ 1.2528605   1.0538081 ]
 [ 1.44560827  1.90061742]
 [ 1.63835604  3.09534064]]
validation set
 [[ 0.48186942 -0.35688432]
 [-0.86736496 -0.56701119]
 [-0.48186942 -0.56267868]]



In [5]:
train, valid = np.asarray(train), np.asarray(valid)
train_x = train[:,:-1] # extract x from train
train_y = train[:,-1] # extract y from train
val_x = valid[:,:-1] # extract x from validation
val_y = valid[:,-1] # extract y from validation

print("train_x:\n", train_x)
print("train_y:\n", train_y)
print("val_x:\n", val_x)
print("val_y:\n", val_y)


train_x:
 [[-1.63835604]
 [-1.44560827]
 [-1.2528605 ]
 [-1.06011273]
 [-0.67461719]
 [-0.28912165]
 [-0.09637388]
 [ 0.09637388]
 [ 0.28912165]
 [ 0.67461719]
 [ 0.86736496]
 [ 1.06011273]
 [ 1.2528605 ]
 [ 1.44560827]
 [ 1.63835604]]
train_y:
 [-0.56760068 -0.56759411 -0.5675626  -0.56740506 -0.5658296  -0.55545782
 -0.54003145 -0.50983515 -0.45403764 -0.19343043  0.06258169  0.4630109
  1.0538081   1.90061742  3.09534064]
val_x:
 [[ 0.48186942]
 [-0.86736496]
 [-0.48186942]]
val_y:
 [-0.35688432 -0.56701119 -0.56267868]


In [6]:
w0, ls = PolynomialRegression(train_x, train_y, 1) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [0.054372   0.77360067] 	| empirical loss:  0.471542


In [7]:
# using the loss function to calculate the error
val_x_1 = np.array([psy(x,1) for x in val_x])
print(val_x_1, val_y, w0)
los = loss(val_x_1,val_y,w0)
print(los)

[[ 1.          0.48186942]
 [ 1.         -0.86736496]
 [ 1.         -0.48186942]] [-0.35688432 -0.56701119 -0.56267868] [0.054372   0.77360067]
0.22561214894458867


In [8]:
w0, ls = PolynomialRegression(train_x, train_y, 2) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [-0.68075363  0.78607231  0.65637146] 	| empirical loss:  0.103681


In [9]:
# using the loss function to calculate the error
val_x_2 = np.array([psy(x,2) for x in val_x])
los = loss(val_x_2, val_y, w0)
print(los)

0.08422773461461912


In [74]:
w0, ls = PolynomialRegression(train_x, train_y, 3) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [-0.65111133  0.09332694  0.65131723  0.3723382 ] 	| empirical loss:  0.012099


# Cross-validation with k-fold

In [63]:
# cross-validation using k-folds
part = k_fold(data)

please choose a k value among the following:
2 3 6 9 18 


In [61]:
# part = np.asarray(part)

for i in range(len(part)): 
    validation_set = np.asarray([np.asarray(x) for x in part[i]]) # validation set
    val_x, val_y = validation_set[:,:-1], validation_set[:,-1:] # extract x and y from validation set
    training_set = [x for j, x in enumerate(part) if j != i] # training set
    
    train_x = [array[:,:-1] for array in training_set] # extract x from training set
    train_y = [array[:,-1:] for array in training_set] # extract y from training set
    train_x = np.asarray(train_x) # convert to numpy array
    train_y = np.asarray(train_y)  # convert to numpy array
    train_x = train_x.reshape(train_x.shape[0],train_x.shape[1],) # reshape to 2D array
    train_y = train_y.reshape(train_y.shape[0],train_y.shape[1],) # reshape to 2D array

    over_all_loss1, over_all_loss2, over_all_loss3, over_all_loss4 = 0, 0, 0, 0
    w0, ls = PolynomialRegression(train_x, train_y, 1) # linear regression with polynomial mapping of degree 1
    over_all_loss1 += ls
    # print("FINAL RESULTS Order 1:")
    print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))
    w0, ls = PolynomialRegression(train_x, train_y, 2) # linear regression with polynomial mapping of degree 2
    over_all_loss2 += ls
    print("FINAL RESULTS Order 2:")
    print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))
    w0, ls = PolynomialRegression(train_x, train_y, 3) # linear regression with polynomial mapping of degree 2
    over_all_loss3 += ls
    print("FINAL RESULTS Order 3:")
    print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))
    w0, ls = PolynomialRegression(train_x, train_y, 4) # linear regression with polynomial mapping of degree 2
    over_all_loss4 += ls
    print("FINAL RESULTS Order 4:")
    print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))
print("OVERALL LOSS degree 1: ", over_all_loss1/len(part))
print("OVERALL LOSS degree 2: ", over_all_loss2/len(part))
print("OVERALL LOSS degree 3: ", over_all_loss3/len(part))
print("OVERALL LOSS degree 4: ", over_all_loss4/len(part))

optimal weight vector:  [[-0.24752609  0.38628457  0.41477359]
 [ 0.27208134 -0.14213015  1.14621639]
 [ 0.15766226  0.65663483  1.07545803]
 [-0.06709908 -0.29445107  1.47050378]] 	| empirical loss:  0.119973
FINAL RESULTS Order 2:
optimal weight vector:  [[-0.18414426 -0.14416919  0.1570062 ]
 [ 0.10728286 -0.04892337  0.05262928]
 [ 0.12677107  0.20074848  0.20217708]
 [-0.01538107  0.01261174  0.36630266]
 [ 0.04710642  0.09785964  0.07212742]
 [-0.1297786  -0.08783707 -0.03785284]
 [ 0.03888846 -0.09983442  0.23944285]
 [-0.1297786  -0.08783707 -0.03785284]
 [-0.11869969 -0.12377096 -0.29360815]
 [-0.09144085  0.02036214  0.14110097]
 [ 0.03888846 -0.09983442  0.23944285]
 [-0.09144085  0.02036214  0.14110097]
 [-0.14439166 -0.06104342  0.85270317]] 	| empirical loss:  0.000000
FINAL RESULTS Order 3:
optimal weight vector:  [[-0.10753647 -0.08776611  0.00826652]
 [ 0.05597916  0.00665781  0.05706418]
 [ 0.10528505  0.11076031  0.07942587]
 [ 0.00599365  0.00344845  0.13014322]
 [ 