In [65]:
# importing libraries
from k_fold import *
from splitData import *
from algos.polynomial_regression import *

In [66]:
# data extraction
data = []
with open('temp_pre.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        data.append([int(row["temperature"]), float(row["pressure"])])

In [67]:
# data normalization
# Z-value transformer
def Zvalue(x,mean,std):
    return (x - mean)/std

def dataNormalize(data):
    X = np.asarray(data)
    mean, std = [], []
    for i in range(X.shape[1]):
        mean.append(np.mean(X[:,i]))
        std.append(np.std(X[:,i]))
    for i in range(X.shape[0]):
        for j in range(0,X.shape[1]):
            X[i][j] = Zvalue(X[i][j],mean[j],std[j])
    return X

# data normalization
data = dataNormalize(data)
print(data)

[[-1.63835604 -0.56760068]
 [-1.44560827 -0.56759411]
 [-1.2528605  -0.5675626 ]
 [-1.06011273 -0.56740506]
 [-0.86736496 -0.56701119]
 [-0.67461719 -0.5658296 ]
 [-0.48186942 -0.56267868]
 [-0.28912165 -0.55545782]
 [-0.09637388 -0.54003145]
 [ 0.09637388 -0.50983515]
 [ 0.28912165 -0.45403764]
 [ 0.48186942 -0.35688432]
 [ 0.67461719 -0.19343043]
 [ 0.86736496  0.06258169]
 [ 1.06011273  0.4630109 ]
 [ 1.2528605   1.0538081 ]
 [ 1.44560827  1.90061742]
 [ 1.63835604  3.09534064]]


In [68]:
# split data into 80% training and 20% validation sets
train, valid = split(data)
print("training set:\n",train)
print("validation set\n", valid)
print()

training set:
 [[-1.63835604 -0.56760068]
 [-1.2528605  -0.5675626 ]
 [-1.06011273 -0.56740506]
 [-0.86736496 -0.56701119]
 [-0.67461719 -0.5658296 ]
 [-0.48186942 -0.56267868]
 [-0.28912165 -0.55545782]
 [ 0.09637388 -0.50983515]
 [ 0.28912165 -0.45403764]
 [ 0.48186942 -0.35688432]
 [ 0.67461719 -0.19343043]
 [ 1.06011273  0.4630109 ]
 [ 1.2528605   1.0538081 ]
 [ 1.44560827  1.90061742]
 [ 1.63835604  3.09534064]]
validation set
 [[ 0.86736496  0.06258169]
 [-1.44560827 -0.56759411]
 [-0.09637388 -0.54003145]]



In [69]:
train, valid = np.asarray(train), np.asarray(valid)
train_x = train[:,:-1] # extract x from train
train_y = train[:,-1] # extract y from train
val_x = valid[:,:-1] # extract x from validation
val_y = valid[:,-1] # extract y from validation

print("train_x:\n", train_x)
print("train_y:\n", train_y)
print("val_x:\n", val_x)
print("val_y:\n", val_y)


train_x:
 [[-1.63835604]
 [-1.2528605 ]
 [-1.06011273]
 [-0.86736496]
 [-0.67461719]
 [-0.48186942]
 [-0.28912165]
 [ 0.09637388]
 [ 0.28912165]
 [ 0.48186942]
 [ 0.67461719]
 [ 1.06011273]
 [ 1.2528605 ]
 [ 1.44560827]
 [ 1.63835604]]
train_y:
 [-0.56760068 -0.5675626  -0.56740506 -0.56701119 -0.5658296  -0.56267868
 -0.55545782 -0.50983515 -0.45403764 -0.35688432 -0.19343043  0.4630109
  1.0538081   1.90061742  3.09534064]
val_x:
 [[ 0.86736496]
 [-1.44560827]
 [-0.09637388]]
val_y:
 [ 0.06258169 -0.56759411 -0.54003145]


In [70]:
w0, ls = PolynomialRegression(train_x, train_y, 1) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [0.03204423 0.83659361] 	| empirical loss:  0.448558


In [71]:
# using the loss function to calculate the error
val_x_1 = np.array([psy(x,1) for x in val_x])
print(val_x_1, val_y, w0)
los = loss(val_x_1,val_y,w0)
print(los)

[[ 1.          0.86736496]
 [ 1.         -1.44560827]
 [ 1.         -0.09637388]] [ 0.06258169 -0.56759411 -0.54003145] [0.03204423 0.83659361]
0.36549079937757073


In [72]:
w0, ls = PolynomialRegression(train_x, train_y, 2) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [-0.63475672  0.76250238  0.6635592 ] 	| empirical loss:  0.100835


In [73]:
# using the loss function to calculate the error
val_x_2 = np.array([psy(x,2) for x in val_x])
los = loss(val_x_2, val_y, w0)
print(los)

0.0960163370382746


In [74]:
w0, ls = PolynomialRegression(train_x, train_y, 3) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [-0.65111133  0.09332694  0.65131723  0.3723382 ] 	| empirical loss:  0.012099


In [75]:
# using the loss function to calculate the error
val_x_3 = np.array([psy(x,3) for x in val_x])
print(val_x_3)
los = loss(val_x_3, val_y, w0)
print(los)

[[ 1.00000000e+00  8.67364964e-01  7.52321981e-01  6.52537729e-01]
 [ 1.00000000e+00 -1.44560827e+00  2.08978328e+00 -3.02100800e+00]
 [ 1.00000000e+00 -9.63738849e-02  9.28792570e-03 -8.95113482e-04]]
0.007813437586428135


# Cross-validation with k-fold

In [76]:
# cross-validation using k-folds
part = k_fold(data)
for e in part:
    print(e)

please choose a k value among the following:
2 3 6 9 18 
[[ 0.09637388 -0.50983515]
 [ 0.28912165 -0.45403764]]
[[-1.44560827 -0.56759411]
 [-1.2528605  -0.5675626 ]]
[[-0.67461719 -0.5658296 ]
 [ 0.67461719 -0.19343043]]
[[1.44560827 1.90061742]
 [1.63835604 3.09534064]]
[[ 1.06011273  0.4630109 ]
 [-1.06011273 -0.56740506]]
[[ 0.48186942 -0.35688432]
 [-0.86736496 -0.56701119]]
[[ 0.86736496  0.06258169]
 [-0.09637388 -0.54003145]]
[[-0.48186942 -0.56267868]
 [-0.28912165 -0.55545782]]
[[-1.63835604 -0.56760068]
 [ 1.2528605   1.0538081 ]]


In [77]:
part = np.asarray(part)

train_x = []
for i in range(part.shape[0] - 1):
    for j in range (part.shape[1]):
        train_x.append(part[i][j][:-1])
train_y = []
for i in range(part.shape[0] - 1):
    for j in range (part.shape[1]):
        train_y.append(part[i][j][:-1])

train_x = np.asarray(train_x)
train_y = np.asarray(train_y)

val_x = []
val_y = []
for i in range(part.shape[1]):
    val_x.append(part[-1][i][:-1])
    val_y.append(part[-1][i][-1])

val_x = np.asarray(val_x)
val_y = np.asarray(val_y)

print("train_x:\n", train_x)
print("train_y:\n", train_y)
print("val_x:\n", val_x)
print("val_y:\n", val_y)

train_x:
 [[ 0.09637388]
 [ 0.28912165]
 [-1.44560827]
 [-1.2528605 ]
 [-0.67461719]
 [ 0.67461719]
 [ 1.44560827]
 [ 1.63835604]
 [ 1.06011273]
 [-1.06011273]
 [ 0.48186942]
 [-0.86736496]
 [ 0.86736496]
 [-0.09637388]
 [-0.48186942]
 [-0.28912165]]
train_y:
 [[ 0.09637388]
 [ 0.28912165]
 [-1.44560827]
 [-1.2528605 ]
 [-0.67461719]
 [ 0.67461719]
 [ 1.44560827]
 [ 1.63835604]
 [ 1.06011273]
 [-1.06011273]
 [ 0.48186942]
 [-0.86736496]
 [ 0.86736496]
 [-0.09637388]
 [-0.48186942]
 [-0.28912165]]
val_x:
 [[-1.63835604]
 [ 1.2528605 ]]
val_y:
 [-0.56760068  1.0538081 ]


In [78]:
w0, ls = PolynomialRegression(train_x, train_y, 1) # linear regression with polynomial mapping of degree 1
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [[-3.81639165e-17]
 [ 1.00000000e+00]] 	| empirical loss:  0.000000


In [79]:
# using the loss function to calculate the error
val_x_1 = np.asarray([psy(x,1) for x in val_x])
print(val_x_1)
l = loss(val_x_1, val_y, w0)
print(l)

[[ 1.         -1.63835604]
 [ 1.          1.2528605 ]]
0.5930694585226541


In [80]:
w0, ls = PolynomialRegression(train_x, train_y, 2) # linear regression with polynomial mapping of degree 2
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [[-4.16333634e-16]
 [ 1.00000000e+00]
 [ 2.49800181e-16]] 	| empirical loss:  0.000000


In [81]:
# using the loss function to calculate the error
val_x_2 = np.asarray([psy(x,2) for x in val_x])
los = loss(val_x_2, val_y, w0)
print(los)

0.5930694585226534


In [82]:
w0, ls = PolynomialRegression(train_x, train_y, 3) # linear regression with polynomial mapping of degree 2
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [[ 2.24820162e-15]
 [ 1.00000000e+00]
 [-9.99200722e-16]
 [-1.80411242e-15]] 	| empirical loss:  0.000000


In [83]:
# using the loss function to calculate the error
val_x_3 = np.asarray([psy(x,3) for x in val_x])
los = loss(val_x_3, val_y, w0)
print(los)

0.5930694585226535


In [84]:
w0, ls = PolynomialRegression(train_x, train_y, 4) # linear regression with polynomial mapping of degree 2
print("FINAL RESULTS:")
print("optimal weight vector: ", w0, "\t| empirical loss: ", "{0:.6f}".format(ls))

FINAL RESULTS:
optimal weight vector:  [[ 6.10622664e-16]
 [ 1.00000000e+00]
 [-4.21884749e-15]
 [ 2.27595720e-15]
 [ 1.11022302e-15]] 	| empirical loss:  0.000000


In [85]:
# using the loss function to calculate the error
val_x_4 = np.asarray([psy(x,4) for x in val_x])
los = loss(val_x_4, val_y, w0)
print(los)

0.593069458522665
