In [19]:
#!/usr/bin/env python
# V2A3_regression_airfoilnoise.py
# Programmgeruest zu Versuch 2, Aufgabe 3
# to log outputs start with: python V2A3_regression_airfoilnoise.py >V2A3_regression_airfoilnoise.log

import numpy as np
import pandas as pd

from V2A2_Regression import *


# ***** MAIN PROGRAM ********
# (I) Hyper-Parameters
S=3;               # S-fold cross-validation
lmbda=1;           # regularization parameter (lambda>0 avoids also singularities)
K=13;               # K for K-Nearest Neighbors
flagKLinReg = 1;   # if flag==1 and K>=D then do a linear regression of the KNNs to make prediction
deg=5;             # degree of basis function polynomials 
flagSTD=1;         # if >0 then standardize data before training (i.e., scale X to mean value 0 and standard deviation 1)
N_pred=5;          # number of predictions on the training set for testing
x_test_1 = [1250,11,0.2,69.2,0.0051];   # REPLACE dummy code: define test vector 1
x_test_2 = [1305,8,0.1,57.7,0.0048];   # REPLACE dummy code: define test vector 2



In [2]:
# (II) Load data 
fname='./AirfoilSelfNoise/airfoil_self_noise.xls'
airfoil_data = pd.read_excel(fname,0); # load data as pandas data frame 
T = airfoil_data.values[:,5]           # target values = noise load (= column 5 of data table)
X = airfoil_data.values[:,:5]          # feature vectors (= column 0-4 of data table)
N,D=X.shape                            # size and dimensionality of data set
idx_perm = np.random.permutation(N)    # get random permutation for selection of test vectors 
print("Data set ",fname," has size N=", N, " and dimensionality D=",D)
print("X=",X)
print("T=",T)
print("x_test_1=",x_test_1)
print("x_test_2=",x_test_2)
print("number of basis functions M=", len(phi_polynomial(X[1],deg)))



Data set  ./AirfoilSelfNoise/airfoil_self_noise.xls  has size N= 1502  and dimensionality D= 5
X= [[1.00000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 [1.25000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 [1.60000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 ...
 [4.00000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]
 [5.00000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]
 [6.30000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]]
T= [125.201 125.951 127.591 ... 106.604 106.224 104.204]
x_test_1= [1250, 11, 0.2, 69.2, 0.0051]
x_test_2= [1305, 8, 0.1, 57.7, 0.0048]
number of basis functions M= 252


In [3]:
# (III) Do least-squares regression with regularization 
print("\n#### Least Squares Regression with regularization lambda=", lmbda, " ####")
lsr = LSRRegressifier(lmbda=lmbda,phi=lambda x: phi_polynomial(x,deg),flagSTD=flagSTD)  # REPLACE dummy code: Create and fit Least-Squares Regressifier using polynomial basis function of degree deg and flagSTD for standardization of data  
lsr.fit(X,T)
print("lsr.W_LSR=",lsr.W_LSR)    # REPLACE dummy code: print weight vector for least squares regression  
print("III.1) Some predictions on the training data:")
for i in range(N_pred): 
    n=idx_perm[i]
    print("Prediction for X[",n,"]=",X[n]," is y=",lsr.predict(X[n]),", whereas true value is T[",n,"]=",T[n])   # REPLACE dummy code: compute prediction for X[n]
print("III.2) Some predicitions for new test vectors:")
print("Prediction for x_test_1 is y=", lsr.predict(x_test_1))    # REPLACE dummy code: compute prediction for x_test_1
print("Prediction for x_test_2 is y=", lsr.predict(x_test_2))    # REPLACE dummy code: compute prediction for x_test_2
print("III.3) S=",S,"fold Cross Validation:")
err_abs,err_rel = lsr.crossvalidate(S,X,T)                  # REPLACE dummy code: do cross validation!! 
print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel) 




#### Least Squares Regression with regularization lambda= 1  ####
lsr.W_LSR= [-3.94646520e-01 -1.80155527e+00 -3.96662974e-01 -8.64281233e-01
  3.26438812e-01 -6.21770966e-01  6.48905815e-01 -7.71134930e-01
  2.95779376e-01 -8.67132337e-02  3.61560493e-01 -2.81989365e-02
 -3.64074945e-02  1.43998983e-01  4.77503076e-02 -3.63380016e-02
 -9.73944497e-02 -3.71505203e-02 -1.53013579e-01 -2.47759041e-01
  1.29242314e-01  3.13172340e-01  1.50772922e-01  8.40072758e-01
 -8.44083280e-02  1.32532606e+00  2.52907215e-01  2.08504016e-01
 -2.03801432e-01  5.74149325e-01  3.06551563e-01 -2.34531355e-01
  4.66271349e-01 -2.43145107e-01  1.06213081e-02  2.84851888e-01
 -8.48806231e-02 -2.00727798e-01 -6.93365225e-02 -5.88853578e-02
  7.29728355e-02 -1.36528243e-01 -2.26202702e-01 -1.28035094e-01
 -2.24863325e-01 -1.13338956e-01  4.19411521e-01 -1.68848150e-02
  9.78093460e-02 -9.27714500e-02 -1.60258656e-02 -3.22486764e-01
  1.01134491e-01  7.97108235e-03 -1.81313744e-01 -3.69616798e-01
 -2.47146762

In [13]:
lmbdaRange = [1,2,5,10,30,60,100]
degRange = list(range(1,8))
LSRErrors = []

for lmb in lmbdaRange:
    for d in degRange:
        # (III) Do least-squares regression with regularization 
        print("\n#### Least Squares Regression with regularization lambda=", lmb, " deg=",d,  " ####")
        lsr = LSRRegressifier(lmbda=lmb,phi=lambda x: phi_polynomial(x,d),flagSTD=flagSTD)  # REPLACE dummy code: Create and fit Least-Squares Regressifier using polynomial basis function of degree deg and flagSTD for standardization of data  
        lsr.fit(X,T)
        print("lsr.W_LSR=",lsr.W_LSR)    # REPLACE dummy code: print weight vector for least squares regression  
        #print("III.1) Some predictions on the training data:")
        #for i in range(N_pred): 
            #n=idx_perm[i]
            #print("Prediction for X[",n,"]=",X[n]," is y=",lsr.predict(X[n]),", whereas true value is T[",n,"]=",T[n])   # REPLACE dummy code: compute prediction for X[n]
        #print("III.2) Some predicitions for new test vectors:")
        #print("Prediction for x_test_1 is y=", lsr.predict(x_test_1))    # REPLACE dummy code: compute prediction for x_test_1
        #print("Prediction for x_test_2 is y=", lsr.predict(x_test_2))    # REPLACE dummy code: compute prediction for x_test_2
        print("III.3) S=",S,"fold Cross Validation:")
        err_abs,err_rel = lsr.crossvalidate(S,X,T)                  # REPLACE dummy code: do cross validation!!
        LSRErrors.append((lmb, d, err_abs, err_rel))
        print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel) 


#### Least Squares Regression with regularization lambda= 1  deg= 1  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (3.7426904512740404, 3.032703726901288, 0.0029708583213334805, 17.834629109538753) 
relative errors (E,sd,min,max)= (0.030060493569663435, 0.02449174579917344, 2.3060478008317074e-05, 0.16220524696945687)

#### Least Squares Regression with regularization lambda= 1  deg= 2  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (3.22587019951892, 2.6693604535048245, 0.0026685517388500557, 18.03687569144377) 
relative errors (E,sd,min,max)= (0.025897430202553428, 0.021811447774288523, 2.068548547238156e-05, 0.1640446716395828)

#### Least Squares Regression with regularization lambda= 1  deg= 3  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (2.7347807102442734, 2.3893427037029857, 0.0002965685705191845, 22.711980553441123) 
relative errors (E,sd,min,max)= (0.02196847951570637, 0.01947415135850052, 2.31089

#### Least Squares Regression with regularization lambda= 10  deg= 4  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (2.409388698101218, 2.432416949136901, 0.001532779223296643, 40.223979632442095) 
relative errors (E,sd,min,max)= (0.019325388017867682, 0.01982834252873825, 1.3982787867948468e-05, 0.3306343213497135)

#### Least Squares Regression with regularization lambda= 10  deg= 5  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (2.234195989248932, 2.121534010953418, 0.0025382810222112084, 25.637192620876164) 
relative errors (E,sd,min,max)= (0.017956140469290147, 0.01725996586805743, 2.2440818868457327e-05, 0.2141644052266863)

#### Least Squares Regression with regularization lambda= 10  deg= 6  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (2.5602552756167625, 9.00328476368852, 0.0026334250723465402, 297.03757586461006) 
relative errors (E,sd,min,max)= (0.020751031714465722, 0.07495572547465935, 2.138909

#### Least Squares Regression with regularization lambda= 100  deg= 7  ####
III.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (3.1931982699978474, 12.275334135532917, 0.00017202819273620662, 396.9513910107197) 
relative errors (E,sd,min,max)= (0.02614840592942353, 0.10900138362891708, 1.3364734748536074e-06, 3.6205639560254625)


In [36]:
temp = []
for i,e in enumerate(LSRErrors):
    temp.append((e[2][3],i))
temp.sort()
print(temp)

smallest_mean= LSRErrors[4]
smallest_sd= LSRErrors[25]
smallest_min= LSRErrors[4]
smallest_max= LSRErrors[37]
print(smallest_mean)
print(smallest_sd)
print(smallest_min)
print(smallest_max)

[(17.002552206577917, 22), (17.17767182006628, 7), (17.183366839571775, 29), (17.28912745095228, 21), (17.378716909446652, 8), (17.461672186384845, 28), (17.46342689671748, 42), (17.47669845317455, 35), (17.546448934412936, 43), (17.62367638773017, 9), (17.673722789635065, 15), (17.834629109538753, 0), (17.851692929064285, 14), (18.03687569144377, 1), (18.064386599763182, 37), (18.164420279446603, 36), (18.932854120549436, 31), (19.158693486266827, 38), (19.62633153191787, 45), (19.944457937184282, 30), (20.47648509155991, 44), (20.70552325092079, 16), (20.874882190958402, 23), (22.711980553441123, 2), (22.824684241509445, 17), (25.637192620876164, 25), (25.909141298430157, 3), (26.193049945149866, 39), (29.209326189427344, 46), (30.303998883023297, 47), (32.16923094251888, 32), (40.223979632442095, 24), (40.7918816345491, 10), (42.35300633573823, 40), (53.06619440818116, 4), (60.11426053780238, 11), (72.82616010620197, 18), (124.46793115337803, 5), (162.44693942245456, 33), (190.26992

In [20]:
# (IV) Do KNN regression  
print("\n#### KNN regression with flagKLinReg=", flagKLinReg, " ####")
knnr = KNNRegressifier(K,flagKLinReg)                                   # REPLACE dummy code: Create and fit KNNRegressifier
knnr.fit(X,T)
print("IV.1) Some predictions on the training data:")
for i in range(N_pred): 
    n=idx_perm[i]
    print("Prediction for X[",n,"]=",X[n]," is y=",knnr.predict(X[n]),", whereas true value is T[",n,"]=",T[n])   # REPLACE dummy code: compute prediction for X[n]
print("IV.2) Some predicitions for new test vectors:")
print("Prediction for x_test_1 is y=", knnr.predict(x_test_1))    # REPLACE dummy code: compute prediction for x_test_1
print("Prediction for x_test_2 is y=", knnr.predict(x_test_2))    # REPLACE dummy code: compute prediction for x_test_2
print("IV.3) S=",S,"fold Cross Validation:")
err_abs,err_rel = knnr.crossvalidate(S,X,T)                   # REPLACE dummy code: do cross validation!! 
print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel) 




#### KNN regression with flagKLinReg= 1  ####
IV.1) Some predictions on the training data:
Prediction for X[ 829 ]= [4.00000e+02 8.40000e+00 5.08000e-02 3.17000e+01 5.80776e-03]  is y= 123.20914729855322 , whereas true value is T[ 829 ]= 120.076
Prediction for X[ 416 ]= [2.50000e+03 7.30000e+00 2.28600e-01 5.55000e+01 1.11706e-02]  is y= 120.23456809380336 , whereas true value is T[ 416 ]= 118.994
Prediction for X[ 1390 ]= [5.00000e+03 8.90000e+00 1.01600e-01 7.13000e+01 1.03088e-02]  is y= 122.07328012477107 , whereas true value is T[ 1390 ]= 116.723
Prediction for X[ 483 ]= [5.00000e+02 0.00000e+00 1.52400e-01 3.96000e+01 1.93287e-03]  is y= 121.7453400865211 , whereas true value is T[ 483 ]= 119.513
Prediction for X[ 188 ]= [1.60000e+03 0.00000e+00 2.28600e-01 7.13000e+01 2.14345e-03]  is y= 128.66854440689627 , whereas true value is T[ 188 ]= 129.134
IV.2) Some predicitions for new test vectors:
Prediction for x_test_1 is y= 126.56192024990972
Prediction for x_test_2 is y= 132.350

In [5]:
flagRange = [0,1]
kRange = list(range(1,15))
KNNErrors = []

for f in flagRange:
    for k in kRange:
        # (IV) Do KNN regression  
        print("\n#### KNN regression with flagKLinReg=", f," K=",k, " ####")
        knnr = KNNRegressifier(k,f)                                   # REPLACE dummy code: Create and fit KNNRegressifier
        knnr.fit(X,T)
        #print("IV.1) Some predictions on the training data:")
        #for i in range(N_pred): 
         #   n=idx_perm[i]
         #   print("Prediction for X[",n,"]=",X[n]," is y=",knnr.predict(X[n]),", whereas true value is T[",n,"]=",T[n])   # REPLACE dummy code: compute prediction for X[n]
        #print("IV.2) Some predicitions for new test vectors:")
        #print("Prediction for x_test_1 is y=", knnr.predict(x_test_1))    # REPLACE dummy code: compute prediction for x_test_1
        #print("Prediction for x_test_2 is y=", knnr.predict(x_test_2))    # REPLACE dummy code: compute prediction for x_test_2
        print("IV.3) S=",S,"fold Cross Validation:")
        err_abs,err_rel = knnr.crossvalidate(S,X,T)                   # REPLACE dummy code: do cross validation!! 
        KNNErrors.append((f, k, err_abs, err_rel))
        print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel) 



#### KNN regression with flagKLinReg= 0  K= 1  ####
IV.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (6.0853868175765635, 4.99436767804508, 0.001999999999981128, 23.773999999999987) 
relative errors (E,sd,min,max)= (0.04912818981049262, 0.040765285335784385, 1.5524213892472526e-05, 0.2200603513708646)

#### KNN regression with flagKLinReg= 0  K= 2  ####
IV.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (5.401201731025304, 4.437756334591083, 0.0049999999999954525, 22.825500000000005) 
relative errors (E,sd,min,max)= (0.04362866562469845, 0.03622014995869743, 3.8867252784803315e-05, 0.1854107975800121)

#### KNN regression with flagKLinReg= 0  K= 3  ####
IV.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (5.074835996449179, 3.9655100986458507, 0.0010000000000189857, 21.921333333333322) 
relative errors (E,sd,min,max)= (0.04098097175540358, 0.032219640944628085, 8.253208434935712e-06, 0.18799721244264245)

#### KNN regression with flagKLinR

absolute errors (E,sd,min,max)= (3.055287704727373, 2.9020350466057616, 0.0012752504544266685, 22.32668544746967) 
relative errors (E,sd,min,max)= (0.02449401087971545, 0.023395966015677703, 1.0811146896128832e-05, 0.19243824726314146)

#### KNN regression with flagKLinReg= 1  K= 14  ####
IV.3) S= 3 fold Cross Validation:
absolute errors (E,sd,min,max)= (3.0374429577788185, 3.599797758879916, 0.0018030588973516615, 82.33965256066749) 
relative errors (E,sd,min,max)= (0.024406729588508428, 0.030013356281360655, 1.5246435823013323e-05, 0.7097205802655429)


In [18]:
temp = []
for i,e in enumerate(KNNErrors):
    temp.append((e[2][3],i))
temp.sort()
#print(temp)

smallest_mean= KNNErrors[25][3]
smallest_sd= KNNErrors[26][3]
smallest_min= KNNErrors[19][3]
smallest_max= KNNErrors[12][3]
print(smallest_mean)
print(smallest_sd)
print(smallest_min)
print(smallest_max)
print(KNNErrors[26])

(0.02427795940437536, 0.02571620093770124, 8.337031118327341e-06, 0.43620988401224)
(0.02449401087971545, 0.023395966015677703, 1.0811146896128832e-05, 0.19243824726314146)
(0.032236385567851994, 0.042056333578962844, 2.129412293849249e-06, 0.5470062541566458)
(0.03841421498874818, 0.028513890419747126, 0.00013804239156331124, 0.13978792564378723)
(1, 13, (3.055287704727373, 2.9020350466057616, 0.0012752504544266685, 22.32668544746967), (0.02449401087971545, 0.023395966015677703, 1.0811146896128832e-05, 0.19243824726314146))


###### a) Vervollständigen Sie das Programmgerüst V2A3_regression_airfoilnoise.py um eine Least-Squares-Regression auf den Daten zu berechnen. Optimieren Sie die HyperParameter um bei einer S = 3-fachen Kreuzvalidierung möglichst kleine Fehlerwerte zu erhalten.

Welche Bedeutung haben jeweils die Hyper-Parameter lmbda, deg, flagSTD?

- lmbda: Regularisierungs parameter
- deg: Ist der Grad er polynomiellen Basisfunktion
- flagSTD: gibt an ob die Daten standartisiert werden sollen

Was passiert ohne Skalierung der Daten (flagSTD=0) bei höheren Polynomgraden
(achten Sie auf die Werte von maxZ)?

- EXCEPTION DUE TO BAD CONDITION:flagOK= 0  maxZ= 195590361182.93994  eps= 1e-06   
- der Fehlerwert beim invertieren explodiert ohne die Skalierung

Geben Sie Ihre optimalen Hyper-Parameter sowie die resultierenden Fehler-Werte
an.

- lambda=1, deg=5, flagSTD=1
- absolute errors (E,sd,min,max)= (2.2241976966993438, 3.6504292455503355, 0.0023069096170331704, 83.63604473632992)
- relative errors (E,sd,min,max)= (0.018030635646137383, 0.03132168189811296, 1.746150761488692e-05, 0.762838110293237)

Welche Prognosen ergibt Ihr Modell für die neuen Datenvektoren
x_test_1=[1250,11,0.2,69.2,0.0051] bzw. x_test_2=[1305,8,0.1,57.7,0.0048]
?

- Prediction for x_test_1 is y= 130.45406757371268
- Prediction for x_test_2 is y= 133.1060885540002

Welchen Polynomgrad und wieviele Basisfunktionen verwendet Ihr Modell?

- Polynomgrad ist 5
- mit 6 Basisfunktionen

###### b) Vervollständigen Sie das Programmgerüst V2A3_regression_airfoilnoise.py um eine KNN-Regression auf den Daten zu berechnen. Optimieren Sie die Hyper-Parameter um bei einer S = 3-fachen Kreuzvalidierung möglichst kleine Fehlerwerte zu erhalten.

Welche Bedeutung haben jeweils die Hyper-Parameter K und flagKLinReg?

- K: anzahl der NN die zum Ergebniss beitragen
- flagKLinReg: gibt an ob die KNN noch in einen LSRegressifier gegeben werden oder ob nur der Durchschnitt der KNN berechnet wird.

Geben Sie Ihre optimalen Hyper-Parameter sowie die resultierenden Fehler-Werte
an.

- flagKLinReg=1, K=13
- absolute errors (E,sd,min,max)= (3.0858901947930324, 3.0296772825440974, 0.006016325705218151, 32.13059666400561) 
- relative errors (E,sd,min,max)= (0.02482629194750422, 0.024838560765987942, 4.864074982591945e-05, 0.2851617187841634)

Welche Prognosen ergibt Ihr Modell für die neuen Datenvektoren
x_test_1=[1250,11,0.2,69.2,0.0051] bzw. x_test_2=[1305,8,0.1,57.7,0.0048]
?

- Prediction for x_test_1 is y= 126.56192024990972
- Prediction for x_test_2 is y= 132.35085577388358

###### c) Vergleichen Sie die beiden Modelle. Welches liefert die besseren Ergebnisse?

LSR: absolute errors (E,sd,min,max)= (2.2241976966993438, 3.6504292455503355, 0.0023069096170331704, 83.63604473632992)  
KNN: absolute errors (E,sd,min,max)= (3.0858901947930324, 3.0296772825440974, 0.006016325705218151, 32.13059666400561)  
absolute differences:                (−0.861692498,       0.620751963,        −0.003709416, 51.505448072)   
LSR: relative errors (E,sd,min,max)= (0.018030635646137383, 0.03132168189811296, 1.746150761488692e-05, 0.762838110293237)  
KNN: relative errors (E,sd,min,max)= (0.02482629194750422, 0.024838560765987942, 4.864074982591945e-05, 0.2851617187841634)
relative differences:                (−0.006795656,        0.006483121,          −0.000031179, 0.477676392

- KNN scheint die bessereren Ergebnisse zu liefern, verwendet intern aber auch LSR