In [3]:
#!/usr/bin/env python
# V2A3_regression_airfoilnoise.py
# Programmgeruest zu Versuch 2, Aufgabe 3
# to log outputs start with: python V2A3_regression_airfoilnoise.py >V2A3_regression_airfoilnoise.log

import numpy as np
import pandas as pd

from V2A2_Regression import *


# ***** MAIN PROGRAM ********
# (I) Hyper-parameters used by the cells below
S = 3            # number of folds for the S-fold cross-validation
lmbda = 1        # regularization parameter (lambda>0 also avoids singularities)
K = 13           # K for K-Nearest Neighbors
flagKLinReg = 1  # if 1 and K>=D: make the prediction by a linear regression of the KNNs
deg = 5          # degree of the polynomial basis functions
flagSTD = 1      # if >0: standardize data before training (mean value 0, standard deviation 1)
N_pred = 5       # number of predictions on the training set for testing
# New feature vectors for which predictions are requested
x_test_1 = [1250, 11, 0.2, 69.2, 0.0051]
x_test_2 = [1305, 8, 0.1, 57.7, 0.0048]



In [4]:
# (II) Load data
SEED = 42                              # fixed seed so the randomly selected demo vectors are reproducible
fname='./AirfoilSelfNoise/airfoil_self_noise.xls'
# NOTE(review): the recorded output of this cell reports N=1502 with a first row at 1000 Hz.
# If the sheet has no header row, the default header=0 turns the first sample into column
# names and silently drops it -- confirm against the file and pass header=None if so.
airfoil_data = pd.read_excel(fname, sheet_name=0)   # load first sheet as pandas data frame
T = airfoil_data.values[:,5]           # target values = noise load (= column 5 of data table)
X = airfoil_data.values[:,:5]          # feature vectors (= column 0-4 of data table)
N,D=X.shape                            # size and dimensionality of data set
# Seed NumPy's global RNG before drawing the permutation. This also makes any later code that
# draws from the global RNG repeatable (presumably crossvalidate does -- not visible here).
np.random.seed(SEED)
idx_perm = np.random.permutation(N)    # random permutation for selection of demo vectors
print("Data set ",fname," has size N=", N, " and dimensionality D=",D)
print("X=",X)
print("T=",T)
print("x_test_1=",x_test_1)
print("x_test_2=",x_test_2)
print("number of basis functions M=", len(phi_polynomial(X[1],deg)))



Data set  ./AirfoilSelfNoise/airfoil_self_noise.xls  has size N= 1502  and dimensionality D= 5
X= [[1.00000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 [1.25000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 [1.60000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]
 ...
 [4.00000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]
 [5.00000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]
 [6.30000e+03 1.56000e+01 1.01600e-01 3.96000e+01 5.28487e-02]]
T= [125.201 125.951 127.591 ... 106.604 106.224 104.204]
x_test_1= [1250, 11, 0.2, 69.2, 0.0051]
x_test_2= [1305, 8, 0.1, 57.7, 0.0048]
number of basis functions M= 252


In [5]:
# (III) Least-squares regression with regularization:
# fit on the full data set, show a few predictions, then cross-validate.
print("\n#### Least Squares Regression with regularization lambda=", lmbda, " ####")
lsr = LSRRegressifier(
    lmbda=lmbda,
    phi=lambda x: phi_polynomial(x,deg),   # polynomial basis functions of degree deg
    flagSTD=flagSTD,
)
lsr.fit(X,T)
print("lsr.W_LSR=",lsr.W_LSR)

print("III.1) Some predictions on the training data:")
for n in idx_perm[:N_pred]:                # first N_pred entries of the random permutation
    y_n = lsr.predict(X[n])
    print("Prediction for X[",n,"]=",X[n]," is y=",y_n,", whereas true value is T[",n,"]=",T[n])

print("III.2) Some predicitions for new test vectors:")
y_test_1 = lsr.predict(x_test_1)
print("Prediction for x_test_1 is y=", y_test_1)
y_test_2 = lsr.predict(x_test_2)
print("Prediction for x_test_2 is y=", y_test_2)

print("III.3) S=",S,"fold Cross Validation:")
err_abs,err_rel = lsr.crossvalidate(S,X,T)
print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel)




#### Least Squares Regression with regularization lambda= 1  ####
lsr.W_LSR= [-3.94646520e-01 -1.80155527e+00 -3.96662974e-01 -8.64281233e-01
  3.26438812e-01 -6.21770966e-01  6.48905815e-01 -7.71134930e-01
  2.95779376e-01 -8.67132337e-02  3.61560493e-01 -2.81989365e-02
 -3.64074945e-02  1.43998983e-01  4.77503076e-02 -3.63380016e-02
 -9.73944497e-02 -3.71505203e-02 -1.53013579e-01 -2.47759041e-01
  1.29242314e-01  3.13172340e-01  1.50772922e-01  8.40072758e-01
 -8.44083280e-02  1.32532606e+00  2.52907215e-01  2.08504016e-01
 -2.03801432e-01  5.74149325e-01  3.06551563e-01 -2.34531355e-01
  4.66271349e-01 -2.43145107e-01  1.06213081e-02  2.84851888e-01
 -8.48806231e-02 -2.00727798e-01 -6.93365225e-02 -5.88853578e-02
  7.29728355e-02 -1.36528243e-01 -2.26202702e-01 -1.28035094e-01
 -2.24863325e-01 -1.13338956e-01  4.19411521e-01 -1.68848150e-02
  9.78093460e-02 -9.27714500e-02 -1.60258656e-02 -3.22486764e-01
  1.01134491e-01  7.97108235e-03 -1.81313744e-01 -3.69616798e-01
 -2.47146762

In [None]:
# Grid search over regularization parameter and polynomial degree for the
# least-squares regressifier; every grid point is scored by S-fold cross-validation.
lmbdaRange = [1,2,5,10,30,60,100]
degRange = list(range(1,8))
LSRErrors = []      # one tuple (lmbda, deg, err_abs, err_rel) per grid point

for lmb in lmbdaRange:
    for d in degRange:
        print("\n#### Least Squares Regression with regularization lambda=", lmb, " deg=",d,  " ####")
        # Bind d as a default argument: a plain closure would look up the loop
        # variable at call time and silently pick up a later value of d.
        lsr = LSRRegressifier(lmbda=lmb,phi=lambda x, d=d: phi_polynomial(x,d),flagSTD=flagSTD)
        lsr.fit(X,T)
        print("lsr.W_LSR=",lsr.W_LSR)
        print("III.3) S=",S,"fold Cross Validation:")
        err_abs,err_rel = lsr.crossvalidate(S,X,T)
        LSRErrors.append((lmb, d, err_abs, err_rel))
        print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel)

# Evaluate the collected results: pick the grid point with the smallest mean
# absolute error (first entry of the (E,sd,min,max) tuple).
if LSRErrors:
    best_lmb, best_d, best_abs, best_rel = min(LSRErrors, key=lambda r: r[2][0])
    print("\n#### Best LSR hyper-parameters (smallest mean absolute error): lambda=", best_lmb, " deg=", best_d, " ####")
    print("absolute errors (E,sd,min,max)=", best_abs, "\nrelative errors (E,sd,min,max)=", best_rel)

In [7]:
# (IV) KNN regression:
# fit on the full data set, show a few predictions, then cross-validate.
print("\n#### KNN regression with flagKLinReg=", flagKLinReg, " ####")
knnr = KNNRegressifier(K,flagKLinReg)
knnr.fit(X,T)

print("IV.1) Some predictions on the training data:")
for n in idx_perm[:N_pred]:                # first N_pred entries of the random permutation
    y_n = knnr.predict(X[n])
    print("Prediction for X[",n,"]=",X[n]," is y=",y_n,", whereas true value is T[",n,"]=",T[n])

print("IV.2) Some predicitions for new test vectors:")
y_test_1 = knnr.predict(x_test_1)
print("Prediction for x_test_1 is y=", y_test_1)
y_test_2 = knnr.predict(x_test_2)
print("Prediction for x_test_2 is y=", y_test_2)

print("IV.3) S=",S,"fold Cross Validation:")
err_abs,err_rel = knnr.crossvalidate(S,X,T)
print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel)




#### KNN regression with flagKLinReg= 1  ####
IV.1) Some predictions on the training data:
Prediction for X[ 642 ]= [5.00000e+03 9.90000e+00 1.52400e-01 7.13000e+01 1.93001e-02]  is y= 115.72024509094034 , whereas true value is T[ 642 ]= 114.569
Prediction for X[ 6 ]= [4.00000e+03 0.00000e+00 3.04800e-01 7.13000e+01 2.66337e-03]  is y= 122.63775641751042 , whereas true value is T[ 6 ]= 123.061
Prediction for X[ 1359 ]= [4.00000e+02 6.70000e+00 1.01600e-01 3.96000e+01 5.78076e-03]  is y= 125.48188306776453 , whereas true value is T[ 1359 ]= 128.295
Prediction for X[ 990 ]= [1.00000e+04 0.00000e+00 2.54000e-02 3.96000e+01 4.28464e-04]  is y= 127.62332699282784 , whereas true value is T[ 990 ]= 133.376
Prediction for X[ 953 ]= [8.0000e+02 1.9700e+01 5.0800e-02 3.9600e+01 3.6484e-02]  is y= 127.19837760085018 , whereas true value is T[ 953 ]= 119.174
IV.2) Some predicitions for new test vectors:
Prediction for x_test_1 is y= 126.56192024990972
Prediction for x_test_2 is y= 132.35085577388

In [None]:
# Grid search over flagKLinReg and K for the KNN regressifier;
# every grid point is scored by S-fold cross-validation.
flagRange = [0,1]
kRange = list(range(1,15))
KNNErrors = []      # one tuple (flagKLinReg, K, err_abs, err_rel) per grid point

for f in flagRange:
    for k in kRange:
        print("\n#### KNN regression with flagKLinReg=", f," K=",k, " ####")
        knnr = KNNRegressifier(k,f)
        knnr.fit(X,T)
        print("IV.3) S=",S,"fold Cross Validation:")
        err_abs,err_rel = knnr.crossvalidate(S,X,T)
        KNNErrors.append((f, k, err_abs, err_rel))
        print("absolute errors (E,sd,min,max)=", err_abs, "\nrelative errors (E,sd,min,max)=", err_rel)

# Evaluate the collected results: pick the grid point with the smallest mean
# absolute error (first entry of the (E,sd,min,max) tuple).
if KNNErrors:
    best_f, best_k, best_abs, best_rel = min(KNNErrors, key=lambda r: r[2][0])
    print("\n#### Best KNN hyper-parameters (smallest mean absolute error): flagKLinReg=", best_f, " K=", best_k, " ####")
    print("absolute errors (E,sd,min,max)=", best_abs, "\nrelative errors (E,sd,min,max)=", best_rel)


###### a) Vervollständigen Sie das Programmgerüst V2A3_regression_airfoilnoise.py um eine Least-Squares-Regression auf den Daten zu berechnen. Optimieren Sie die HyperParameter um bei einer S = 3-fachen Kreuzvalidierung möglichst kleine Fehlerwerte zu erhalten.

Welche Bedeutung haben jeweils die Hyper-Parameter lmbda, deg, flagSTD?

- lmbda: Regularisierungsparameter (lambda>0 vermeidet zudem Singularitäten)
- deg: Grad der polynomiellen Basisfunktionen
- flagSTD: gibt an, ob die Daten vor dem Training standardisiert werden sollen (Mittelwert 0, Standardabweichung 1)

Was passiert ohne Skalierung der Daten (flagSTD=0) bei höheren Polynomgraden
(achten Sie auf die Werte von maxZ)?

- EXCEPTION DUE TO BAD CONDITION:flagOK= 0  maxZ= 195590361182.93994  eps= 1e-06   
- Ohne Skalierung ist die zu invertierende Matrix schlecht konditioniert: Der Fehlerwert maxZ beim Invertieren explodiert und liegt weit über der Toleranz eps.

Geben Sie Ihre optimalen Hyper-Parameter sowie die resultierenden Fehler-Werte
an.

- lambda=1, deg=5, flagSTD=1
- absolute errors (E,sd,min,max)= (2.2241976966993438, 3.6504292455503355, 0.0023069096170331704, 83.63604473632992)
- relative errors (E,sd,min,max)= (0.018030635646137383, 0.03132168189811296, 1.746150761488692e-05, 0.762838110293237)

Welche Prognosen ergibt Ihr Modell für die neuen Datenvektoren
x_test_1=[1250,11,0.2,69.2,0.0051] bzw. x_test_2=[1305,8,0.1,57.7,0.0048]
?

- Prediction for x_test_1 is y= 130.45406757371268
- Prediction for x_test_2 is y= 133.1060885540002

Welchen Polynomgrad und wieviele Basisfunktionen verwendet Ihr Modell?

- Polynomgrad ist 5
- mit M=252 Basisfunktionen (siehe Ausgabe "number of basis functions M= 252"; alle Monome bis Grad 5 in D=5 Variablen)

###### b) Vervollständigen Sie das Programmgerüst V2A3_regression_airfoilnoise.py um eine KNN-Regression auf den Daten zu berechnen. Optimieren Sie die Hyper-Parameter um bei einer S = 3-fachen Kreuzvalidierung möglichst kleine Fehlerwerte zu erhalten.

Welche Bedeutung haben jeweils die Hyper-Parameter K und flagKLinReg?

- K: Anzahl der nächsten Nachbarn (NN), die zum Ergebnis beitragen
- flagKLinReg: gibt an, ob auf den KNN noch eine lineare Regression (LSRegressifier) berechnet wird oder ob nur der Durchschnitt der KNN gebildet wird.

Geben Sie Ihre optimalen Hyper-Parameter sowie die resultierenden Fehler-Werte
an.

- flagKLinReg=1, K=13
- absolute errors (E,sd,min,max)= (3.0858901947930324, 3.0296772825440974, 0.006016325705218151, 32.13059666400561) 
- relative errors (E,sd,min,max)= (0.02482629194750422, 0.024838560765987942, 4.864074982591945e-05, 0.2851617187841634)

Welche Prognosen ergibt Ihr Modell für die neuen Datenvektoren
x_test_1=[1250,11,0.2,69.2,0.0051] bzw. x_test_2=[1305,8,0.1,57.7,0.0048]
?

- Prediction for x_test_1 is y= 126.56192024990972
- Prediction for x_test_2 is y= 132.35085577388358

###### c) Vergleichen Sie die beiden Modelle. Welches liefert die besseren Ergebnisse?

LSR: absolute errors (E,sd,min,max)= (2.2241976966993438, 3.6504292455503355, 0.0023069096170331704, 83.63604473632992)  
KNN: absolute errors (E,sd,min,max)= (3.0858901947930324, 3.0296772825440974, 0.006016325705218151, 32.13059666400561)  
absolute differences:                (−0.861692498,       0.620751963,        −0.003709416, 51.505448072)   
LSR: relative errors (E,sd,min,max)= (0.018030635646137383, 0.03132168189811296, 1.746150761488692e-05, 0.762838110293237)  
KNN: relative errors (E,sd,min,max)= (0.02482629194750422, 0.024838560765987942, 4.864074982591945e-05, 0.2851617187841634)
relative differences:                (−0.006795656,        0.006483121,          −0.000031179, 0.477676392)

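- Gemessen am mittleren Fehler liefert LSR die besseren Ergebnisse (E=2.22 gegenüber 3.09 absolut bzw. 1.8% gegenüber 2.5% relativ). KNN hat dagegen die kleinere Streuung und einen deutlich kleineren Maximalfehler (32.1 gegenüber 83.6), verwendet intern (flagKLinReg=1) aber auch LSR.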