## 5.2.2
### Author: Piyushi
##### data loading from Suvodip's Set2 2.1.3

In [1]:
#Import required packages
import numpy as np
import scipy.io
import pandas as pd
import random
from sklearn.linear_model import Ridge
from sklearn.metrics import explained_variance_score
from sklearn.preprocessing import normalize
from sklearn.model_selection import KFold
from sklearn.svm import SVR
# Seed Python's built-in `random` module. This call does not seed NumPy's
# generator; the KFold splitters in the cross-validation cells pass an
# explicit `random_state` instead.
random.seed(0)

### Load DrivFace image data

In [2]:
# Prerequisite: DrivFace.mat must already be uploaded to session storage.
img_data = scipy.io.loadmat('DrivFace.mat')

# First element of the 'drivFaceD' entry; the feature matrix is nested two
# levels further down inside it.
drivFaceData = img_data['drivFaceD'][0]

# normalize() with its default arguments rescales every row to unit L2 norm,
# so the scaling is per sample and does not mix information across rows.
X_raw = normalize(drivFaceData[0][0])
print(X_raw.shape)

(606, 6400)


### Load DrivFace label data

In [3]:
# Prerequisite: drivPoints.txt must already be uploaded to session storage.
labels_file = "drivPoints.txt"
label_data = pd.read_csv(labels_file)

# Preview the first five rows of the label table.
print(label_data.head(n=5))

                  fileName  subject  imgNum  label  ang   xF   yF   wF   hF  \
0  20130529_01_Driv_001_f         1       1      2    0  292  209  100  112   
1  20130529_01_Driv_002_f         1       2      2    0  286  200  109  128   
2  20130529_01_Driv_003_f         1       3      2    0  290  204  105  121   
3  20130529_01_Driv_004_f         1       4      2    0  287  202  112  118   
4  20130529_01_Driv_005_f         1       5      2    0  290  193  104  119   

   xRE  yRE  xLE  yLE   xN   yN  xRM  yRM  xLM  yLM  
0  323  232  367  231  353  254  332  278  361  278  
1  324  235  366  235  353  258  333  281  361  281  
2  325  240  367  239  351  260  334  282  362  282  
3  325  230  369  230  353  253  335  274  362  275  
4  325  224  366  225  353  244  333  268  363  268  


#### 3-fold CV with identity feature map

In [None]:
# Target matrix with one row per row of label_data and the columns in the
# order xF, yF, wF, hF (so column index 1 is yF).
target_columns = ['xF', 'yF', 'wF', 'hF']
y = np.column_stack([label_data[name] for name in target_columns])

In [4]:
# Repeated 3-fold cross-validation on the raw features (X_raw).
# For each regularisation strength, Ridge and SVR are fitted to the yF target
# over 5 differently seeded shuffles, and the explained variance on every
# held-out fold is printed and collected.
for reg in (0.01, 0.1, 1, 10, 100):
    print("For Regularization ", reg)
    # Ridge takes the strength directly; SVR's C is inversely related to
    # regularisation strength, hence 1/reg.
    clf_ridge, clf_svr = Ridge(alpha=reg), SVR(C=1.0/reg)
    net_ridge, net_svr = [], []

    for i in range(5):
        print("  Trial:{}\n".format(i))
        # The trial index doubles as the shuffle seed, so each trial uses a
        # different (but reproducible) partition.
        kf = KFold(n_splits=3, shuffle=True, random_state=i)

        for train_index, val_index in kf.split(X_raw):
            X_train, y_train = X_raw[train_index], y[train_index]
            X_val, y_val = X_raw[val_index], y[val_index]
            # Only column 1 of y (yF) is used as the regression target.
            yF_train, yF_val = y_train[:, 1], y_val[:, 1]

            # fit() returns the estimator itself, so fit and predict chain.
            y_pred_ridge = clf_ridge.fit(X_train, yF_train).predict(X_val)
            net_ridge.append(explained_variance_score(yF_val, y_pred_ridge))
            print(f"   Ridge: Explained variance for yF: {net_ridge[-1]}")

            y_pred_svr = clf_svr.fit(X_train, yF_train).predict(X_val)
            net_svr.append(explained_variance_score(yF_val, y_pred_svr))
            print(f"   SVR: Explained variance for yF: {net_svr[-1]}")

    # Mean over all 15 fold scores (5 trials x 3 folds) for this strength.
    print("   Avg EV for Ridge", np.mean(net_ridge))
    print("   Avg EV for SVR", np.mean(net_svr))

For Regularization  0.01
  Trial:0

   Ridge: Explained variance for yF: 0.7473730862370739
   SVR: Explained variance for yF: 0.7847159058922946
   Ridge: Explained variance for yF: 0.803833670167283
   SVR: Explained variance for yF: 0.7948223153233511
   Ridge: Explained variance for yF: 0.7938810162150396
   SVR: Explained variance for yF: 0.8671176658448388
  Trial:1

   Ridge: Explained variance for yF: 0.75571028978622
   SVR: Explained variance for yF: 0.7646167828065491
   Ridge: Explained variance for yF: 0.8426296241128325
   SVR: Explained variance for yF: 0.9063733919560811
   Ridge: Explained variance for yF: 0.7536551680561072
   SVR: Explained variance for yF: 0.7936925125627966
  Trial:2

   Ridge: Explained variance for yF: 0.7600202709629906
   SVR: Explained variance for yF: 0.8191947913636124
   Ridge: Explained variance for yF: 0.7709990416088889
   SVR: Explained variance for yF: 0.7665423599360321
   Ridge: Explained variance for yF: 0.8088214194458265
   SVR: E

### comment: 
Explained variance scores are better than the previous results in Set2. Explained variance gets worse as regularization increases.

In [12]:
# Quadratic feature map: [raw features, element-wise squares, constant 1].
X1 = X_raw
X2 = X1 * X1  # element-wise square of every feature
# Explicit (n_samples, 1) column of ones appended as the last feature.
ones_column = np.ones((X1.shape[0], 1))
X = np.concatenate((X1, X2, ones_column), axis=1)

In [15]:
# Repeated 3-fold cross-validation on the expanded feature matrix X.
# For each regularisation strength, Ridge and SVR are fitted to the yF target
# over 5 differently seeded shuffles, and the explained variance on every
# held-out fold is printed and collected.
for reg in (0.01, 0.1, 1, 10, 100):
    print("For Regularization ", reg)
    # Ridge takes the strength directly; SVR's C is inversely related to
    # regularisation strength, hence 1/reg.
    clf_ridge, clf_svr = Ridge(alpha=reg), SVR(C=1.0/reg)
    net_ridge, net_svr = [], []

    for i in range(5):
        print("  Trial:{}\n".format(i))
        # The trial index doubles as the shuffle seed, so each trial uses a
        # different (but reproducible) partition.
        kf = KFold(n_splits=3, shuffle=True, random_state=i)

        for train_index, val_index in kf.split(X):
            X_train, y_train = X[train_index], y[train_index]
            X_val, y_val = X[val_index], y[val_index]
            # Only column 1 of y (yF) is used as the regression target.
            yF_train, yF_val = y_train[:, 1], y_val[:, 1]

            # fit() returns the estimator itself, so fit and predict chain.
            y_pred_ridge = clf_ridge.fit(X_train, yF_train).predict(X_val)
            net_ridge.append(explained_variance_score(yF_val, y_pred_ridge))
            print(f"   Ridge: Explained variance for yF: {net_ridge[-1]}")

            y_pred_svr = clf_svr.fit(X_train, yF_train).predict(X_val)
            net_svr.append(explained_variance_score(yF_val, y_pred_svr))
            print(f"   SVR: Explained variance for yF: {net_svr[-1]}")

    # Mean over all 15 fold scores (5 trials x 3 folds) for this strength.
    print("   Avg EV for Ridge", np.mean(net_ridge))
    print("   Avg EV for SVR", np.mean(net_svr))

For Regularization  0.01
  Trial:0

   Ridge: Explained variance for yF: 0.7473899514402607
   SVR: Explained variance for yF: 0.6159617453758472
   Ridge: Explained variance for yF: 0.8038566212146037
   SVR: Explained variance for yF: 0.5824495338023841
   Ridge: Explained variance for yF: 0.7938995818715897
   SVR: Explained variance for yF: 0.6960399957224899
  Trial:1

   Ridge: Explained variance for yF: 0.7557281922149675
   SVR: Explained variance for yF: 0.5936063535755015
   Ridge: Explained variance for yF: 0.8426430292183845
   SVR: Explained variance for yF: 0.7651659002889529
   Ridge: Explained variance for yF: 0.7536613860168013
   SVR: Explained variance for yF: 0.5613651896112379
  Trial:2

   Ridge: Explained variance for yF: 0.7600313328375268
   SVR: Explained variance for yF: 0.6587373287173282
   Ridge: Explained variance for yF: 0.771020365021235
   SVR: Explained variance for yF: 0.5668105493022046
   Ridge: Explained variance for yF: 0.808841835585866
   SVR: 

### comment: 
Ridge regression's explained variance scores are better than the previous Set2 results. SVR's explained variance scores are almost the same as the previous results in Set2. Explained variance gets worse as regularization increases.