In [1]:
"""
Do not change the input and output format.
If our script cannot run your code or the format is improper, your code will not be graded.

The only functions you need to implement in this template is linear_regression_noreg, regularized_linear_regression,
tune_lambda, and test_error.
"""

import numpy as np
import pandas as pd

###### Q4.1 ######
def linear_regression_noreg(X, y):
  """
  Compute the weight parameter given X and y.
  Inputs:
  - X: A numpy array of shape (num_samples, D) containing feature.
  - y: A numpy array of shape (num_samples, ) containing label
  Returns:
  - w: a numpy array of shape (D, )
  """
  #####################################################
  #				 YOUR CODE HERE					                    #
    Xmtx=np.asmatrix(X)
    ymtx=np.asmatrix(y)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    w=numpy.asmatrix(np.linalg.pinv(np.asarray((XmtxT*Xmtx))))*XmtxT*ymtxT
  #####################################################		 
  return w

###### Q4.2 ######
def regularized_linear_regression(X, y, lambd):
  """
    Compute the weight parameter given X, y and lambda.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing feature.
    - y: A numpy array of shape (num_samples, ) containing label
    - lambd: a float number containing regularization strength
    Returns:
    - w: a numpy array of shape (D, )
    """
  #####################################################
  #				 YOUR CODE HERE					    #
    Xmtx=np.asmatrix(X)
    ymtx=np.asmatrix(y)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    I=np.identity(len(y))
    w=numpy.asmatrix(np.linalg.pinv(numpy.asarray((XmtxT*Xmtx)))+lambd*I)*XmtxT*ymtxT
  #####################################################		 
  return w

###### Q4.3 ###### 
def tune_lambda(Xtrain, ytrain, Xval, yval, lambds):
  """
    Find the best lambda value.
    Inputs:
    - Xtrain: A numpy array of shape (num_training_samples, D) containing training feature.
    - ytrain: A numpy array of shape (num_training_samples, ) containing training label
    - Xval: A numpy array of shape (num_val_samples, D) containing validation feature.
    - yval: A numpy array of shape (num_val_samples, ) containing validation label
    - lambds: a list of lambdas
    Returns:
    - bestlambda: the best lambda you find in lambds
    """
  #####################################################
  #				 YOUR CODE HERE					                    #
    valerr=[]
    Xmtx=np.asmatrix(Xtrain)
    ymtx=np.asmatrix(ytrain)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    I=np.identity(len(y))
    for lambd in lambds:
        w_lambd=numpy.asmatrix(np.linalg.pinv(numpy.asarray((XmtxT*Xmtx)))+lambd*I)*XmtxT*ymtxT
        y_pred=Xval*w_lambd.T
        valerr.append(sum(y_pred-yval))
        
  #####################################################		 
  return bestlambda

###### Q4.4 ######
def test_error(w, X, y):
  """
    Compute the mean squre error on test set given X, y, and model parameter w.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing test feature.
    - y: A numpy array of shape (num_samples, ) containing test label
    - w: a numpy array of shape (D, )
    Returns:
    - err: the mean square error
    """
  return err


"""
NO MODIFICATIONS below this line.
You should only write your code in the above functions.
"""

def data_processing():
  white = pd.read_csv('winequality-white.csv', low_memory=False, sep=';').values

  [N, d] = white.shape

  np.random.seed(3)
  # prepare data
  ridx = np.random.permutation(N)
  ntr = int(np.round(N * 0.8))
  nval = int(np.round(N * 0.1))
  ntest = N - ntr - nval

  # spliting training, validation, and test

  Xtrain = np.hstack([np.ones([ntr, 1]), white[ridx[0:ntr], 0:-1]])

  ytrain = white[ridx[0:ntr], -1]

  Xval = np.hstack([np.ones([nval, 1]), white[ridx[ntr:ntr + nval], 0:-1]])
  yval = white[ridx[ntr:ntr + nval], -1]

  Xtest = np.hstack([np.ones([ntest, 1]), white[ridx[ntr + nval:], 0:-1]])
  ytest = white[ridx[ntr + nval:], -1]
  return Xtrain, ytrain, Xval, yval, Xtest, ytest


def main():
  np.set_printoptions(precision=3)
  Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()
  # =========================Q3.1 linear_regression=================================
  w = linear_regression_noreg(Xtrain, ytrain)
  print("======== Question 3.1 Linear Regression ========")
  print("dimensionality of the model parameter is ", len(w), ".", sep="")
  print("model parameter is ", np.array_str(w))
  
  # =========================Q3.2 regularized linear_regression=====================
  lambd = 5.0
  wl = regularized_linear_regression(Xtrain, ytrain, lambd)
  print("\n")
  print("======== Question 3.2 Regularized Linear Regression ========")
  print("dimensionality of the model parameter is ", len(wl), sep="")
  print("lambda = ", lambd, ", model parameter is ", np.array_str(wl), sep="")

  # =========================Q3.3 tuning lambda======================
  lambds = [0, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1, 1, 10, 10 ** 2]
  bestlambd = tune_lambda(Xtrain, ytrain, Xval, yval, lambds)
  print("\n")
  print("======== Question 3.3 tuning lambdas ========")
  print("tuning lambda, the best lambda =  ", bestlambd, sep="")

  # =========================Q3.4 report mse on test ======================
  wbest = regularized_linear_regression(Xtrain, ytrain, bestlambd)
  mse = test_error(wbest, Xtest, ytest)
  print("\n")
  print("======== Question 3.4 report MSE ========")
  print("MSE on test is %.3f" % mse)
  
if __name__ == "__main__":
    main()
    

IndentationError: unexpected indent (<ipython-input-1-445fd80c0c3b>, line 24)

In [14]:
import numpy as np
import pandas as pd

In [15]:
def data_processing():
  white = pd.read_csv('winequality-white.csv', low_memory=False, sep=';').values

  [N, d] = white.shape

  np.random.seed(3)
  # prepare data
  ridx = np.random.permutation(N)
  ntr = int(np.round(N * 0.8))
  nval = int(np.round(N * 0.1))
  ntest = N - ntr - nval

  # spliting training, validation, and test

  Xtrain = np.hstack([np.ones([ntr, 1]), white[ridx[0:ntr], 0:-1]])

  ytrain = white[ridx[0:ntr], -1]

  Xval = np.hstack([np.ones([nval, 1]), white[ridx[ntr:ntr + nval], 0:-1]])
  yval = white[ridx[ntr:ntr + nval], -1]

  Xtest = np.hstack([np.ones([ntest, 1]), white[ridx[ntr + nval:], 0:-1]])
  ytest = white[ridx[ntr + nval:], -1]
  return Xtrain, ytrain, Xval, yval, Xtest, ytest

In [21]:
np.set_printoptions(precision=3)
Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()

In [23]:
Xtrain

(3918, 12)

In [32]:
###### Q4.1 ######
def linear_regression_noreg(X, y):
    """
  Compute the weight parameter given X and y.
  Inputs:
  - X: A numpy array of shape (num_samples, D) containing feature.
  - y: A numpy array of shape (num_samples, ) containing label
  Returns:
  - w: a numpy array of shape (D, )
  """
  #####################################################
  #				 YOUR CODE HERE					                    #
    Xmtx=np.asmatrix(X)
    ymtx=np.asmatrix(y)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    w=np.asarray(np.linalg.pinv(XmtxT*Xmtx)*XmtxT*ymtxT)
    w=w.ravel()
  #####################################################		 
    return w

In [33]:
  # =========================Q3.1 linear_regression=================================
  w = linear_regression_noreg(Xtrain, ytrain)
  print("======== Question 3.1 Linear Regression ========")
  print("dimensionality of the model parameter is ", len(w), ".", sep="")
  print("model parameter is ", np.array_str(w))

dimensionality of the model parameter is 12.
model parameter is  [  2.166e+02   1.145e-01  -1.824e+00  -1.065e-02   1.037e-01   1.546e-01
   3.416e-03   2.347e-04  -2.173e+02   8.348e-01   7.366e-01   1.153e-01]


In [45]:
###### Q4.2 ######
def regularized_linear_regression(X, y, lambd):
    """
    Compute the weight parameter given X, y and lambda.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing feature.
    - y: A numpy array of shape (num_samples, ) containing label
    - lambd: a float number containing regularization strength
    Returns:
    - w: a numpy array of shape (D, )
    """
  #####################################################
  #				 YOUR CODE HERE					    #
    Xmtx=np.asmatrix(X)
    ymtx=np.asmatrix(y)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    I=np.identity(X.shape[1])
    w=np.asarray(np.linalg.pinv(XmtxT*Xmtx+lambd*I)*XmtxT*ymtxT)
    w=w.ravel()
  #####################################################		 
    return w

In [46]:
  # =========================Q3.2 regularized linear_regression=====================
lambd = 5.0
wl = regularized_linear_regression(Xtrain, ytrain, lambd)
print("\n")
print("======== Question 3.2 Regularized Linear Regression ========")
print("dimensionality of the model parameter is ", len(wl), sep="")
print("lambda = ", lambd, ", model parameter is ", np.array_str(wl), sep="")



dimensionality of the model parameter is 12
lambda = 5.0, model parameter is [  6.324e-01  -2.854e-02  -1.641e+00  -3.577e-02   2.803e-02  -1.362e-01
   5.259e-03  -8.421e-04   5.888e-01   2.887e-01   4.162e-01   3.768e-01]


In [94]:
###### Q4.3 ###### 
def tune_lambda(Xtrain, ytrain, Xval, yval, lambds):
    """
    Find the best lambda value.
    Inputs:
    - Xtrain: A numpy array of shape (num_training_samples, D) containing training feature.
    - ytrain: A numpy array of shape (num_training_samples, ) containing training label
    - Xval: A numpy array of shape (num_val_samples, D) containing validation feature.
    - yval: A numpy array of shape (num_val_samples, ) containing validation label
    - lambds: a list of lambdas
    Returns:
    - bestlambda: the best lambda you find in lambds
    """
  #####################################################
  #				 YOUR CODE HERE					                    #
    valerr=[]
    Xmtx=np.asmatrix(Xtrain)
    ymtx=np.asmatrix(ytrain)
    Xvmtx=np.asmatrix(Xval)
    yvmtx=np.asmatrix(yval)
    XmtxT=Xmtx.T
    ymtxT=ymtx.T
    yvmtxT=yvmtx.T
    I=np.identity(Xtrain.shape[1])
    for lambd in lambds:
        w_lambd=np.linalg.pinv(XmtxT*Xmtx+lambd*I)*XmtxT*ymtxT  # D*1
        y_pred=Xval*w_lambd  #N*1
        err=np.average(np.square(np.asarray(yvmtx.T-Xvmtx*w_lambd)))
        valerr.append(err)
    bestlambd=lambds[valerr.index(min(valerr))]
        
  #####################################################		 
    return bestlambd


In [95]:
# =========================Q3.3 tuning lambda======================
lambds = [0, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1, 1, 10, 10 ** 2]
bestlambd = tune_lambda(Xtrain, ytrain, Xval, yval, lambds)
print("\n")
print("======== Question 3.3 tuning lambdas ========")
print("tuning lambda, the best lambda =  ", bestlambd, sep="")



tuning lambda, the best lambda =  0.001


In [79]:
###### Q4.4 ######
def test_error(w, X, y):
    """
    Compute the mean squre error on test set given X, y, and model parameter w.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing test feature.
    - y: A numpy array of shape (num_samples, ) containing test label
    - w: a numpy array of shape (D, )
    Returns:
    - err: the mean square error
    """
    Xmtx=np.asmatrix(X)      #490*12
    ymtx=np.asmatrix(y)      #1*490
    wmtx=np.asmatrix(w)      #1*12
    wmtxT=wmtx.T             #12*1
    XmtxT=Xmtx.T             #490*12
    ymtxT=ymtx.T             #490*1
    err=np.average(np.square(np.asarray(ymtxT-Xmtx*wmtxT)))
    return err


In [96]:
# =========================Q3.4 report mse on test ======================
wbest = regularized_linear_regression(Xtrain, ytrain, bestlambd)
mse = test_error(wbest, Xtest, ytest)
print("\n")
print("======== Question 3.4 report MSE ========")
print("MSE on test is %.3f" % mse)



MSE on test is 0.512


In [76]:
# Note:
Xtest.shape
np.asmatrix(Xtest).shape
ytest.shape     # array: (490,)
np.asmatrix(ytest).shape   # matrix: (1,490) reverse!!

(490, 12)

In [99]:
"""
Do not change the input and output format.
If our script cannot run your code or the format is improper, your code will not be graded.

The only functions you need to implement in this template is linear_regression_noreg, regularized_linear_regression,
tune_lambda, and test_error.
"""

import numpy as np
import pandas as pd

###### Q4.1 ######
def linear_regression_noreg(X, y):
  """
  Compute the weight parameter given X and y.
  Inputs:
  - X: A numpy array of shape (num_samples, D) containing feature.
  - y: A numpy array of shape (num_samples, ) containing label
  Returns:
  - w: a numpy array of shape (D, )
  """
  #####################################################
  #				 YOUR CODE HERE					                    #
  Xmtx=np.asmatrix(X)
  ymtx=np.asmatrix(y)
  XmtxT=Xmtx.T
  ymtxT=ymtx.T
  w=np.asarray(np.linalg.pinv(XmtxT*Xmtx)*XmtxT*ymtxT)
  w=w.ravel()
  #####################################################		 
  return w

###### Q4.2 ######
def regularized_linear_regression(X, y, lambd):
  """
    Compute the weight parameter given X, y and lambda.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing feature.
    - y: A numpy array of shape (num_samples, ) containing label
    - lambd: a float number containing regularization strength
    Returns:
    - w: a numpy array of shape (D, )
    """
  #####################################################
  #				 YOUR CODE HERE					                    #
  Xmtx=np.asmatrix(X)
  ymtx=np.asmatrix(y)
  XmtxT=Xmtx.T
  ymtxT=ymtx.T
  I=np.identity(X.shape[1])
  w=np.asarray(np.linalg.pinv(XmtxT*Xmtx+lambd*I)*XmtxT*ymtxT)
  w=w.ravel()
  #####################################################		 
  return w

###### Q4.3 ######
def tune_lambda(Xtrain, ytrain, Xval, yval, lambds):
  """
    Find the best lambda value.
    Inputs:
    - Xtrain: A numpy array of shape (num_training_samples, D) containing training feature.
    - ytrain: A numpy array of shape (num_training_samples, ) containing training label
    - Xval: A numpy array of shape (num_val_samples, D) containing validation feature.
    - yval: A numpy array of shape (num_val_samples, ) containing validation label
    - lambds: a list of lambdas
    Returns:
    - bestlambda: the best lambda you find in lambds
    """
  #####################################################
  #				 YOUR CODE HERE					                    #
  valerr=[]
  Xmtx=np.asmatrix(Xtrain)
  ymtx=np.asmatrix(ytrain)
  Xvmtx=np.asmatrix(Xval)
  yvmtx=np.asmatrix(yval)
  XmtxT=Xmtx.T
  ymtxT=ymtx.T
  yvmtxT=yvmtx.T
  I=np.identity(Xtrain.shape[1])
  for lambd in lambds:
    w_lambd=np.linalg.pinv(XmtxT*Xmtx+lambd*I)*XmtxT*ymtxT  # D*1
    y_pred=Xval*w_lambd  #N*1
    err=np.average(np.square(np.asarray(yvmtx.T-Xvmtx*w_lambd)))
    valerr.append(err)
  bestlambda=lambds[valerr.index(min(valerr))]
  #####################################################		 
  return bestlambda

###### Q4.4 ######
def test_error(w, X, y):
  """
    Compute the mean squre error on test set given X, y, and model parameter w.
    Inputs:
    - X: A numpy array of shape (num_samples, D) containing test feature.
    - y: A numpy array of shape (num_samples, ) containing test label
    - w: a numpy array of shape (D, )
    Returns:
    - err: the mean square error
    """
  Xmtx=np.asmatrix(X)      #490*12
  ymtx=np.asmatrix(y)      #1*490
  wmtx=np.asmatrix(w)      #1*12
  wmtxT=wmtx.T             #12*1
  XmtxT=Xmtx.T             #490*12
  ymtxT=ymtx.T             #490*1
  err=np.average(np.square(np.asarray(ymtxT-Xmtx*wmtxT)))
  return err


"""
NO MODIFICATIONS below this line.
You should only write your code in the above functions.
"""

def data_processing():
  white = pd.read_csv('winequality-white.csv', low_memory=False, sep=';').values

  [N, d] = white.shape

  np.random.seed(3)
  # prepare data
  ridx = np.random.permutation(N)
  ntr = int(np.round(N * 0.8))
  nval = int(np.round(N * 0.1))
  ntest = N - ntr - nval

  # spliting training, validation, and test

  Xtrain = np.hstack([np.ones([ntr, 1]), white[ridx[0:ntr], 0:-1]])

  ytrain = white[ridx[0:ntr], -1]

  Xval = np.hstack([np.ones([nval, 1]), white[ridx[ntr:ntr + nval], 0:-1]])
  yval = white[ridx[ntr:ntr + nval], -1]

  Xtest = np.hstack([np.ones([ntest, 1]), white[ridx[ntr + nval:], 0:-1]])
  ytest = white[ridx[ntr + nval:], -1]
  return Xtrain, ytrain, Xval, yval, Xtest, ytest


def main():
  np.set_printoptions(precision=3)
  Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing()
  # =========================Q3.1 linear_regression=================================
  w = linear_regression_noreg(Xtrain, ytrain)
  print("======== Question 3.1 Linear Regression ========")
  print("dimensionality of the model parameter is ", len(w), ".", sep="")
  print("model parameter is ", np.array_str(w))
  
  # =========================Q3.2 regularized linear_regression=====================
  lambd = 5.0
  wl = regularized_linear_regression(Xtrain, ytrain, lambd)
  print("\n")
  print("======== Question 3.2 Regularized Linear Regression ========")
  print("dimensionality of the model parameter is ", len(wl), sep="")
  print("lambda = ", lambd, ", model parameter is ", np.array_str(wl), sep="")

  # =========================Q3.3 tuning lambda======================
  lambds = [0, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1, 1, 10, 10 ** 2]
  bestlambd = tune_lambda(Xtrain, ytrain, Xval, yval, lambds)
  print("\n")
  print("======== Question 3.3 tuning lambdas ========")
  print("tuning lambda, the best lambda =  ", bestlambd, sep="")

  # =========================Q3.4 report mse on test ======================
  wbest = regularized_linear_regression(Xtrain, ytrain, bestlambd)
  mse = test_error(wbest, Xtest, ytest)
  print("\n")
  print("======== Question 3.4 report MSE ========")
  print("MSE on test is %.3f" % mse)
  
if __name__ == "__main__":
    main()
    

dimensionality of the model parameter is 12.
model parameter is  [  2.166e+02   1.145e-01  -1.824e+00  -1.065e-02   1.037e-01   1.546e-01
   3.416e-03   2.347e-04  -2.173e+02   8.348e-01   7.366e-01   1.153e-01]


dimensionality of the model parameter is 12
lambda = 5.0, model parameter is [  6.324e-01  -2.854e-02  -1.641e+00  -3.577e-02   2.803e-02  -1.362e-01
   5.259e-03  -8.421e-04   5.888e-01   2.887e-01   4.162e-01   3.768e-01]


tuning lambda, the best lambda =  0.001


MSE on test is 0.512
