In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets

In [2]:
# Get Data: Do not touch it.
def get_data():
  data_url = "http://lib.stat.cmu.edu/datasets/boston"
  raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
  X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
  y = raw_df.values[1::2, 2]
  return X,y

In [3]:
# cgs
def cgs(A):
  """
    Q,R = cgs(A)
    Apply classical Gram-Schmidt to mxn rectangular/square matrix. 

    Parameters
    -------
    A: mxn rectangular/square matrix   

    Returns
    -------
    Q: mxn square matrix
    R: nxn upper triangular matrix

  """
  # ADD YOUR CODES
  m = len(A)         # get the number of rows of A
  n = len(A[0])      # get the number of columns of A
  R = np.zeros((n,n)) # create a zero matrix of nxn
  Q = np.ones((m,n))  # copy A (deep copy)
  for k in range(n):
    w = A[:,k]
    for j in range(k-1):
      R[j,k] = np.dot(Q[:,j],w)
    for j in range(k-1):
        w = w - R[j,k]*Q[:,j]
    R[k,k] = np.linalg.norm(w)
    Q[:,k] = w/R[k,k]
  return Q, R

  

In [4]:
# Implement BACK SUBS
def backsubs(U, b):

  """
  x = backsubs(U, b)
  Apply back substitution for the square upper triangular system Ux=b. 

  Parameters
  -------
    U: nxn square upper triangular array
    b: n array
    

  Returns
  -------
    x: n array
  """

  n= U.shape[1]
  x= np.zeros((n,))
  b_copy= np.copy(b)

  if U[n-1,n-1]==0.0:
    if b[n-1] != 0.0:
      print("System has no solution.")
  
  else:
    x[n-1]= b_copy[n-1]/U[n-1,n-1]
  for i in range(n-2,-1,-1):
    if U[i,i]==0.0:
      if b[i]!= 0.0:
        print("System has no solution.")
    else:
      for j in range(i,n):
        b_copy[i] -=U[i,j]*x[j]
      x[i]= b_copy[i]/U[i,i]
  return x

In [14]:
backsubs(X, y)

System has no solution.
System has no solution.


array([-2.71879973e+03,  0.00000000e+00, -1.87647176e-01,  0.00000000e+00,
        1.40790666e+01,  1.86663268e+00,  1.01408277e-01,  4.28123741e+00,
       -4.02435638e+00,  2.74458402e-02, -9.08640665e-01,  1.43698001e-03,
        1.38128581e+00])

In [5]:
# Add ones
def add_ones(X):

  # ADD YOUR CODES
  one = np.ones((len(X))).reshape(len(X),1)

  X = np.hstack((one,X))
  return X

In [13]:
## Get X and y
X,y= get_data()
X

(506, 13)

In [7]:
## Add ones to X
X= add_ones(X)
X

array([[1.0000e+00, 6.3200e-03, 1.8000e+01, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [1.0000e+00, 2.7310e-02, 0.0000e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [1.0000e+00, 2.7290e-02, 0.0000e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [1.0000e+00, 6.0760e-02, 0.0000e+00, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0000e+00, 1.0959e-01, 0.0000e+00, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [1.0000e+00, 4.7410e-02, 0.0000e+00, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [8]:
def split_data(X,Y, train_size):
  # ADD YOUR CODES
  # shuffle the data before splitting it
  

SyntaxError: ignored

In [9]:
# Split (X,y) into X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test= ...

TypeError: ignored

In [10]:
def mse(y, y_pred):
    # ADD YOUR CODES
    pass

In [11]:
from numpy.linalg import inv 

In [12]:
def normalEquation(X,y):
    # ADD YOUR CODES
    theta_hat = inv(np.transpose(X) @ X) @ (np.transpose(X)@y)
    return theta_hat

In [None]:
class LinearRegression:

  def __init__(self, model):
      # ADD YOUR CODES

      
  def fit(self,x,y):
      # ADD YOUR CODES
      self.x = x
      self.y = y
      if self.model == 'normalEquation':
        return inv(np.transpose(self.x) @ self.x) @ (np.transpose(self.x)@self.y)
      elif self.model == 'backsubs':
        return backsubs(self.x, self.y)
      
    
  def predict(self,x):
      #ADD YOUR CODES

IndentationError: ignored

In [None]:
# Instanciate the LinearRegression class 
model= ...

In [None]:
# Train the model

In [None]:
# print the learned theta

In [None]:
# Make a prediction on X_test

In [None]:
# Compute the MSE (Evaluate both, regression and classification)