In [None]:
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import PolynomialFeatures


In [None]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data = pd.read_csv('/content/drive/My Drive/insurance.txt')
data = data.drop(['children'], axis = 1)
data

Unnamed: 0,age,bmi,charges
0,19,27.900,16884.92400
1,18,33.770,1725.55230
2,28,33.000,4449.46200
3,33,22.705,21984.47061
4,32,28.880,3866.85520
...,...,...,...
1333,50,30.970,10600.54830
1334,18,31.920,2205.98080
1335,18,36.850,1629.83350
1336,21,25.800,2007.94500


In [None]:
X = data[['age','bmi']]
Y = data['charges']
Y = Y.to_numpy()

In [None]:
def poly_features(X, deg):
  poly = PolynomialFeatures(degree = deg)
  ftr = poly.fit_transform(X)
  return ftr

In [None]:
def splitData(X, Y):
    np.random.seed(0)
    msk = np.random.rand(len(X)) < 0.7
    
    featureTrain = X[msk,:]
    targetTrain = Y[msk]
    featureTest = X[~msk,:]
    targetTest = Y[~msk]
    
    return featureTrain, featureTest, targetTrain, targetTest

In [None]:
def splitDataReg(X, Y):
    np.random.seed(0)
    msk = np.random.rand(len(X)) < 0.7
    
    featureTrain = X[msk,:]
    targetTrain = Y[msk]
    
    
    featureTest_And_val = X[~msk,:]
    targetTest_And_val = Y[~msk]

    msk2 = np.random.rand(len(featureTest_And_val)) < 0.67

    featureTest = featureTest_And_val[~msk2,:]
    targetTest = targetTest_And_val[~msk2]

    featureValidation = featureTest_And_val[msk2,:]
    targetValidation = targetTest_And_val[msk2]
    
    #print(len(featureTrain), len(featureTest))
    return featureTrain, featureTest, targetTrain, targetTest, featureValidation, targetValidation


In [None]:
def standardize(feature_matrix, target):
  min_array=np.amin(feature_matrix,axis=0)
  max_array=np.amax(feature_matrix,axis=0)
  for i in range(1,feature_matrix.shape[1]):  # standardize features
    for j in range(feature_matrix.shape[0]):
      feature_matrix[j][i]=((feature_matrix[j][i]-min_array[i])/(max_array[i]-min_array[i]))
  targetMin=target.min();
  targetMax=target.max();
  target=(target-target.min())/(target.max()-target.min()) # standardize target
  return (feature_matrix,target,targetMax,targetMin)

**Without regularization**

In [None]:
def cost_function(theta, X, Y):
  m = X.shape[0]
  pred = X.dot(theta)
  cost = (1/(2*m))*np.sum(np.square(Y-pred))
  return cost

In [None]:
def gradient_descent(X, Y, theta, alpha, num_iters, Y_max, Y_min):
  m = X.shape[0]
  for i in range(num_iters):
    pred = X.dot(theta)
    cost_der = (1/m)*np.dot(X.transpose(), (pred-Y))
    theta = theta - alpha*cost_der    # update weights

    if i%50 == 0:                    # print error after every 50 iterations
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      print("Iteration " + str(i) + " : " + str(rmse))

  return theta

In [None]:
def gradient_descent_model(X_train, Y_train, X_test, Y_test, Y_train_max, Y_train_min, Y_test_max, Y_test_min, alpha, num_iters):          
  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent(X_train, Y_train, theta, alpha, num_iters, Y_train_max, Y_train_min)  # calculate weights using gradient descent

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))   #calcuate and print train and test errors
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print(theta)

In [None]:
for i in range(10):                                      #apply polynomial regression for polynomials of degrees from 1 to 10
  Xi = poly_features(X, i+1)
  X_train, X_test, Y_train, Y_test = splitData(Xi, Y)       #split data into train and test sets           
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train) #standardize train set
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test) #stanardize test set
  print("\nPOLYNOMIAL OF DEGREE " + str(i+1))
  gradient_descent_model(X_train, Y_train, X_test, Y_test, Y_train_max, Y_train_min, Y_test_max, Y_test_min, 0.01, 1000)


POLYNOMIAL OF DEGREE 1
Iteration 0 : 17905.623351554874
Iteration 50 : 14100.66081103754
Iteration 100 : 12979.593752988978
Iteration 150 : 12678.807268619246
Iteration 200 : 12593.544319943116
Iteration 250 : 12562.513227255473
Iteration 300 : 12545.417008316386
Iteration 350 : 12532.332705048895
Iteration 400 : 12520.796564662853
Iteration 450 : 12510.168571534463
Iteration 500 : 12500.256019001654
Iteration 550 : 12490.977780630858
Iteration 600 : 12482.282329214753
Iteration 650 : 12474.127597385897
Iteration 700 : 12466.475914937415
Iteration 750 : 12459.292629082403
Iteration 800 : 12452.545631546063
Iteration 850 : 12446.205115713836
Iteration 900 : 12440.243398447394
Iteration 950 : 12434.634765761857
Final training set RMSE : 12429.457840641535
Final test set RMSE : 13009.745375207587
[0.10045177 0.12978462 0.08188405]

POLYNOMIAL OF DEGREE 2
Iteration 0 : 17860.817057744775
Iteration 50 : 13410.68216020709
Iteration 100 : 12552.891593267324
Iteration 150 : 12406.55242718879


In [None]:
def stochastic_descent(X, Y, theta, alpha, num_iters, Y_max, Y_min):
  m = X.shape[0]
  c = 0

  np.random.seed(seed=1)                    # choose one data point at a time
  permutation = np.random.permutation(m)
  X = X[permutation,:]
  Y = Y[permutation]

  for i in range(num_iters):
    X_i = X[c:c+1,:]
    Y_i = Y[c:c+1]
    pred = X_i.dot(theta)
    cost_der = np.dot(X_i.transpose(), (pred-Y_i))
    theta = theta - alpha*cost_der         # update wights  

    if i%50 == 0:                          # print error after every 50 iterations
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      print("Iteration " + str(i) + " : " + str(rmse))

    if c+1 > m:                           # shuffle the dataset after all data points are processed
      c = 0
      permutation = np.random.permutation(m)
      X = X[permutation,:]
      Y = Y[permutation]
    else:
      c = c + 1
    
  
  return theta

In [None]:
def stochastic_descent_model(X_train, Y_train, X_test, Y_test, Y_train_max, Y_train_min, Y_test_max, Y_test_min, alpha, num_iters):
  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = stochastic_descent(X_train, Y_train, theta, alpha, num_iters, Y_train_max, Y_train_min) # calculate weights using stochastic gradient descent

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))       # calculate and print train and test errors
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print("Weights : ")
  print(theta)

In [None]:
for i in range(10):                                      #apply polynomial regression for polynomials of degrees from 1 to 10
  Xi = poly_features(X, i+1)
  X_train, X_test, Y_train, Y_test = splitData(Xi, Y)       #split data into train and test sets           
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train) #standardize train set
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test) #stanardize test set
  print("\nPOLYNOMIAL OF DEGREE " + str(i+1))
  stochastic_descent_model(X_train, Y_train, X_test, Y_test, Y_train_max, Y_train_min, Y_test_max, Y_test_min, 0.01, 1000)


POLYNOMIAL OF DEGREE 1
Iteration 0 : 17964.325886550923
Iteration 50 : 13443.860752733299
Iteration 100 : 13102.503762711192
Iteration 150 : 12774.32552122636
Iteration 200 : 12614.262978700715
Iteration 250 : 12581.046823085999
Iteration 300 : 12552.386052240678
Iteration 350 : 12550.128620902833
Iteration 400 : 12538.138902566267
Iteration 450 : 12621.665847898093
Iteration 500 : 12557.952659175327
Iteration 550 : 12529.644329214572
Iteration 600 : 12501.731457715125
Iteration 650 : 12513.949626005613
Iteration 700 : 12564.954505670246
Iteration 750 : 12516.203526011941
Iteration 800 : 12479.841484817754
Iteration 850 : 12464.036216271246
Iteration 900 : 12475.27872602608
Iteration 950 : 12452.691807530684
Final training set RMSE : 12424.089498909932
Final test set RMSE : 13000.588858942969
Weights : 
[0.09966605 0.13225786 0.08382823]

POLYNOMIAL OF DEGREE 2
Iteration 0 : 17946.098789442447
Iteration 50 : 12729.968071125331
Iteration 100 : 12723.174584349155
Iteration 150 : 12482.2

**Ridge Regression**

In [None]:
def cost_function_ridge(theta, X, Y, lam):
  m = X.shape[0]
  pred = X.dot(theta)
  cost = (1/(2*m))*np.sum(np.square(Y-pred)) + lam*(np.sum(np.square(theta)))
  return cost

In [None]:
def gradient_descent_ridge(X, Y, theta, alpha, lam, num_iters, Y_max, Y_min):
  m = X.shape[0]
  for i in range(num_iters):
    pred = X.dot(theta)
    cost_der = (1/m)*np.dot(X.transpose(), (pred-Y)) 
    theta = theta*(1 - alpha*lam) - alpha*cost_der

    if i%100 == 0:
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      #print(color.BLUE+"Iteration " + str(i) + " : " + str(rmse))

  return theta

In [None]:
def gradient_descent_model_ridge(X, Y, alpha, lam, num_iters):
  print(X)
  X_train, X_test, Y_train, Y_test = splitData(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent_ridge(X_train, Y_train, theta, alpha, lam, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print(theta)

In [None]:
 def stochastic_descent_ridge(X, Y, theta, alpha, lam, num_iters, Y_max, Y_min):
  m = X.shape[0]
  c = 0

  np.random.seed(seed=1) 
  permutation = np.random.permutation(m)
  X = X[permutation,:]
  Y = Y[permutation]

  for i in range(num_iters):
    X_i = X[c:c+1,:]
    Y_i = Y[c:c+1]
    pred = X_i.dot(theta)
    cost_der = np.dot(X_i.transpose(), (pred-Y_i))
    theta = theta*(1 - alpha*lam) - alpha*cost_der

    if i%100 == 0:
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      #print("Iteration " + str(i) + " : " + str(rmse))

    if c+1 > m:
      c = 0
      permutation = np.random.permutation(m)
      X = X[permutation,:]
      Y = Y[permutation]
    else:
      c = c + 1
    
  
  return theta

In [None]:
def stochastic_descent_model_ridge(X, Y, alpha, lam, num_iters):
  X_train, X_test, Y_train, Y_test = splitData(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  
  print(X_train.shape, Y_train.shape)
  
  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = stochastic_descent_ridge(X_train, Y_train, theta, alpha, lam, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print(theta)

**Lasso Regression**

In [None]:
def cost_function_lasso(theta, X, Y, lam):
  m = X.shape[0]
  pred = X.dot(theta)
  cost = (1/(2*m))*np.sum(np.square(Y-pred)) + lam*(np.sum(np.abs(theta)))
  return cost

In [None]:
def gradient_descent_lasso(X, Y, theta, alpha, lam, num_iters, Y_max, Y_min):
  m, n = X.shape
  for i in range(num_iters):
    Y_pred = X.dot(theta)
    for j in range(n):
      if j == 0:
        theta[j] = theta[j] + alpha*2*np.sum(Y - Y_pred) / m
      else:
        if theta[j] > 0 :
          theta[j] = theta[j] - alpha*( - ( 2 * (X[:, j]).dot(Y - Y_pred)) + lam ) / m 
        else :
          theta[j] = theta[j] - alpha*( - ( 2 * ( X[:, j] ).dot(Y - Y_pred))- lam ) / m   
   
    if i%100 == 0:
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      #print("Iteration " + str(i) + " : " + str(rmse))

  return theta

In [None]:
def gradient_descent_model_lasso(X, Y, alpha, num_iters):
  X_train, X_test, Y_train, Y_test = splitData(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent_lasso(X_train, Y_train, theta, alpha, 0.3, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print(theta)

In [None]:
def stochastic_descent_lasso(X, Y, theta, alpha, lam, num_iters, Y_max, Y_min):
  m, n = X.shape
  c = 0

  np.random.seed(seed=1) 
  permutation = np.random.permutation(m)
  X = X[permutation,:]
  Y = Y[permutation]

  for i in range(num_iters):
    X_i = X[c:c+1,:]
    Y_i = Y[c:c+1]

    Y_pred = X_i.dot(theta)

    for j in range(n):
      if j == 0:
        theta[j] = theta[j] + alpha*2*np.sum(Y_i - Y_pred) / m
      else:
        if theta[j] > 0 :
          theta[j] = theta[j] - alpha*( - ( 2 * (X_i[:,j]).dot(Y_i - Y_pred)) + lam ) / m 
        else :
          theta[j] = theta[j] - alpha*( - ( 2 * ( X_i[:,j] ).dot(Y_i - Y_pred))- lam ) / m 

    if i%100 == 0:
      Y_pred = X.dot(theta)
      rmse = math.sqrt(np.mean(np.square(Y_pred - Y)))
      rmse = rmse*(Y_max - Y_min) + Y_min
      #print("Iteration " + str(i) + " : " + str(rmse))

    if c+1 > m:
      c = 0
      permutation = np.random.permutation(m)
      X = X[permutation,:]
      Y = Y[permutation]
    else:
      c = c + 1
    
  
  return theta

In [None]:
def stochastic_descent_model_lasso(X, Y, alpha, lam, num_iters):
  X_train, X_test, Y_train, Y_test = splitData(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  
  print(X_train.shape, Y_train.shape)
  
  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = stochastic_descent_lasso(X_train, Y_train, theta, alpha, lam, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print("Final training set RMSE : " + str(rmse_train))
  print("Final test set RMSE : " + str(rmse_test))
  print(theta)

In [None]:
Xi = poly_features(X,5)
stochastic_descent_model_lasso(Xi, Y, 0.1, 0.4, 10000)

(932, 21) (932,)
Iteration 0 : 18018.237956232486
Iteration 100 : 17842.8477792137
Iteration 200 : 17669.38354848155
Iteration 300 : 17507.25738754931
Iteration 400 : 17332.748011773958
Iteration 500 : 17180.88440239027
Iteration 600 : 17017.978454748165
Iteration 700 : 16904.97713319927
Iteration 800 : 16768.919417577275
Iteration 900 : 16614.742478582873
Iteration 1000 : 16457.193264556943
Iteration 1100 : 16341.875858300602
Iteration 1200 : 16204.546702921401
Iteration 1300 : 16069.36218888299
Iteration 1400 : 15979.276741736658
Iteration 1500 : 15878.510721343077
Iteration 1600 : 15792.600050144987
Iteration 1700 : 15709.169895455445
Iteration 1800 : 15594.743951896357
Iteration 1900 : 15486.9713623519
Iteration 2000 : 15384.170959468975
Iteration 2100 : 15301.236298485628
Iteration 2200 : 15236.545917608686
Iteration 2300 : 15153.092053554119
Iteration 2400 : 15061.118443197092
Iteration 2500 : 14969.020634853014
Iteration 2600 : 14889.915509145228
Iteration 2700 : 14824.617922473

**Choosing the best regularization term for ridge regression**

In [None]:
import random
def choose_penalty_ridge_gd(num_penalty_values, X, Y, alpha, num_iters):
  penalty_values = np.array([])
  for i in range(num_penalty_values):
    #print(random.uniform(0, 1))
    penalty_values=np.append(penalty_values,np.array([random.uniform(0, 1)]))
  #print(penalty_values) 
  #print(X)
  #print(Y)
  X_train, X_test, Y_train, Y_test, X_validation, Y_validation = splitDataReg(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  X_validation, Y_validation, Y_validation_max, Y_validation_min = standardize(X_validation, Y_validation)
  min_penalty=-1
  min_error=1e9
  for i in penalty_values:
    #print("added")
    theta = np.zeros(X_train.shape[1])
    theta = np.transpose(theta)
    theta = gradient_descent_ridge(X_train, Y_train, theta, alpha, i, num_iters, Y_train_max, Y_train_min)

    Y_pred_train = X_train.dot(theta)
    Y_pred_validation = X_validation.dot(theta)

    rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
    rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
    rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
    rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min

    print(color.BOLD+color.RED+"Penalty : ",i," train error : ",rmse_train," validation error : ",rmse_validation,color.END)

    if rmse_validation < min_error :
      min_penalty = i
      min_error = rmse_validation

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent_ridge(X_train, Y_train, theta, alpha, min_penalty, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)
  Y_pred_validation = X_validation.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
  rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print(color.BOLD+color.RED+color.UNDERLINE+"Best Penalty : ",min_penalty," Best train error : ",rmse_train," Best validation error : ",rmse_validation," Best testing error : ",rmse_test,color.END)
  return min_penalty


In [None]:
def choose_penalty_ridge_model_gd(X,Y,num_penalty_values,alpha, num_iters):
  for i in range(10):
    Xi=poly_features(X, 1+i)
    print(color.DARKCYAN+"\n \nPolynomial degree : ", i+1,color.END+"\n\n")
    lam = choose_penalty_ridge_gd(num_penalty_values, Xi, Y, alpha, num_iters)

In [None]:
choose_penalty_ridge_model_gd(X,Y,10,0.01, 10000)

[36m
 
Polynomial degree :  1 [0m


[1m[91mPenalty :  0.3905388213554941  train error :  12874.43634939963  validation error :  13986.757207326315 [0m
[1m[91mPenalty :  0.646767920896396  train error :  13234.220313146712  validation error :  14394.817567395256 [0m
[1m[91mPenalty :  0.7579376512307928  train error :  13387.844422238606  validation error :  14561.88761079304 [0m
[1m[91mPenalty :  0.40276646253945536  train error :  12891.425369212091  validation error :  14006.766448070568 [0m
[1m[91mPenalty :  0.7155198899831008  train error :  13329.658877866881  validation error :  14498.979958786393 [0m
[1m[91mPenalty :  0.6190077874509634  train error :  13195.36899248598  validation error :  14352.025349821966 [0m
[1m[91mPenalty :  0.15439158958118993  train error :  12558.724825590863  validation error :  13593.576158681377 [0m
[1m[91mPenalty :  0.015458344010213065  train error :  12341.512654617767  validation error :  13325.183402542412 [0m
[1m[91mP

In [None]:
def choose_penalty_ridge_sgd(num_penalty_values, X, Y, alpha, num_iters):
  penalty_values = np.array([])
  for i in range(num_penalty_values):
    #print(random.uniform(0, 1))
    penalty_values=np.append(penalty_values,np.array([random.uniform(0, 1)]))
  #print(penalty_values) 
  #print(X)
  #print(Y)
  X_train, X_test, Y_train, Y_test, X_validation, Y_validation = splitDataReg(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  X_validation, Y_validation, Y_validation_max, Y_validation_min = standardize(X_validation, Y_validation)
  min_penalty=-1
  min_error=1e9
  for i in penalty_values:
    #print("added")
    theta = np.zeros(X_train.shape[1])
    theta = np.transpose(theta)
    theta = stochastic_descent_ridge(X_train, Y_train, theta, alpha, i, num_iters, Y_train_max, Y_train_min)

    Y_pred_train = X_train.dot(theta)
    Y_pred_validation = X_validation.dot(theta)

    rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
    rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
    rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
    rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min

    print(color.BOLD+color.RED+"Penalty : ",i," train error : ",rmse_train," validation error : ",rmse_validation,color.END)

    if rmse_validation < min_error :
      min_penalty = i
      min_error = rmse_validation

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = stochastic_descent_ridge(X_train, Y_train, theta, alpha, min_penalty, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)
  Y_pred_validation = X_validation.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
  rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print(color.BOLD+color.RED+color.UNDERLINE+"Best Penalty : ",min_penalty," Best train error : ",rmse_train," Best validation error : ",rmse_validation," Best testing error : ",rmse_test,color.END)
  return min_penalty


In [None]:
def choose_penalty_ridge_model_sgd(X,Y,num_penalty_values,alpha, num_iters):
  for i in range(10):
    Xi=poly_features(X, 1+i)
    print(color.DARKCYAN+"\n \nPolynomial degree : ", i+1,color.END+"\n\n")
    lam = choose_penalty_ridge_sgd(num_penalty_values, Xi, Y, alpha, num_iters)

In [None]:
choose_penalty_ridge_model_sgd(X,Y,10,0.01, 10000)

[36m
 
Polynomial degree :  1 [0m


[1m[91mPenalty :  0.7020962354230136  train error :  13046.780331018072  validation error :  14180.377318793107 [0m
[1m[91mPenalty :  0.11950899806090631  train error :  12477.1993255209  validation error :  13425.610891113156 [0m
[1m[91mPenalty :  0.9464763163799624  train error :  13320.412171079734  validation error :  14486.277526661352 [0m
[1m[91mPenalty :  0.7180883535933147  train error :  13064.81376150586  validation error :  14201.031087685835 [0m
[1m[91mPenalty :  0.5567118838177436  train error :  12884.472456809885  validation error :  13989.937794342834 [0m
[1m[91mPenalty :  0.10737358283603637  train error :  12467.436294075855  validation error :  13409.551674104063 [0m
[1m[91mPenalty :  0.8582770021064852  train error :  13222.52873686813  validation error :  14378.48299402963 [0m
[1m[91mPenalty :  0.1747519870005042  train error :  12519.920471378144  validation error :  13495.933215554986 [0m
[1m[91mPena

**Choosing the best regularization term for lasso regression**

In [None]:
def choose_penalty_lasso_gd(num_penalty_values, X, Y, alpha, num_iters):
  penalty_values = np.array([])

  for i in range(num_penalty_values):
    penalty_values=np.append(penalty_values,np.array([random.uniform(0, 1)]))
 
  X_train, X_test, Y_train, Y_test, X_validation, Y_validation = splitDataReg(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  X_validation, Y_validation, Y_validation_max, Y_validation_min = standardize(X_validation, Y_validation)

  min_penalty=-1
  min_error=1e9

  for i in penalty_values:
    #print("added")
    theta = np.zeros(X_train.shape[1])
    theta = np.transpose(theta)
    theta = gradient_descent_lasso(X_train, Y_train, theta, alpha, i, num_iters, Y_train_max, Y_train_min)

    Y_pred_train = X_train.dot(theta)
    Y_pred_validation = X_validation.dot(theta)

    rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
    rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
    rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
    rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min

    print(color.BOLD+color.RED+"Penalty : ",i," train error : ",rmse_train," validation error : ",rmse_validation,color.END)

    if rmse_validation < min_error :
      min_penalty = i
      min_error = rmse_validation

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent_lasso(X_train, Y_train, theta, alpha, min_penalty, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)
  Y_pred_validation = X_validation.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
  rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print(color.BOLD+color.RED+color.UNDERLINE+"Best Penalty : ",min_penalty," Best train error : ",rmse_train," Best validation error : ",rmse_validation," Best testing error : ",rmse_test,color.END)
  return min_penalty

In [None]:
def choose_penalty_lasso_model_gd(X,Y,num_penalty_values,alpha, num_iters):
  for i in range(10):
    Xi=poly_features(X, 1+i)
    print(color.DARKCYAN+"\n \nPolynomial degree : ", i+1,color.END+"\n\n")
    lam = choose_penalty_lasso_gd(num_penalty_values, Xi, Y, alpha, num_iters)

In [None]:
choose_penalty_lasso_model_gd(X,Y,10,0.01, 1000)

[36m
 
Polynomial degree :  1 [0m


[1m[91mPenalty :  0.875341771445816  train error :  12376.4770874824  validation error :  13338.758170580131 [0m
[1m[91mPenalty :  0.8459722484724913  train error :  12376.173958747906  validation error :  13338.472607153963 [0m
[1m[91mPenalty :  0.36600560730278364  train error :  12371.344004553794  validation error :  13333.921312204136 [0m
[1m[91mPenalty :  0.773688460802014  train error :  12375.43162531538  validation error :  13337.773252983165 [0m
[1m[91mPenalty :  0.21440920839879207  train error :  12369.867018643215  validation error :  13332.52904537417 [0m
[1m[91mPenalty :  0.04695617846033018  train error :  12368.262650819092  validation error :  13331.016415897999 [0m
[1m[91mPenalty :  0.7788340508429864  train error :  12375.484294016302  validation error :  13337.822874024196 [0m
[1m[91mPenalty :  0.20715511289570965  train error :  12369.796927428997  validation error :  13332.462968366848 [0m
[1m[91mPen

In [None]:
def choose_penalty_lasso_sgd(num_penalty_values, X, Y, alpha, num_iters):
  penalty_values = np.array([])

  for i in range(num_penalty_values):
    penalty_values=np.append(penalty_values,np.array([random.uniform(0, 1)]))
 
  X_train, X_test, Y_train, Y_test, X_validation, Y_validation = splitDataReg(X, Y)
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train)
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test)
  X_validation, Y_validation, Y_validation_max, Y_validation_min = standardize(X_validation, Y_validation)

  min_penalty=-1
  min_error=1e9

  for i in penalty_values:
    #print("added")
    theta = np.zeros(X_train.shape[1])
    theta = np.transpose(theta)
    theta = stochastic_descent_lasso(X_train, Y_train, theta, alpha, i, num_iters, Y_train_max, Y_train_min)

    Y_pred_train = X_train.dot(theta)
    Y_pred_validation = X_validation.dot(theta)

    rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
    rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
    rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
    rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min

    print(color.BOLD+color.RED+"Penalty : ",i," train error : ",rmse_train," validation error : ",rmse_validation,color.END)

    if rmse_validation < min_error :
      min_penalty = i
      min_error = rmse_validation

  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = stochastic_descent_lasso(X_train, Y_train, theta, alpha, min_penalty, num_iters, Y_train_max, Y_train_min)

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)
  Y_pred_validation = X_validation.dot(theta)

  rmse_train = math.sqrt(np.mean(np.square(Y_pred_train - Y_train)))
  rmse_train = rmse_train*(Y_train_max - Y_train_min) + Y_train_min
  rmse_validation = math.sqrt(np.mean(np.square(Y_pred_validation - Y_validation)))
  rmse_validation = rmse_validation*(Y_validation_max - Y_validation_min) + Y_validation_min
  rmse_test = math.sqrt(np.mean(np.square(Y_pred_test - Y_test)))
  rmse_test = rmse_test*(Y_test_max - Y_test_min) + Y_test_min

  print(color.BOLD+color.RED+color.UNDERLINE+"Best Penalty : ",min_penalty," Best train error : ",rmse_train," Best validation error : ",rmse_validation," Best testing error : ",rmse_test,color.END)
  return min_penalty

In [None]:
def choose_penalty_lasso_model_sgd(X,Y,num_penalty_values,alpha, num_iters):
  for i in range(10):
    Xi=poly_features(X, 1+i)
    print(color.DARKCYAN+"\n \nPolynomial degree : ", i+1,color.END+"\n\n")
    lam = choose_penalty_lasso_sgd(num_penalty_values, Xi, Y, alpha, num_iters)

In [None]:
choose_penalty_lasso_model_sgd(X,Y,10,0.1, 10000)

[36m
 
Polynomial degree :  1 [0m


[1m[91mPenalty :  0.9167774567305519  train error :  13154.015259179938  validation error :  14151.861345817599 [0m
[1m[91mPenalty :  0.827991353896323  train error :  13152.739770051729  validation error :  14150.41100480587 [0m
[1m[91mPenalty :  0.5524180791724337  train error :  13152.741029760724  validation error :  14150.382348720848 [0m
[1m[91mPenalty :  0.692926262811713  train error :  13152.75050282092  validation error :  14150.442245831284 [0m
[1m[91mPenalty :  0.3459094739151233  train error :  13152.857070405686  validation error :  14150.537132181547 [0m
[1m[91mPenalty :  0.0027416359532291823  train error :  12595.15258636861  validation error :  13573.162158126368 [0m
[1m[91mPenalty :  0.5444785336830842  train error :  13152.947753283053  validation error :  14150.657205476125 [0m
[1m[91mPenalty :  0.6205706308351374  train error :  13152.372206974098  validation error :  14149.990034311788 [0m
[1m[91mPen

**3D Plot for Gradient Descent**

In [None]:
# qt5 package required to run this cell
for i in range(10):                                      #apply polynomial regression for polynomials of degrees from 1 to 10
  Xi = poly_features(X, i+1)
  X_train, X_test, Y_train, Y_test = splitData(Xi, Y)       #split data into train and test sets           
  X_train, Y_train, Y_train_max, Y_train_min = standardize(X_train, Y_train) #standardize train set
  X_test, Y_test, Y_test_max, Y_test_min = standardize(X_test, Y_test) #stanardize test set
  theta = np.zeros(X_train.shape[1])
  theta = np.transpose(theta)
  theta = gradient_descent(X_train, Y_train, theta, 0.01, 1000, Y_train_max, Y_train_min)  # calculate weights using gradient descent

  Y_pred_train = X_train.dot(theta)
  Y_pred_test = X_test.dot(theta)

  degreeStr = 'Degree' + str(i)

  ax = plt.figure(i).gca(projection="3d")


  ax.scatter(X_train[:,1], X_train[:,2], Y_train.flatten(), color='red', marker='+') #ploting points
  ax.plot_trisurf(X_train[:,1], X_train[:,2], Y_pred_train.flatten(), cmap="viridis", antialiased=False) #ploting the surface

plt.show()