In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from numpy import genfromtxt
from sklearn import kernel_ridge
import os
print(len(os.sched_getaffinity(0)))
from numba import jit, njit, prange

2


In [2]:
path_to_train = '/content/wine_training.csv'
path_to_test = '/content/wine_test.csv'

train = genfromtxt(path_to_train, delimiter=',')
test = genfromtxt(path_to_test, delimiter=',')

print('TRAIN SET')
print(train)
print('ALL BUT LAST')
print(train[:, :-1])
print('LAST')
print(train[:, -1])

TRAIN SET
[[ 6.1   0.6   0.08 ...  0.54 11.    5.  ]
 [10.3   0.53  0.48 ...  0.59  9.3   6.  ]
 [ 6.9   0.49  0.19 ...  0.64  9.8   6.  ]
 ...
 [ 7.    0.38  0.49 ...  0.77 11.4   6.  ]
 [10.    0.35  0.47 ...  0.52 12.    6.  ]
 [ 7.    0.6   0.3  ...  1.17 10.2   5.  ]]
ALL BUT LAST
[[ 6.1   0.6   0.08 ...  3.38  0.54 11.  ]
 [10.3   0.53  0.48 ...  3.12  0.59  9.3 ]
 [ 6.9   0.49  0.19 ...  3.38  0.64  9.8 ]
 ...
 [ 7.    0.38  0.49 ...  3.39  0.77 11.4 ]
 [10.    0.35  0.47 ...  3.23  0.52 12.  ]
 [ 7.    0.6   0.3  ...  3.3   1.17 10.2 ]]
LAST
[5. 6. 6. ... 6. 6. 5.]


In [3]:
norm_train = (train - train.mean(axis=0)) / train.std(axis=0)
norm_test = (test - test.mean(axis=0)) / test.std(axis=0)

In [4]:
#Use linear regression to fit a linear model to the training set 
LinearRegressor = sklearn.linear_model.LinearRegression()
LinearRegressor.fit(norm_train[:, :-1], norm_train[:, -1])
predicted_train = LinearRegressor.predict(norm_train[:, :-1])
print(f'LINEAR REGRESSION TRAINING MSE: {sklearn.metrics.mean_squared_error(predicted_train, norm_train[:, -1])}')
predicted_test = LinearRegressor.predict(norm_test[:, :-1])
print(f'LINEAR REGRESSION TEST MSE: {sklearn.metrics.mean_squared_error(predicted_test, norm_test[:, -1])}')

LINEAR REGRESSION TRAINING MSE: 0.6278484956554817
LINEAR REGRESSION TEST MSE: 0.7021527395264012


In [8]:
#Use kernel ridge regression to fit a non-linear model to the training set

#Gaussian (RBF) Kernel 
def GaussianError(args):
  RBFRegresor = kernel_ridge.KernelRidge(alpha = args[0], kernel = 'rbf', gamma = args[1])
  RBFRegresor.fit(norm_train[:, :-1], norm_train[:, -1])
  predicted_train = RBFRegresor.predict(norm_train[:, :-1])
  #print(f'GAUSSIAN KERNEL TRAINING MSE: {sklearn.metrics.mean_squared_error(predicted_train, norm_train[:, -1])}')
  predicted_test = RBFRegresor.predict(norm_test[:, :-1])
  #print(f'GAUSSIAN KERNEL TEST MSE: {sklearn.metrics.mean_squared_error(predicted_test, norm_test[:, -1])}')
  train_mse = sklearn.metrics.mean_squared_error(predicted_train, norm_train[:, -1])
  test_mse = sklearn.metrics.mean_squared_error(predicted_test, norm_test[:, -1])
  return train_mse

#Laplacian kernel
@njit(nogil=True)
def LaplacianError(alpha, gamma, best_train):
  LaplacianRegresor = kernel_ridge.KernelRidge(alpha = 1.0, kernel = 'laplacian', gamma = 1.0)
  LaplacianRegresor.fit(norm_train[:, :-1], norm_train[:, -1])
  predicted_train = LaplacianRegresor.predict(norm_train[:, :-1])
  print(f'LAPLACIAN KERNEL TRAINING MSE: {sklearn.metrics.mean_squared_error(predicted_train, norm_train[:, -1])}')
  predicted_test = LaplacianRegresor.predict(norm_test[:, :-1])
  print(f'LAPLACIAN KERNEL TEST MSE: {sklearn.metrics.mean_squared_error(predicted_test, norm_test[:, -1])}')
  train_mse = sklearn.metrics.mean_squared_error(predicted_train, norm_train[:, -1])
  if train_mse > best_train:
    best_alpha = alpha
    best_gamma = gamma

In [9]:
print(GaussianError((1.0, 1.0)))

0.20315143936568633


In [34]:
from concurrent.futures import ThreadPoolExecutor
import itertools
best_alpha = 0
executor = ThreadPoolExecutor(len(os.sched_getaffinity(0)))
alpha_range = np.arange(0, 10, 1)
gamma_range = np.arange(0, 10, 1)
all_combinations = itertools.product(alpha_range, gamma_range)
results = executor.map(GaussianError, all_combinations)
print(list(results))
max_result = max(list(results))
print(max_result)
print(list(results))

  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "
  "Singular matrix in solving dual problem. Using "


[1.073770314029843, 3.5308512057016115e-29, 3.5438611647519476e-29, 7.154699781991392e-29, 9.758411697956191e-29, 1.6241898743136383e-28, 2.0451609473901214e-28, 2.8143729038573282e-28, 3.75936481453413e-28, 4.450225698635913e-28, 0.9999999999999892, 0.20315143936568633, 0.2073378034585659, 0.21521872365735492, 0.21898420244623512, 0.22094667732490614, 0.22211007441480654, 0.22287672520765825, 0.22342655205334072, 0.22384777594062416, 0.9999999999999893, 0.3500168038174951, 0.3791006871848673, 0.3936538379494036, 0.3996488855205645, 0.4026502533820927, 0.40442041743177576, 0.40559388530641294, 0.40644071976551993, 0.40709100172248514, 0.9999999999999893, 0.4471141664165879, 0.49159786535532957, 0.5085458800166198, 0.5151013240433882, 0.5183263332870892, 0.5202246505910707, 0.5214868773208996, 0.5224007702564363, 0.5231038237937479, 0.9999999999999893, 0.5166046647611476, 0.5697531546110063, 0.5872233049039937, 0.5937339804757091, 0.5969050587839585, 0.598769782811598, 0.600011915362489

ValueError: ignored

In [None]:
best_alpha = 0 
best_gamma = 0
@jit(nogil=True, parallel=True)
def run_sims(end=4):
    for alpha in prange(int(end/0.1)):
      for gamma in prange(int(end/0.1)):
        if alpha == 0 or gamma == 0:
            continue
        GaussianError(alpha*0.1, gamma*0.1, 0, best_alpha, best_gamma)
        
run_sims()
print(best_alpha)
print(best_gamma)

Compilation is falling back to object mode WITH looplifting enabled because Function "run_sims" failed type inference due to: Invalid use of type(CPUDispatcher(<function GaussianError at 0x7f41ad940dd0>)) with parameters (float64, float64, Literal[int](0), Literal[int](0), Literal[int](0))

During: resolving callee type: type(CPUDispatcher(<function GaussianError at 0x7f41ad940dd0>))
During: typing of call at <ipython-input-26-1ee6162a7113> (9)


File "<ipython-input-26-1ee6162a7113>", line 9:
def run_sims(end=4):
    <source elided>
            continue
        GaussianError(alpha*0.1, gamma*0.1, 0, best_alpha, best_gamma)
        ^

  @jit(nogil=True, parallel=True)
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "run_sims" failed type inference due to: cannot determine Numba type of <class 'numba.core.dispatcher.LiftedLoop'>

File "<ipython-input-26-1ee6162a7113>", line 5:
def run_sims(end=4):
    for alpha in prange(int(end/0.1)):
    ^

  @j

0
0


In [None]:
#Use 10-fold CV to tune the lenght of scale sigma and lambda for both above kernels. 

In [None]:
#MSE of all 3 modesl 

In [None]:
#Predict on new batch of wines 