# LAB 1: Kernel Ridge Regression
Authors: 

    Mathurin Massias (mathurin.massias@gmail.com)
    
    Giacomo Meanti  (giacomo.meanti@gmail.com)

In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all loaded
# modules before each cell execution, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split
from sklearn import datasets

from lab1_utils import create_random_data, data_split

## Warmup

### Data Generation

In [None]:
# Generate the synthetic dataset with a fixed seed, then report its dimensions.
X, y = create_random_data(n_samples=1000, noise_level=1, seed=932)
print("{:d} samples, {:d} features".format(*X.shape))

In [None]:
# Scatter the first two columns of X, one colour per label value (-1 / +1).
# A legend, axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(X[y == -1, 0], X[y == -1, 1], alpha=0.5, label="y = -1")
ax.scatter(X[y == 1, 0], X[y == 1, 1], alpha=0.5, label="y = +1")
ax.set(xlabel="X[:, 0]", ylabel="X[:, 1]", title="Generated data by class")
ax.legend();

### Splitting the data into train and test

In [None]:
# Split with n_train=800; the remaining samples presumably form the test set
# (confirm against lab1_utils.data_split).
X_train, X_test, y_train, y_test = data_split(X, y, n_train=800)
print(f"{X_train.shape[0]:d} training samples, {X_test.shape[0]:d} test samples")

### Training a linear ridge-regression model

In [None]:
def plot_separation(X, Y, model, ax=None, h=0.02):
    """Plot a model's decision boundary together with the labelled points.

    A regular mesh covering the range of the first two columns of ``X``
    (padded by 0.5 on each side) is classified with ``model``; the contour
    between the two predicted classes is drawn, then the points of each
    class are scattered on top.

    Parameters
    ----------
    X : ndarray, indexed as ``X[:, 0]`` and ``X[:, 1]``
        Input points. Only the first two columns are used.
    Y : ndarray
        Labels aligned with the rows of ``X``; points with label -1 and +1
        are drawn as two separate scatter series.
    model : object with a ``predict`` method
        Called on an array with two columns (mesh x, mesh y). Its output is
        thresholded at zero: values >= 0 become class +1, values < 0 class -1.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current pyplot axes, which is what
        the function always used before this parameter existed.
    h : float, default 0.02
        Step size of the mesh.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    # Bounding box of the data, padded so the points do not sit on the border.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Classify every mesh point; build a new array instead of overwriting the
    # model's output in place.
    scores = np.asarray(model.predict(np.c_[xx.ravel(), yy.ravel()]))
    Z = np.where(scores >= 0, 1.0, -1.0).reshape(xx.shape)

    # "Paired" is the registered name of the plt.cm.Paired colormap.
    ax.contour(xx, yy, Z, cmap="Paired")

    ax.scatter(X[Y == -1, 0], X[Y == -1, 1])
    ax.scatter(X[Y == 1, 0], X[Y == 1, 1])

    
def binary_classif_error(y_true, y_pred):
    """Return the fraction of samples whose predicted class differs from the label.

    Real-valued predictions are thresholded at zero: scores >= 0 are assigned
    to class +1 and scores < 0 to class -1. This is the same rule
    ``plot_separation`` uses to draw the decision regions, so a score of
    exactly 0 is counted as a +1 prediction rather than as an error for
    both classes (``np.sign(0)`` is 0, which matches neither label).

    Parameters
    ----------
    y_true : array-like
        True labels, expected to take values in {-1, +1}.
    y_pred : array-like
        Real-valued predictions, aligned with ``y_true``.

    Returns
    -------
    float
        Misclassification rate in [0, 1].
    """
    labels = np.asarray(y_true)
    predicted = np.where(np.asarray(y_pred) >= 0, 1, -1)
    return np.mean(predicted != labels)

In [None]:
# Ridge penalty passed to KernelRidge as its alpha parameter.
regularization = 0.001

# Kernel ridge regression with a linear kernel, fitted on the training split.
model = KernelRidge(alpha=regularization, kernel="linear")
model.fit(X_train, y_train)

In [None]:
# Real-valued predictions on both splits; binary_classif_error turns them into
# a misclassification rate, reported here as a percentage.
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

print(f"Training error: {binary_classif_error(y_train, train_preds) * 100:.2f}%")
print(f"Test error: {binary_classif_error(y_test, test_preds) * 100:.2f}%")

In [None]:
# Decision boundary of the fitted linear model over the training points.
plot_separation(X=X_train, Y=y_train, model=model)

### Exploring the effect of different parameters

In [None]:
# 1. Change the regularization parameter
reg_values = np.geomspace(1e-4, 5e3, num=50)
test_errors = []
for reg in reg_values:
    model = KernelRidge(reg, kernel="linear")
    model.fit(X_train, y_train)
    test_preds = model.predict(X_test)
    test_errors.append(binary_classif_error(y_test, test_preds))

In [None]:
# Test error against regularization, with a logarithmic x-axis.
fig, ax = plt.subplots()
ax.set_xscale("log")
ax.plot(reg_values, test_errors)
ax.set(xlabel="Regularization", ylabel="Test error");

In [None]:
# 2. Effect of the number of data points on the test error.
# For each size a dataset is generated with the same noise level (1) and seed,
# then split with n_train = points - 200 (so presumably 200 points are held
# out for testing - confirm against lab1_utils.data_split).
# The sweep uses its own variable names so that it does not overwrite the
# notebook-level X, y, X_train, X_test, y_train, y_test and model defined in
# earlier cells.
num_points = np.arange(500, 3000, 100)
np_test_errors = []
np_model = KernelRidge(alpha=1, kernel="linear")
for points in num_points:
    X_np, y_np = create_random_data(points, 1, seed=932)
    X_np_train, X_np_test, y_np_train, y_np_test = data_split(
        X_np, y_np, n_train=points - 200
    )
    np_model.fit(X_np_train, y_np_train)
    np_test_preds = np_model.predict(X_np_test)
    np_test_errors.append(binary_classif_error(y_np_test, np_test_preds))

In [None]:
# Test error as a function of the dataset size.
fig, ax = plt.subplots()
ax.plot(num_points, np_test_errors)
ax.set(xlabel="Number of points", ylabel="Test error");

In [None]:
# 3. Effect of the amount of noise in the data on the test error.
# For each noise level a 1000-sample dataset is generated with the same seed
# and split with n_train=800.
# The sweep uses its own variable names so that it does not overwrite the
# notebook-level X, y, X_train, X_test, y_train, y_test and model defined in
# earlier cells.
data_noise = [0.3, 0.5, 1.0, 2.0]
noise_test_errors = []
noise_model = KernelRidge(alpha=1, kernel="linear")
for noise in data_noise:
    X_noise, y_noise = create_random_data(1000, noise, seed=932)
    X_noise_train, X_noise_test, y_noise_train, y_noise_test = data_split(
        X_noise, y_noise, n_train=800
    )
    noise_model.fit(X_noise_train, y_noise_train)
    noise_test_preds = noise_model.predict(X_noise_test)
    noise_test_errors.append(binary_classif_error(y_noise_test, noise_test_preds))

In [None]:
# Test error as a function of the noise level used to generate the data.
fig, ax = plt.subplots()
ax.plot(data_noise, noise_test_errors)
ax.set(xlabel="Data noise", ylabel="Test error");

### Cross-Validation

## Part 2: Kernel ridge regression