# TP3 Kernel Methods for Machine Learning

Written by Yunlong Jiao / Romain Menegaux, 19 May 2020

In [1]:
# setup
import numpy as np

In [2]:
# Show the Python interpreter version this notebook is executed with.
import sys
print(sys.version)

2.7.16 (default, Oct 10 2019, 22:02:15) 
[GCC 8.3.0]


In [3]:
import sklearn
from sklearn import linear_model as lm
sklearn.__version__

'0.20.4'

## Tasks

1. Implement (naive) solvers for Ridge Regression, Weighted Ridge Regression and Logistic Ridge Regression (using Iteratively Reweighted Least Squares). See notes for the mathematical derivation.
2. Simulate some toy data to check if our solvers give correct solutions.

In [4]:
# Toy data
# A bare `import sklearn` does not load submodules; import the one we use
# explicitly instead of relying on another sklearn import pulling it in.
import sklearn.preprocessing

np.random.seed(42)  # fixed seed so the printed comparisons are reproducible
n = 100  # number of samples
p = 10   # number of features
X = np.random.normal(0, 1, (n, p))
X = sklearn.preprocessing.scale(X)  # standardize each column
beta_star = np.random.normal(0, 1, p)  # ground-truth coefficients
# Linear response with Gaussian noise of standard deviation 0.2
y = X.dot(beta_star) + 0.2 * np.random.normal(0, 1, n)

def compare(beta1, beta2):
    """Print two coefficient arrays and the sum of their squared differences.

    beta1 is reported as "Our solver", beta2 as "Scikit-learn".
    Nothing is returned.
    """
    squared_gap = np.sum((beta1 - beta2) ** 2)
    report = '''
Our solver:
{}
Scikit-learn:
{}

Difference between the two:
{}
        '''.format(beta1, beta2, squared_gap)
    print(report)

## Solutions

**Ridge Regression (RR)**

Given $X \in \mathbb{R}^{n \times p}$ and $y \in \mathbb{R}^n$, solve
$$
\min_{\beta \in \mathbb{R}^p} \frac{1}{n} \|y - X \beta\|^2 + \lambda \|\beta\|^2 \,.
$$

In [13]:
# Ridge Regression (RR)
def solveRR(y, X, lam):
    """Solve ridge regression in closed form.

    Minimizes (1/n) * ||y - X.dot(beta)||^2 + lam * ||beta||^2 over beta.

    Parameters
    ----------
    y : array of length n
        Targets.
    X : array of shape (n, p)
        Design matrix.
    lam : float
        Regularization strength.

    Returns
    -------
    beta : array of length p
        The minimizer.

    Raises
    ------
    AssertionError
        If len(y) differs from the number of rows of X.
    numpy.linalg.LinAlgError
        If the system matrix is singular (possible when lam == 0).
    """
    n, p = X.shape
    assert (len(y) == n)

    # Setting the gradient to zero gives the normal equations
    #     (X^T X + lam * n * I) beta = X^T y
    # (the factor n comes from the 1/n in front of the squared loss).
    A = X.T.dot(X) + lam * n * np.eye(p)
    b = X.T.dot(y)

    # Solve the linear system directly instead of forming an explicit
    # inverse: cheaper and numerically more accurate.
    beta = np.linalg.solve(A, b)

    return (beta)

**Try it out:**

In [14]:
lam = 0.1

# Our solver
beta1 = solveRR(y, X, lam)

# scikit-learn solver
# Ridge minimizes ||y - X w||^2 + alpha * ||w||^2, i.e. our objective times n,
# hence alpha = lam * n.
alpha = lam * X.shape[0]
# `normalize` is left at its default (False): the argument was removed from
# Ridge in scikit-learn 1.2, so passing it explicitly breaks newer versions.
model = lm.Ridge(alpha=alpha, fit_intercept=False)
beta2 = model.fit(X, y).coef_

# Check
compare(beta1, beta2)


Our solver:
[ 1.27929172  0.78935356  0.05064497 -0.55474398  0.65276533  0.32637554
  0.765293    0.63326617  0.97285396 -0.5294559 ]
Scikit-learn:
[ 1.27929172  0.78935356  0.05064497 -0.55474398  0.65276533  0.32637554
  0.765293    0.63326617  0.97285396 -0.5294559 ]

Difference between the two:
6.02479034853e-31
        


**Weighted Ridge Regression (WRR)**

Given $X \in \mathbb{R}^{n \times p}$, $y \in \mathbb{R}^n$, and weights $w \in \mathbb{R}^n_+$, solve
$$
\min_{\beta \in \mathbb{R}^p} \frac{1}{n} \sum_{i=1}^n w_i (y_i - \beta^\top x_i)^2 + \lambda \|\beta\|^2 \,.
$$

In [None]:
# Weighted Ridge Regression (WRR)
def solveWRR(y, X, w, lam):
    """Solve weighted ridge regression by reduction to plain ridge regression.

    Minimizes (1/n) * sum_i w_i * (y_i - beta^T x_i)^2 + lam * ||beta||^2.

    Parameters
    ----------
    y : array of length n
        Targets.
    X : array of shape (n, p)
        Design matrix.
    w : array of length n
        Non-negative sample weights.
    lam : float
        Regularization strength.

    Returns
    -------
    beta : array of length p
        The minimizer.

    Raises
    ------
    AssertionError
        If the lengths of y, w and the rows of X disagree, or if any weight
        is negative.
    """
    n, p = X.shape
    assert (len(y) == len(w) == n)

    w = np.asarray(w, dtype=float)
    # The square root below is only real for non-negative weights.
    assert np.all(w >= 0), "weights must be non-negative"

    # w_i * (y_i - beta^T x_i)^2 == (sqrt(w_i) y_i - beta^T (sqrt(w_i) x_i))^2,
    # so scaling every sample by sqrt(w_i) turns the weighted problem into an
    # unweighted one with the same n and the same lam.
    sqrt_w = np.sqrt(w)
    y1 = sqrt_w * y
    X1 = sqrt_w[:, np.newaxis] * X

    beta = solveRR(y1, X1, lam)
    return (beta)

**Try it out:**

In [None]:
lam = 0.1
w = np.random.rand(len(y))  # random sample weights in [0, 1)

# Our solver
beta1 = solveWRR(y, X, w, lam)

# scikit-learn solver
# Ridge with sample_weight minimizes sum_i w_i (y_i - x_i^T b)^2 + alpha * ||b||^2,
# i.e. our objective times n, hence alpha = lam * n.
alpha = lam * X.shape[0]
# `normalize` is left at its default (False): the argument was removed from
# Ridge in scikit-learn 1.2, so passing it explicitly breaks newer versions.
model = lm.Ridge(alpha=alpha, fit_intercept=False)
beta2 = model.fit(X, y, sample_weight=w).coef_

# Check
compare(beta1, beta2)

**Logistic Ridge Regression (LRR)**

Given $X \in \mathbb{R}^{n \times p}$ and $y \in \{-1,+1\}^n$, solve
$$
\min_{\beta \in \mathbb{R}^p} \frac{1}{n} \sum_{i=1}^n \log (1+e^{-y_i \beta^\top x_i}) + \lambda \|\beta\|^2 \,.
$$

In [None]:
# Logistic Ridge Regression (LRR)
def solveLRR(y, X, lam, max_iter=100, tol=1e-8):
    """Solve logistic ridge regression with IRLS (Newton's method).

    Minimizes (1/n) * sum_i log(1 + exp(-y_i * beta^T x_i)) + lam * ||beta||^2.

    Parameters
    ----------
    y : array of length n
        Labels, expected to take values in {-1, +1}.
    X : array of shape (n, p)
        Design matrix.
    lam : float
        Regularization strength.
    max_iter : int, optional
        Maximum number of Newton iterations.
    tol : float, optional
        Stop once the Euclidean norm of the update falls below this value.

    Returns
    -------
    beta : array of length p
        The last iterate (the minimizer if the iterations converged).

    Raises
    ------
    AssertionError
        If len(y) differs from the number of rows of X.

    Notes
    -----
    Plain Newton steps are taken, without line search.
    """
    n, p = X.shape
    assert (len(y) == n)

    y = np.asarray(y, dtype=float)
    beta = np.zeros(p)

    for _ in range(max_iter):
        m = X.dot(beta)  # current linear scores beta^T x_i

        # sigma(-y_i m_i), using sigma(t) = exp(-log(1 + exp(-t))) so that
        # large |m_i| cannot overflow.
        sig = np.exp(-np.logaddexp(0, y * m))

        # Second derivative of the logistic loss: sigma(m_i) * sigma(-m_i).
        # For y_i in {-1, +1} this equals sig * (1 - sig). The floor keeps
        # the working response finite for saturated samples.
        w = np.maximum(sig * (1 - sig), 1e-10)

        # Working response of the local quadratic approximation:
        #     z_i = m_i - loss'(m_i) / w_i,  with loss'(m_i) = -y_i sigma(-y_i m_i)
        z = m + y * sig / w

        # The quadratic model is
        #     (1/2) * [ (1/n) sum_i w_i (z_i - beta^T x_i)^2 + 2 lam ||beta||^2 ],
        # i.e. a weighted ridge problem with regularization 2 * lam.
        beta_new = solveWRR(z, X, w, 2 * lam)

        step = np.linalg.norm(beta_new - beta)
        beta = beta_new
        if step < tol:
            break

    return (beta)

**Try it out:**

In [None]:
y_bin = np.sign(y) # Binarize targets
lam = 0.1

# Our solver
beta1 = solveLRR(y_bin, X, lam)

# scikit-learn solver
# LogisticRegression minimizes 0.5 * ||b||^2 + C * sum_i log(1 + exp(-y_i x_i^T b)),
# which is our objective divided by 2 * lam when C = 1 / (2 * lam * n).
alpha = 2 * lam * X.shape[0]
model = lm.LogisticRegression(C=1.0 / alpha, fit_intercept=False)
# coef_ has shape (1, p) for a binary problem; flatten it so both vectors
# have the same shape when printed and subtracted.
beta2 = model.fit(X, y_bin).coef_.ravel()

# Check
compare(beta1, beta2)

### Mini Data Challenge

We will try to predict whether patients have breast cancer.

We use scikit-learn's [breast cancer dataset](https://scikit-learn.org/stable/datasets/index.html#breast-cancer-dataset)

30 features, 569 samples, 2 labels ('malignant' or 'benign')

In [None]:
# Load data and split into training / validation sets
# A bare `import sklearn` does not load submodules; import the one we use
# explicitly instead of relying on another sklearn import pulling it in.
import sklearn.preprocessing
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
# NOTE(review): the features are standardized on the full dataset before the
# split, so the held-out rows contribute to the scaling statistics.
X = sklearn.preprocessing.scale(X)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)
X.shape

In [None]:
# Fit our model and compute its parameters
lam = 0.01
# The logistic ridge objective is written for labels in {-1, +1}, while the
# dataset labels are {0, 1}: map 0 -> -1 and 1 -> +1 before fitting.
beta = solveLRR(2 * y_train - 1, X_train, lam)

In [None]:
# Compute predicted probabilities and classes
# Under the logistic model the probability of the positive class is
# sigmoid(beta^T x). This assumes beta was fit with label 1 encoded as +1.
probas_pred = 1.0 / (1.0 + np.exp(-X_test.dot(beta)))
# Threshold at 0.5 to get hard labels in {0, 1}, the encoding used by y_test.
y_pred = (probas_pred >= 0.5).astype(int)

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score

# Accuracy is computed from the hard predictions, AUC from the probabilities.
print("Our model's performance:")
print('Accuracy: {:.2%}'.format(
    accuracy_score(y_true=y_test, y_pred=y_pred)))
print('AUC: {:.2%}'.format(
    roc_auc_score(y_true=y_test, y_score=probas_pred)))

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 summary of the hard predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))