In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interactive
import ipywidgets as widgets
from ipywidgets import fixed
from sklearn.linear_model import LinearRegression


In [None]:
def generate_float_widget(name):
    """Build a logarithmic float slider labelled ``name + ':'``.

    The slider works on a log scale with base sqrt(2): ``min`` and ``max``
    are exponents (so the value ranges over sqrt(2)**-30 .. sqrt(2)**0),
    whereas ``value`` is the actual value of the slider.
    """
    base = np.sqrt(2)
    return widgets.FloatLogSlider(
        # ``value`` is NOT an exponent; the previous ``value=-10`` lay outside
        # the valid range.  Start at exponent -10, i.e. base**-10 = 2**-5.
        value=base ** -10,
        base=base,
        min=-30,
        max=0,
        step=1,
        description=name + ':',
        continuous_update=False
    )

def generate_int_widget(name, val, min_, max_, step=1):
    """Build a horizontal integer slider labelled ``name + ':'``.

    ``val`` is the initial value, ``min_``/``max_`` the slider bounds and
    ``step`` the increment between positions.
    """
    slider_options = {
        'value': val,
        'min': min_,
        'max': max_,
        'step': step,
        'description': name + ':',
        'disabled': False,
        'continuous_update': False,
        'orientation': 'horizontal',
        'readout': True,
        'readout_format': 'd',
    }
    return widgets.IntSlider(**slider_options)


In [None]:
def generate_true_weights(k, p):
    """Draw a unit-norm weight vector of length ``k + p``.

    Only the first ``k`` entries are non-zero (standard normal draws); the
    remaining ``p`` entries are zero.  The vector is then rescaled so that
    ||w^*|| = 1.
    """
    spiked_part = np.random.randn(k)
    weights = np.concatenate([spiked_part, np.zeros(p)])
    # put the true weights on the unit sphere
    return weights / np.linalg.norm(weights)


def generate_X_orthogonal_blocks(lambda_L, lambda_S, k, n, p):
    """Build an ``n x (k + p)`` design made of two scaled singular-vector blocks.

    The first ``k`` columns are the left singular vectors of an ``n x k``
    Gaussian matrix scaled by sqrt(lambda_L * n); the last ``p`` columns are
    the (transposed) right singular vectors of an ``n x p`` Gaussian matrix
    scaled by sqrt(lambda_S * p).  The shapes fit when k <= n <= p.
    """
    # U factor of the reduced SVD of an n x k Gaussian matrix.
    left_vectors, _, _ = np.linalg.svd(np.random.randn(n, k), full_matrices=False)
    # V^T factor of the reduced SVD of an n x p Gaussian matrix.
    _, _, right_vectors_t = np.linalg.svd(np.random.randn(n, p), full_matrices=False)

    design = np.empty(shape=(n, k + p))
    design[:, :k] = left_vectors * np.sqrt(lambda_L * n)
    design[:, k:] = right_vectors_t * np.sqrt(lambda_S * p)
    return design
def generate_X_gaussian(lambda_L, lambda_S, k, n, p):
    """Build an ``n x (k + p)`` Gaussian design with two variance levels.

    The first ``k`` columns have i.i.d. N(0, lambda_L) entries and the last
    ``p`` columns have i.i.d. N(0, lambda_S) entries.
    """
    spiked_block = np.random.randn(n, k) * np.sqrt(lambda_L)
    non_spiked_block = np.random.randn(n, p) * np.sqrt(lambda_S)
    return np.hstack([spiked_block, non_spiked_block])


## Part 1
In this part of the notebook we will check the computations from part e of the problem. Below there are four cells of code. Each of those cells contains a function that generates the data and computes one of the four terms of the error from part b. There is also a second function that is not implemented yet. **Add code to the second function in each cell so that it computes the formula for the corresponding error term that you found in part e.** When you run one of those cells, you will be able to choose the parameters and print the outputs of both functions for those parameters. If you do everything correctly, both outputs will match (up to floating-point rounding)!


The next cell computes the error term that corresponds to **bias in the spiked part.** We take  $\|{\bf w}^*\|$ to be 1 as the error term doesn't have any interesting dependence on this quantity (they are simply proportional). We also set $\lambda_L = 1$ and $n = 100$ and allow changing $k,p,\lambda_S$.


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def bias_spiked_empirical(lambda_L, lambda_S, k, n, p):
    """Simulate the bias term in the spiked (first ``k``) coordinates.

    Draws an orthogonal-block design and a unit-norm ``w_true``, computes the
    minimum-norm interpolator of the noiseless labels ``X @ w_true`` and
    returns sum_{i < k} lambda_i * (w_true_i - w_estimated_i)**2.
    """
    lambdas = np.array([lambda_L] * k + [lambda_S] * p)
    X = generate_X_orthogonal_blocks(lambda_L, lambda_S, k, n, p)
    w_true = generate_true_weights(k, p)
    # Solve the n x n system instead of forming inv(X X^T); grouping the
    # products right-to-left also avoids building a (k+p) x (k+p) matrix.
    w_estimated = X.T @ np.linalg.solve(X @ X.T, X @ w_true)
    return np.sum((lambdas * (w_true - w_estimated)**2)[:k])

def bias_spiked_theoretical(lambda_L, lambda_S, k, n, p):
    """Closed-form bias in the spiked coordinates for the orthogonal-block design.

    For X from ``generate_X_orthogonal_blocks`` (k <= n <= p) we have
    X X^T = lambda_L*n * U U^T + lambda_S*p * I_n, so the spiked part of the
    minimum-norm estimate is w_L * lambda_L*n / (lambda_L*n + lambda_S*p).
    With ||w^*|| = 1 the bias is therefore
    lambda_L * (lambda_S*p / (lambda_L*n + lambda_S*p))**2; it does not depend on ``k``.
    """
    ### start bias_spiked ###
    residual_fraction = lambda_S * p / (lambda_L * n + lambda_S * p)
    return lambda_L * residual_fraction**2
    ### end bias_spiked ###

def compare_bias_spiked(lambda_L, lambda_S, k, n, p):
    """Print the simulated and the closed-form spiked bias for one parameter set."""
    simulated = bias_spiked_empirical(lambda_L, lambda_S, k, n, p)
    print("simulation result: ", simulated)
    predicted = bias_spiked_theoretical(lambda_L, lambda_S, k, n, p)
    print("theoretical result: ", predicted)

# Sliders for lambda_S, k and p; lambda_L and n are held fixed.
interactive_comparison = interactive(compare_bias_spiked,
                                     lambda_L=fixed(1.),
                                     # raw string: "\l" is not a valid escape sequence
                                     lambda_S=generate_float_widget(r'$\lambda_S$'),
                                     k=generate_int_widget('k', n_value//2, 1, n_value),
                                     n=fixed(n_value),
                                     p=generate_int_widget('p', 2*n_value, n_value, 10 * n_value))

interactive_comparison


The next cell computes the error term that corresponds to **bias in the non-spiked part.** We take  $\|{\bf w}^*\|$ to be 1 as the error term doesn't have any interesting dependence on this quantity (they are simply proportional). We also set $\lambda_L = 1$ and $n = 100$ and allow changing $k,p,\lambda_S$.


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def bias_non_spiked_empirical(lambda_L, lambda_S, k, n, p):
    """Simulate the bias term in the non-spiked (last ``p``) coordinates.

    Draws an orthogonal-block design and a unit-norm ``w_true``, computes the
    minimum-norm interpolator of the noiseless labels ``X @ w_true`` and
    returns sum_{i >= k} lambda_i * (w_true_i - w_estimated_i)**2.
    """
    lambdas = np.array([lambda_L] * k + [lambda_S] * p)
    X = generate_X_orthogonal_blocks(lambda_L, lambda_S, k, n, p)
    w_true = generate_true_weights(k, p)
    # Solve the n x n system instead of forming inv(X X^T); grouping the
    # products right-to-left also avoids building a (k+p) x (k+p) matrix.
    w_estimated = X.T @ np.linalg.solve(X @ X.T, X @ w_true)
    return np.sum((lambdas * (w_true - w_estimated)**2)[k:])

def bias_non_spiked_theoretical(lambda_L, lambda_S, k, n, p):
    """Closed-form bias in the non-spiked coordinates for the orthogonal-block design.

    For X from ``generate_X_orthogonal_blocks`` (k <= n <= p) the non-spiked
    part of the minimum-norm estimate has squared norm
    lambda_S*p * lambda_L*n / (lambda_L*n + lambda_S*p)**2 when ||w^*|| = 1,
    and the true non-spiked weights are zero.  Weighting by lambda_S gives
    lambda_S**2 * p * lambda_L * n / (lambda_L*n + lambda_S*p)**2; it does not
    depend on ``k``.
    """
    ### start bias_non-spiked ###
    denominator = (lambda_L * n + lambda_S * p)**2
    return lambda_S**2 * p * lambda_L * n / denominator
    ### end bias_non-spiked ###

def compare_bias_non_spiked(lambda_L, lambda_S, k, n, p):
    """Print the simulated and the closed-form non-spiked bias for one parameter set."""
    # Label the two numbers, matching the output format of compare_bias_spiked.
    print("simulation result: ", bias_non_spiked_empirical(lambda_L, lambda_S, k, n, p))
    print("theoretical result: ", bias_non_spiked_theoretical(lambda_L, lambda_S, k, n, p))

# Sliders for lambda_S, k and p; lambda_L and n are held fixed.
interactive_comparison = interactive(compare_bias_non_spiked,
                                     lambda_L=fixed(1.),
                                     # raw string: "\l" is not a valid escape sequence
                                     lambda_S=generate_float_widget(r'$\lambda_S$'),
                                     k=generate_int_widget('k', n_value//2, 1, n_value),
                                     n=fixed(n_value),
                                     p=generate_int_widget('p', 2*n_value, n_value, 10 * n_value))

interactive_comparison


The next cell computes the error term that corresponds to **variance in the spiked part.** We take $\sigma$ to be 1 as the error term doesn't have any interesting dependence on this quantity (they are simply proportional). We also set $\lambda_L = 1$ and $n = 100$ and allow changing $k,p,\lambda_S$.


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def variance_spiked_empirical(lambda_L, lambda_S, k, n, p):
    """Simulate the variance term in the spiked (first ``k``) coordinates.

    Returns the squared Frobenius norm of the first ``k`` rows of
    diag(sqrt(lambdas)) @ X.T @ inv(X @ X.T) for an orthogonal-block design X.
    """
    X = generate_X_orthogonal_blocks(lambda_L, lambda_S, k, n, p)
    # X X^T is symmetric, so solve(X X^T, X[:, :k]) is the transpose of the
    # first k rows of X^T (X X^T)^{-1}; the Frobenius norm ignores transposes.
    # This avoids the dense (k+p) x (k+p) diagonal matrix and the explicit inverse.
    spiked_block = np.linalg.solve(X @ X.T, X[:, :k])
    return np.linalg.norm(np.sqrt(lambda_L) * spiked_block)**2

def variance_spiked_theoretical(lambda_L, lambda_S, k, n, p):
    """Closed-form variance in the spiked coordinates for the orthogonal-block design.

    For X from ``generate_X_orthogonal_blocks`` (k <= n <= p) the first ``k``
    rows of diag(sqrt(lambdas)) X^T (X X^T)^{-1} equal
    sqrt(lambda_L) * sqrt(lambda_L*n) * U^T / (lambda_L*n + lambda_S*p),
    where U has ``k`` orthonormal columns.  The squared Frobenius norm is
    lambda_L**2 * n * k / (lambda_L*n + lambda_S*p)**2.
    """
    ### start variance_spiked ###
    denominator = (lambda_L * n + lambda_S * p)**2
    return lambda_L**2 * n * k / denominator
    ### end variance_spiked ###

def compare_variance_spiked(lambda_L, lambda_S, k, n, p):
    """Print the simulated and the closed-form spiked variance for one parameter set."""
    # Label the two numbers, matching the output format of compare_bias_spiked.
    print("simulation result: ", variance_spiked_empirical(lambda_L, lambda_S, k, n, p))
    print("theoretical result: ", variance_spiked_theoretical(lambda_L, lambda_S, k, n, p))


# Sliders for lambda_S, k and p; lambda_L and n are held fixed.
interactive_comparison = interactive(compare_variance_spiked,
                                     lambda_L=fixed(1.),
                                     # raw string: "\l" is not a valid escape sequence
                                     lambda_S=generate_float_widget(r'$\lambda_S$'),
                                     k=generate_int_widget('k', n_value//2, 1, n_value),
                                     n=fixed(n_value),
                                     p=generate_int_widget('p', 2*n_value, n_value, 10 * n_value))

interactive_comparison


The next cell computes the error term that corresponds to **variance in the non-spiked part.** We take $\sigma$ to be 1 as the error term doesn't have any interesting dependence on this quantity (they are simply proportional). We also set $\lambda_L = 1$ and $n = 100$ and allow changing $k,p,\lambda_S$.


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def variance_non_spiked_empirical(lambda_L, lambda_S, k, n, p):
    """Simulate the variance term in the non-spiked (last ``p``) coordinates.

    Returns the squared Frobenius norm of the rows from index ``k`` onwards of
    diag(sqrt(lambdas)) @ X.T @ inv(X @ X.T) for an orthogonal-block design X.
    """
    X = generate_X_orthogonal_blocks(lambda_L, lambda_S, k, n, p)
    # X X^T is symmetric, so solve(X X^T, X[:, k:]) is the transpose of rows
    # k.. of X^T (X X^T)^{-1}; the Frobenius norm ignores transposes.
    # This avoids the dense (k+p) x (k+p) diagonal matrix and the explicit inverse.
    non_spiked_block = np.linalg.solve(X @ X.T, X[:, k:])
    return np.linalg.norm(np.sqrt(lambda_S) * non_spiked_block)**2

def variance_non_spiked_theoretical(lambda_L, lambda_S, k, n, p):
    """Closed-form variance in the non-spiked coordinates for the orthogonal-block design.

    For X from ``generate_X_orthogonal_blocks`` (k <= n <= p) the non-spiked
    block satisfies X_S X_S^T = lambda_S*p * I_n, and X X^T has eigenvalue
    lambda_L*n + lambda_S*p with multiplicity ``k`` and lambda_S*p with
    multiplicity ``n - k``.  Hence the term equals
    lambda_S**2 * p * trace((X X^T)^{-2})
      = (n - k) / p + lambda_S**2 * p * k / (lambda_L*n + lambda_S*p)**2.
    """
    ### start variance_non-spiked ###
    orthogonal_to_spike = (n - k) / p
    along_spike = lambda_S**2 * p * k / (lambda_L * n + lambda_S * p)**2
    return orthogonal_to_spike + along_spike
    ### end variance_non-spiked ###

def compare_variance_non_spiked(lambda_L, lambda_S, k, n, p):
    """Print the simulated and the closed-form non-spiked variance for one parameter set."""
    # Label the two numbers, matching the output format of compare_bias_spiked.
    print("simulation result: ", variance_non_spiked_empirical(lambda_L, lambda_S, k, n, p))
    print("theoretical result: ", variance_non_spiked_theoretical(lambda_L, lambda_S, k, n, p))

# Sliders for lambda_S, k and p; lambda_L and n are held fixed.
interactive_comparison = interactive(compare_variance_non_spiked,
                                     lambda_L=fixed(1.),
                                     # raw string: "\l" is not a valid escape sequence
                                     lambda_S=generate_float_widget(r'$\lambda_S$'),
                                     k=generate_int_widget('k', n_value//2, 1, n_value),
                                     n=fixed(n_value),
                                     p=generate_int_widget('p', 2*n_value, n_value, 10 * n_value))

interactive_comparison


## Part 2
Recall that in part c we introduced assumptions on ${\bf X}$ that we thought would be approximately satisfied by a Gaussian design. In this part we are going to see if that is indeed true.

**For $n \in \{10, 30, 100\}$ find the minimum $d$ such that, if you generate an $n\times d$ matrix ${\bf Z}$ with i.i.d. standard normal entries, it often holds that**
$$
\|{\bf Z}{\bf Z}^\top/d - {\bf I}_n\|< 1/3
$$

**How big do you think  $p/k$ should be to make gaussian design behave similarly to the design that we studied in our problem?**


In [None]:
### start check approximate conditions ###
n = 10
d = 400
Z = np.random.randn(n, d)
# Only the singular values are needed, so skip computing U and the d x d V.
sing_vals = np.linalg.svd(Z, compute_uv=False)
# Z Z^T / d - I_n is symmetric with eigenvalues s_i^2 / d - 1, so its operator
# norm is the largest |s_i^2 - d| / d.  Singular values come sorted in
# descending order, hence the maximum is attained at the first or last one.
print(max(np.abs(sing_vals[0]**2 - d), np.abs(sing_vals[-1]**2 - d))/d)
### end check approximate conditions ###


## Part 3

Finally, we check how well our bounds work for the Gaussian design. Below there are four cells with code. In each of them we compare the error terms that we see for the Gaussian design with the bounds that we obtained for our simplified design. As in part a, we put $n=100$, $\lambda_L = 1$, $\|{\bf w^*}\|=1$ and $\sigma = 1$. Then for different values of $k$ and $d$ we compare our bounds for the 4 error terms to the corresponding error terms for the Gaussian design. **Run the experiments in this part and answer the following:**
1. **Do bounds from part e still work reasonably well if we have gaussian design?**
2. **For which values of the parameters is the discrepancy the highest?**
3. **Was your guess about $p/k$ from the previous part correct?**


Bias in the spiked part:


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def bias_spiked_gaussian(lambda_L, lambda_S, k, n, p):
    """Simulate the spiked-coordinate bias term for a Gaussian design.

    Draws a Gaussian design and a unit-norm ``w_true``, computes the
    minimum-norm interpolator of the noiseless labels ``X @ w_true`` and
    returns sum_{i < k} lambda_i * (w_true_i - w_estimated_i)**2.
    """
    lambdas = np.array([lambda_L] * k + [lambda_S] * p)
    X = generate_X_gaussian(lambda_L, lambda_S, k, n, p)
    w_true = generate_true_weights(k, p)
    # Solve the n x n system instead of forming inv(X X^T); grouping the
    # products right-to-left also avoids building a (k+p) x (k+p) matrix.
    w_estimated = X.T @ np.linalg.solve(X @ X.T, X @ w_true)
    return np.sum((lambdas * (w_true - w_estimated)**2)[:k])


def compare_with_gaussian_bias_spiked(lambda_L, lambda_S_range, k, n, p):
    """Plot the spiked bias for a Gaussian design against the closed-form value.

    For every ``lambda_S`` in ``lambda_S_range`` one Gaussian design is drawn
    and its error is plotted together with ``bias_spiked_theoretical``.
    """
    gaussian_errors = []
    our_bounds = []
    for lambda_S in lambda_S_range:
        gaussian_errors.append(bias_spiked_gaussian(lambda_L, lambda_S, k, n, p))
        our_bounds.append(bias_spiked_theoretical(lambda_L, lambda_S, k, n, p))
    plt.plot(lambda_S_range, gaussian_errors, label='gaussian X')
    plt.plot(lambda_S_range, our_bounds, label='our bound')
    plt.ylim([0, 1])
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    plt.xlabel(r"$\lambda_S$")
    plt.ylabel("error")
    plt.legend()


# Only k and p get sliders; lambda_L, n and the lambda_S grid are held fixed.
interactive_comparison = interactive(
    compare_with_gaussian_bias_spiked,
    **{
        'lambda_L': fixed(1.),
        'lambda_S_range': fixed(np.linspace(0.01, 1, 100)),
        'k': generate_int_widget('k', n_value//2, 1, n_value),
        'n': fixed(n_value),
        'p': generate_int_widget('p', 2*n_value, n_value, 10 * n_value),
    }
)

interactive_comparison


Bias in the non-spiked part:


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def bias_non_spiked_gaussian(lambda_L, lambda_S, k, n, p):
    """Simulate the non-spiked-coordinate bias term for a Gaussian design.

    Draws a Gaussian design and a unit-norm ``w_true``, computes the
    minimum-norm interpolator of the noiseless labels ``X @ w_true`` and
    returns sum_{i >= k} lambda_i * (w_true_i - w_estimated_i)**2.
    """
    lambdas = np.array([lambda_L] * k + [lambda_S] * p)
    X = generate_X_gaussian(lambda_L, lambda_S, k, n, p)
    w_true = generate_true_weights(k, p)
    # Solve the n x n system instead of forming inv(X X^T); grouping the
    # products right-to-left also avoids building a (k+p) x (k+p) matrix.
    w_estimated = X.T @ np.linalg.solve(X @ X.T, X @ w_true)
    return np.sum((lambdas * (w_true - w_estimated)**2)[k:])


def compare_with_gaussian_bias_non_spiked(lambda_L, lambda_S_range, k, n, p):
    """Plot the non-spiked bias for a Gaussian design against the closed-form value.

    For every ``lambda_S`` in ``lambda_S_range`` one Gaussian design is drawn
    and its error is plotted together with ``bias_non_spiked_theoretical``.
    """
    gaussian_errors = []
    our_bounds = []
    for lambda_S in lambda_S_range:
        gaussian_errors.append(bias_non_spiked_gaussian(lambda_L, lambda_S, k, n, p))
        our_bounds.append(bias_non_spiked_theoretical(lambda_L, lambda_S, k, n, p))
    plt.plot(lambda_S_range, gaussian_errors, label='gaussian X')
    plt.plot(lambda_S_range, our_bounds, label='our bound')
    plt.ylim([0, 1])
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    plt.xlabel(r"$\lambda_S$")
    plt.ylabel("error")
    plt.legend()

# Only k and p get sliders; lambda_L, n and the lambda_S grid are held fixed.
interactive_comparison = interactive(
    compare_with_gaussian_bias_non_spiked,
    **{
        'lambda_L': fixed(1.),
        'lambda_S_range': fixed(np.linspace(0.1, 1, 100)),
        'k': generate_int_widget('k', n_value//2, 1, n_value),
        'n': fixed(n_value),
        'p': generate_int_widget('p', 2*n_value, n_value, 10 * n_value),
    }
)

interactive_comparison


Variance in the spiked part:


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def variance_spiked_gaussian(lambda_L, lambda_S, k, n, p):
    """Simulate the spiked-coordinate variance term for a Gaussian design.

    Returns the squared Frobenius norm of the first ``k`` rows of
    diag(sqrt(lambdas)) @ X.T @ inv(X @ X.T) for a Gaussian design X.
    """
    X = generate_X_gaussian(lambda_L, lambda_S, k, n, p)
    # X X^T is symmetric, so solve(X X^T, X[:, :k]) is the transpose of the
    # first k rows of X^T (X X^T)^{-1}; the Frobenius norm ignores transposes.
    # This avoids the dense (k+p) x (k+p) diagonal matrix and the explicit inverse.
    spiked_block = np.linalg.solve(X @ X.T, X[:, :k])
    return np.linalg.norm(np.sqrt(lambda_L) * spiked_block)**2

def compare_with_gaussian_variance_spiked(lambda_L, lambda_S_range, k, n, p):
    """Plot the spiked variance for a Gaussian design against the closed-form value.

    For every ``lambda_S`` in ``lambda_S_range`` one Gaussian design is drawn
    and its error is plotted together with ``variance_spiked_theoretical``.
    """
    gaussian_errors = []
    our_bounds = []
    for lambda_S in lambda_S_range:
        gaussian_errors.append(variance_spiked_gaussian(lambda_L, lambda_S, k, n, p))
        our_bounds.append(variance_spiked_theoretical(lambda_L, lambda_S, k, n, p))
    plt.plot(lambda_S_range, gaussian_errors, label='gaussian X')
    plt.plot(lambda_S_range, our_bounds, label='our bound')
    plt.ylim([0, 1])
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    plt.xlabel(r"$\lambda_S$")
    plt.ylabel("error")
    plt.legend()

# Only k and p get sliders; lambda_L, n and the lambda_S grid are held fixed.
interactive_comparison = interactive(
    compare_with_gaussian_variance_spiked,
    **{
        'lambda_L': fixed(1.),
        'lambda_S_range': fixed(np.linspace(0.1, 1, 100)),
        'k': generate_int_widget('k', n_value//2, 1, n_value),
        'n': fixed(n_value),
        'p': generate_int_widget('p', 2*n_value, n_value, 10 * n_value),
    }
)

interactive_comparison


Variance in the non-spiked part:


In [None]:
n_value = 100  # sample size n used for the sliders in this cell

def variance_non_spiked_gaussian(lambda_L, lambda_S, k, n, p):
    """Simulate the non-spiked-coordinate variance term for a Gaussian design.

    Returns the squared Frobenius norm of the rows from index ``k`` onwards of
    diag(sqrt(lambdas)) @ X.T @ inv(X @ X.T) for a Gaussian design X.
    """
    X = generate_X_gaussian(lambda_L, lambda_S, k, n, p)
    # X X^T is symmetric, so solve(X X^T, X[:, k:]) is the transpose of rows
    # k.. of X^T (X X^T)^{-1}; the Frobenius norm ignores transposes.
    # This avoids the dense (k+p) x (k+p) diagonal matrix and the explicit inverse.
    non_spiked_block = np.linalg.solve(X @ X.T, X[:, k:])
    return np.linalg.norm(np.sqrt(lambda_S) * non_spiked_block)**2


def compare_with_gaussian_variance_non_spiked(lambda_L, lambda_S_range, k, n, p):
    """Plot the non-spiked variance for a Gaussian design against the closed-form value.

    For every ``lambda_S`` in ``lambda_S_range`` one Gaussian design is drawn
    and its error is plotted together with ``variance_non_spiked_theoretical``.
    """
    gaussian_errors = []
    our_bounds = []
    for lambda_S in lambda_S_range:
        gaussian_errors.append(variance_non_spiked_gaussian(lambda_L, lambda_S, k, n, p))
        our_bounds.append(variance_non_spiked_theoretical(lambda_L, lambda_S, k, n, p))
    plt.plot(lambda_S_range, gaussian_errors, label='gaussian X')
    plt.plot(lambda_S_range, our_bounds, label='our bound')
    plt.ylim([0, 1])
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    plt.xlabel(r"$\lambda_S$")
    plt.ylabel("error")
    plt.legend()

# Only k and p get sliders; lambda_L, n and the lambda_S grid are held fixed.
interactive_comparison = interactive(
    compare_with_gaussian_variance_non_spiked,
    **{
        'lambda_L': fixed(1.),
        'lambda_S_range': fixed(np.linspace(0.1, 1, 100)),
        'k': generate_int_widget('k', n_value//2, 1, n_value),
        'n': fixed(n_value),
        'p': generate_int_widget('p', 2*n_value, n_value, 10 * n_value),
    }
)

interactive_comparison
