<h1>
    Imports

In [23]:
import numpy as np
import random
import sys
import os

<h1>
    Loading Data
   

In [24]:
# Create sample data if files don't exist
def create_sample_data():
    # Create sample data for demonstration
    n_samples, n_features = 100, 7
    
    # Create random training data
    X_train = np.random.randn(n_samples, n_features)
    true_weights = np.random.randn(n_features)
    y_train = X_train @ true_weights + np.random.randn(n_samples) * 0.1
    
    # Create test data
    X_test = np.random.randn(50, n_features)
    
    # Save to CSV files
    np.savetxt("X_train.csv", X_train, delimiter=",")
    np.savetxt("y_train.csv", y_train, delimiter=",")
    np.savetxt("X_test.csv", X_test, delimiter=",")
    
    return X_train, y_train, X_test

import os

data_dir = r"D:\Machine Learning from Columbia+\Module-3"
x_train_path = os.path.join(data_dir, "X_train.csv")
y_train_path = os.path.join(data_dir, "y_train.csv")
x_test_path = os.path.join(data_dir, "X_test.csv")

if not (os.path.exists(x_train_path) and os.path.exists(y_train_path) and os.path.exists(x_test_path)):
    X_train, y_train, X_test = create_sample_data()
else:
    X_train = np.genfromtxt(x_train_path, delimiter=",")
    y_train = np.genfromtxt(y_train_path, delimiter=",")
    X_test = np.genfromtxt(x_test_path, delimiter=",")

lambda_input = int(random.randrange(2**15)%10)
sigma2_input = float(random.randrange(2**15)%10 + 1)


<h1>
Part 1

PART 1: In this part you will implement the $\ell_2$-regularized least squares linear regression algorithm we have been discussing (ridge regression). Recall from the lectures that this takes the form:

$$w_{RR} = \arg\min_w \|y - Xw\|^2 + \lambda\|w\|^2.$$

Your task will be to write code that takes in data $y$ and $X$ and outputs $w_{RR}$ for an arbitrary value of $\lambda$.

In [25]:
## Solution for Part 1
def part1(lambda0, X_train, y_train):
    d = X_train.shape[1]
    temp = lambda0*np.eye(d) + X_train.T.dot(X_train)
    wRR = (np.linalg.inv(temp)).dot(X_train.T.dot(y_train))
    return wRR

In [26]:
wRR = part1(lambda_input, X_train, y_train)
wRR

array([-0.84586823,  0.68340145,  0.10772735, -0.79785917,  0.64700549,
        0.82833688, -2.29250051])

<h1>
    Part 2

In the same code, you will also implement the active learning procedure discussed in Lecture 5. For this problem, we will provide you with an arbitrary setting of $\lambda$ and $\sigma^2$ and ask you to provide us with the first 10 locations you would measure from a set $\mathcal{D} = \{x\}$ given a set of measured pairs (y, X). Please look over the slides carefully to remind yourself about the sequential evolution of the sets $\mathcal{D}$ and (y,X).

In [27]:
## Solution for Part 2
def update(lambda0, sigma2, X_train, d, y_train, old_xx, old_xy):
    old_xx = X_train.T.dot(X_train) + old_xx
    old_xy = X_train.T.dot(y_train) + old_xy
    new_var_inv = lambda0 * np.eye(d) + (1 / sigma2) * old_xx
    new_var = np.linalg.inv(new_var_inv)
    sigma_temp = lambda0 * sigma2 * np.eye(d) + old_xx
    new_mean = (np.linalg.inv(sigma_temp)).dot(old_xy)
    return new_var, new_mean, old_xx, old_xy

def part2(lambda0, sigma2, X_train, y_train, X_test):
    d = X_train.shape[1]
    active = []
    old_xx = np.zeros((d, d))
    old_xy = np.zeros(d)
    new_var, new_mean, old_xx, old_xy = update(lambda0, sigma2, X_train, d, y_train, old_xx, old_xy)
    wRR = new_mean
    indices = list(range(X_test.shape[0]))
    for i in range(0, 10):
        var_matrix = (X_test.dot(new_var)).dot(X_test.T)
        row_largest = np.argmax(var_matrix.diagonal())
        X_train = X_test[row_largest, :]
        y_train = X_train.dot(wRR)
        actual_row = indices[row_largest]
        active.append(actual_row)
        X_test = np.delete(X_test, row_largest, axis=0)
        indices.pop(row_largest)
        new_var, new_mean, old_xx, old_xy = update(lambda0, sigma2, X_train, d, y_train, old_xx, old_xy)
        wRR = new_mean
    active = [i + 1 for i in active]
    return active

In [28]:
active = part2(lambda_input, sigma2_input, X_train, y_train, X_test)
active

[35, 26, 39, 42, 25, 3, 33, 12, 29, 31]