In [2]:
#packages
import numpy as np
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.linalg import qr
from scipy.sparse import csr_matrix, coo_matrix
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from scipy.linalg import lstsq

In [3]:
def fast_sketch_matrix(m, n_columns):
    row_indices = np.random.randint(0, m, size=n_columns)  # Pick one random row per column
    col_indices = np.arange(n_columns)  # Each column gets one entry
    values = np.random.choice([1, -1], size=n_columns)  # ±1 values

    # Directly construct the sparse matrix
    S = csr_matrix((values, (row_indices, col_indices)), shape=(m, n_columns))

    return S

In [4]:
#Higher leverage scores indicate more influential data points.
def estimate_leverage_scores(A, R, gamma):
    n, d = A.shape
    k = int(np.ceil(d / gamma))  # Choose k based on γ
    G = np.random.randn(d, k) / np.sqrt(k) #scale the matrix
    ARG = A @ (R @ G)
    leverage_scores = np.sum(ARG ** 2, axis=1)
    
    return leverage_scores


In [7]:
def ill_conditioned_matrix(n, d, condition_number=1e10):
    # Generate a random matrix using SVD decomposition
    U, _ , Vt = np.linalg.svd(np.random.randn(n, d), full_matrices=False)
    
    # Create singular values that decay exponentially
    singular_values = np.geomspace(1, 1 / condition_number, num=d)
    
    # Construct ill-conditioned matrix
    A = U @ np.diag(singular_values) @ Vt
    
    # Generate a true solution
    x_true = np.random.randn(d)
    
    # Generate noisy observations
    b = A @ x_true + 0.01 * np.random.randn(n)  # Small noise
    
    return A, b, x_true

In [89]:
n = 100000  # Number of samples
d = 300   # Number of features

# Generate an ill-conditioned matrix
U = np.random.randn(n, d)
s = np.logspace(-1, -10, d)  # Exponentially decreasing singular values
V = np.random.randn(d, d)
X = U @ np.diag(s) @ V.T

# Generate true parameters and target values
true_params = np.random.randn(d, 1)
y = X @ true_params + np.random.normal(0, 0.1, (n, 1))

In [90]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [110]:
def fast_least_squares_sgd_analysis(A, b, m, T=10000, eta=0.01, gamma=0.1, batch_size=10):
    start_total_time = time.perf_counter()
    n, d = A.shape
    A = np.c_[np.ones(n), A] #for intercept
    iteration_times = []
    
    # Start timing for sketching
    sketch_start_time = time.perf_counter()
    # Step 1: Generate the sketch matrix S using the sketch_matrix function
    S = fast_sketch_matrix(m, n)


    # Step 2: Compute SA and Sb
    SA = S @ A
    Sb = S @ b

    sketching_time = time.perf_counter() - sketch_start_time

    # Step 3: Compute QR decomposition of SA
    qr_start = time.perf_counter()
    Q, R_inv = np.linalg.qr(SA)
    R = np.linalg.inv(R_inv)
    QR_decomposition_time = time.perf_counter() - qr_start

    # Step 4: Compute leverage score estimates
    leverage_start = time.perf_counter()
    leverage_scores = estimate_leverage_scores(A, R, gamma)
    leverage_probs = leverage_scores / np.sum(leverage_scores)  # Normalize
    leverage_end = time.perf_counter() - leverage_start
    print(leverage_end)
    
    x = np.random.randn(d + 1,1)

    # Step 6: Perform mini-batch SGD
    for t in range(T):
        iter_start = time.perf_counter()
        # Step 6.1: Sample batch indices based on leverage scores
        batch_indices = np.random.choice(n, size=batch_size, p=leverage_probs)
        
        # Step 6.2: Construct StA and Stb for mini-batch
        StA = A[batch_indices]
        Stb = b[batch_indices]

        # Step 6.3: Compute gradient of the least squares loss
        gt = 2 * StA.T @ (StA @ x - Stb)

        # Step 6.4: Update x using preconditioned gradient
        x = x - eta * R @ (R.T @ gt)
        iteration_times.append(time.perf_counter() - iter_start)
        print(iteration_times)

        if np.linalg.norm(gt) < 1e-3:  # Early stopping criterion
            break
    total_time = time.perf_counter() - start_total_time

    return x[1:],x[0],QR_decomposition_time ,iteration_times, sketching_time , total_time

In [109]:
def fast_least_squares_sgd_with_intercept_no_sketch(A, b, T=10000, eta=0.01, gamma=0.1, batch_size=10):
    start_total_time = time.perf_counter()
    n, d = A.shape
    A = np.c_[np.ones(n), A]  # for intercept
    d += 1
    iteration_times = []

    # Step 1: Compute QR decomposition of A
    start_time = time.time()
    Q, R_inv = np.linalg.qr(A)
    R = np.linalg.inv(R_inv)
    QR_decomposition_time = time.time() - start_time

    # Step 2: Compute leverage score estimates
    leverage_scores = estimate_leverage_scores(A, R, gamma)
    leverage_probs = leverage_scores / np.sum(leverage_scores)  # Normalize

    # Step 3: Compute initial x0 by solving (Ax = b)
    x = np.random.randn(d,1)

    # Step 4: Perform mini-batch SGD
    for t in range(T):
        iter_start = time.perf_counter()
        # Step 4.1: Sample batch indices based on leverage scores
        batch_indices = np.random.choice(n, size=batch_size, p=leverage_probs)
        
        # Step 4.2: Construct StA and Stb for mini-batch
        StA = A[batch_indices]
        Stb = b[batch_indices]

        # Step 4.3: Compute gradient of the least squares loss
        gt = 2 * StA.T @ (StA @ x - Stb)

        # Step 4.4: Update x using preconditioned gradient
        x = x - eta * R @ (R.T @ gt)
        iteration_times.append(time.perf_counter() - iter_start)

        if np.linalg.norm(gt) < 1e-3:  # Early stopping criterion
            break
    total_time = time.perf_counter() - start_total_time

    return x[1:], x[0], QR_decomposition_time , iteration_times, total_time

In [None]:
iterations = 1
results = []
m = 5000

for i in range(iterations):
    x_estimate , intercept, qr_time, mini_batch_time, sketching_time ,total_time = fast_least_squares_sgd_analysis(X_train,y_train,m)
    mse = np.mean(((X_test @ x_estimate) + intercept - y_test)**2)
    results.append({'total time': total_time,'QR_time' : qr_time, 'mini batch time' : mini_batch_time ,'Sketching time' : sketching_time , 'x_estimate' : x_estimate, 'intercept' : intercept, 'mse' : mse})

ill_fast_sketched_SGD_df = pd.DataFrame(results)


In [104]:
ill_fast_sketched_SGD_df

Unnamed: 0,total time,QR_time,mini batch time,Sketching time,x_estimate,intercept,mse
0,2.690288,0.124412,"[0.0021176000591367483, 0.0011691998224705458,...",0.030519,"[[36775.99706230154], [-7841.8027785017075], [...",[-0.7810871587903552],69.292882


In [94]:
ill_fast_sketched_SGD_df["mini batch time"].apply(len)

0    100
Name: mini batch time, dtype: int64

In [95]:
iterations = 1
results = []

for i in range(iterations):
    start_time = time.time()
    x_estimate, _, _ , _ = np.linalg.lstsq(X_train, y_train, rcond=None)
    end_time = time.time()
    mse = np.mean(((X_test @ x_estimate) - y_test)**2)
    iteration_time = end_time - start_time
    
    results.append({'time': iteration_time, 'x_estimate' : x_estimate, 'mse' : mse})
ill_non_sketch_nplstq_df = pd.DataFrame(results)

In [96]:
ill_non_sketch_nplstq_df

Unnamed: 0,time,x_estimate,mse
0,1.401599,"[[223529.66069337373], [167.416248352587], [-1...",0.010052


In [107]:
iterations = 1
results = []
m = 3000

for i in range(iterations):
    x_estimate , intercept, qr_time, mini_batch_time ,total_time = fast_least_squares_sgd_with_intercept_no_sketch(X_train , y_train)
    mse = np.mean(((X_test @ x_estimate) + intercept - y_test)**2)
    results.append({'total time': total_time,'QR_time' : qr_time, 'mini batch time' : mini_batch_time ,'Sketching time' : sketching_time , 'x_estimate' : x_estimate, 'intercept' : intercept, 'mse' : mse})

ill_fast_non_sketched_SGD_df = pd.DataFrame(results)

In [108]:
ill_fast_non_sketched_SGD_df

Unnamed: 0,total time,QR_time,mini batch time,Sketching time,x_estimate,intercept,mse
0,4.936535,2.529799,"[0.001753099961206317, 0.0016756998375058174, ...",0.030519,"[[1048.3671892516766], [97093.84475151525], [-...",[-0.5521399385493451],38.20835
