# Imports, config and setting global variables

In [1]:
import os
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# from sklearn.linear_model import LinearRegression, Lasso, Ridge

In [2]:
import logging

# Create the log directory if needed. `exist_ok=True` replaces the separate
# existence check, so there is no window between "check" and "create".
os.makedirs('logs', exist_ok=True)

LOGGING_FILE = os.path.join('logs', 'experiments.log')

# Send every INFO+ record both to the notebook output (StreamHandler) and to
# the log file (FileHandler), using one shared timestamped format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOGGING_FILE),
    ],
)


In [54]:
# Number of observations drawn from the data generating process
N_SAMPLES = 10_000

# Regularization strengths are swept over 10^MIN_LAMBDA_LOG_10 .. 10^MAX_LAMBDA_LOG_10
MIN_LAMBDA_LOG_10 = -3
MAX_LAMBDA_LOG_10 = 3
NUM_LAMBDAS = 100

logging.info(f'Generating {N_SAMPLES} samples')
# The exponent is already signed, so it is interpolated as-is; hard-coding an
# extra "-" in front of it used to print "10^(--3)".
logging.info(
    f'Using {NUM_LAMBDAS} lambdas equally spaced in log space '
    f'from 10^{MIN_LAMBDA_LOG_10} to 10^{MAX_LAMBDA_LOG_10}'
)

# Grid of regularization parameters, evenly spaced on a log10 scale
LAMBDAS = np.logspace(MIN_LAMBDA_LOG_10, MAX_LAMBDA_LOG_10, NUM_LAMBDAS)


2024-12-07 23:35:13 - INFO - Generating 10000 samples
2024-12-07 23:35:13 - INFO - Using 100 lambdas equally spaced in log space from 10^(--3) to 10^3


## Helpers

In [55]:
def empirical_risk(X, y, theta):
    """
    Mean squared error of the linear predictor X @ theta against y.

    The squared residuals are averaged (not summed).
    """
    residuals = y - X @ theta
    squared_residuals = residuals**2
    return np.mean(squared_residuals)


In [56]:
def filter_lambda_closest(df, target_lambda):
    """
    Return the row of `df` whose 'lambda' value is nearest to `target_lambda`.

    The result is a DataFrame slice holding at most one row (not a Series).
    """
    distance_to_target = (df['lambda'] - target_lambda).abs()
    # Positions ordered by increasing distance; keep only the nearest one
    nearest_position = distance_to_target.argsort()[:1]
    return df.iloc[nearest_position]

# Defining Data Generating Processes 

## (Slide 327/337) $y = 3x_1 - 2x_2 + \epsilon$, with $\epsilon \sim N(0, 1)$

In [57]:
# Seed a dedicated generator so that every "Restart & Run All" draws the same
# sample; without a seed the figures and the tolerance check on the analytic
# solution change from run to run.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Features: N_SAMPLES points drawn uniformly from [0, 1) in each of 2 dimensions
X = rng.random((N_SAMPLES, 2))

# True coefficients of the DGP y = 3*x_1 - 2*x_2 + epsilon
theta_true = np.array([3, -2])

# Targets: noiseless linear signal plus standard normal noise
y = X @ theta_true + rng.standard_normal(N_SAMPLES)

# Contour plots

### Contours of the unregularized loss function

In [58]:
# Bounds and resolution of the square (theta_1, theta_2) grid
min_theta, max_theta = -5, 5
values_for_each_theta = 50

theta_1_vals = np.linspace(min_theta, max_theta, values_for_each_theta)
theta_2_vals = np.linspace(min_theta, max_theta, values_for_each_theta)

# All (theta_1, theta_2) combinations, one per row: theta_1 is held fixed
# while theta_2 runs through its values, then theta_1 moves to its next value.
theta_pairs = np.column_stack([
    np.repeat(theta_1_vals, len(theta_2_vals)),
    np.tile(theta_2_vals, len(theta_1_vals)),
])

df_unreg_risk = pd.DataFrame(theta_pairs, columns=['theta_1', 'theta_2'])

In [59]:
# Evaluate the (unregularized) empirical risk at every grid point, row by row
df_unreg_risk["emp_risk"] = df_unreg_risk[['theta_1', 'theta_2']].apply(
    lambda theta_row: empirical_risk(X, y, theta_row),
    axis=1,
)

In [60]:
# Display the grid of (theta_1, theta_2) pairs together with their empirical risk
df_unreg_risk

Unnamed: 0,theta_1,theta_2,emp_risk
0,-5.0,-5.000000,36.757077
1,-5.0,-4.795918,35.559469
2,-5.0,-4.591837,34.389567
3,-5.0,-4.387755,33.247370
4,-5.0,-4.183673,32.132880
...,...,...,...
2495,5.0,4.183673,21.152857
2496,5.0,4.387755,22.207574
2497,5.0,4.591837,23.289997
2498,5.0,4.795918,24.400125


In [None]:
# Reshape the long (theta_1, theta_2, emp_risk) table into a 2-D grid whose
# rows follow theta_2 (y axis) and whose columns follow theta_1 (x axis).
risk_grid = df_unreg_risk.pivot(index='theta_2', columns='theta_1', values='emp_risk').values

fig_cont_plot = go.Figure(data=go.Contour(
    z=risk_grid,
    x=theta_1_vals,
    y=theta_2_vals,
    colorscale='Viridis',
    showscale=False  # Remove the color bar
))

fig_cont_plot.update_layout(
    title='Contour plot of empirical risk',
    xaxis_title='theta_1',
    yaxis_title='theta_2'
)

# Thin white axes through the origin: horizontal line at theta_2 = 0 ...
fig_cont_plot.add_shape(
    type='line',
    x0=min_theta, y0=0, x1=max_theta, y1=0,
    line=dict(color='white', width=1)
)

# ... and vertical line at theta_1 = 0
fig_cont_plot.add_shape(
    type='line',
    x0=0, y0=min_theta, x1=0, y1=max_theta,
    line=dict(color='white', width=1)
)

# Mark the true theta with a single marker. The marker used to be added twice
# (red, then an identical white one drawn on top), which hid the red marker
# and produced two legend entries with the same name. Only the white marker,
# i.e. the one that was actually visible, is kept.
fig_cont_plot.add_trace(go.Scatter(
    x=[theta_true[0]], y=[theta_true[1]],
    mode='markers',
    marker=dict(size=10, color='white'),
    name=f'theta_true = {theta_true}'
))

# The figure is intentionally not shown here: it is displayed once the
# regularized solutions have been overlaid on it further below.

# Finding thetas

In [68]:
def get_thetas(X, y, lmbda, add_intercept=False):
    """
    Computes the optimal thetas for the linear regression problem with L2 regularization.

    Solves the regularized normal equations
    (X^T X + lmbda * I) theta = X^T y.

    Args:
        X (np.ndarray): Data of shape (n_samples, n_features), without an
            intercept column
        y (np.ndarray): Target
        lmbda (float): Regularization parameter
        add_intercept (bool): If True, a column of ones is prepended to X
            before solving, so the first returned coefficient is the intercept

    Returns:
        np.ndarray: The solution theta, with one entry per column of X
        (including the intercept column when add_intercept is True).

    Raises:
        np.linalg.LinAlgError: If the regularized system matrix is singular.

    ToDo:
    - Here we also penalize the intercept term. I'm not sure if this is what we
    want for our experiments. Need to figure this out (or preferably, try both)

    Note:
        - (This is unrelated to our goal but may be interesting):
        performing QR decomposition before solving the normal equations did not
        provide any speedup even for large datasets. (We had just learned about
        this from Optimization course and I wanted to benchmark it)
    """
    if add_intercept:
        # Take the row count from X itself. It used to be read from
        # `n_samples` before that name was assigned, which raised
        # UnboundLocalError whenever add_intercept=True.
        X = np.c_[np.ones((X.shape[0], 1)), X]

    # Number of coefficients, counted after the optional intercept column
    n_features = X.shape[1]

    LHS = X.T.dot(X) + lmbda*np.eye(n_features)
    RHS = X.T.dot(y)

    return np.linalg.solve(LHS, RHS)

In [81]:
# Sanity check: with no regularization (lambda = 0) the closed-form solution
# should land close to the coefficients used to generate the data.
theta_analytic = get_thetas(X, y, lmbda=0, add_intercept=False)

distance_to_truth = np.linalg.norm(theta_analytic - theta_true)
assert distance_to_truth < 1e-1, f'Analytic solution is quite far {theta_analytic} != {theta_true}'

# Running the experiments

In [69]:
# Map every regularization strength in LAMBDAS to its ridge solution
theta_dict = dict(
    (lmbda, get_thetas(X, y, lmbda))
    for lmbda in LAMBDAS
)

In [82]:
# One row per lambda: the fitted coefficients plus their L1 and L2 norms.
# The dict keys (lambdas) start out as columns, so the frame is transposed and
# the lambda index is moved into a regular column before renaming.
df_thetas_per_lambda = (
    pd.DataFrame(theta_dict)
    .T
    .reset_index()
    .rename(columns={"index": "lambda", 0: 'theta_1', 1: 'theta_2'})
    .assign(
        L1_norm=lambda frame: frame[['theta_1', 'theta_2']].apply(
            lambda theta_row: np.linalg.norm(theta_row, ord=1), axis=1),
        L2_norm=lambda frame: frame[['theta_1', 'theta_2']].apply(
            lambda theta_row: np.linalg.norm(theta_row, ord=2), axis=1),
    )
)


### Adding thetas to the contour plot

In [83]:
# Target regularization strengths to highlight on the contour plot
lambdas_do_display = [0.1, 1, 2.5, 5, 10, 20, 100, 1000]

# For each target, look up the fitted solution whose lambda is nearest to it
# and overlay that solution as a green marker.
for lmbda in lambdas_do_display:
    df_thetas_per_lambda_lmbda = filter_lambda_closest(df_thetas_per_lambda, lmbda)
    closest_theta_marker = go.Scatter(
        name=f'lambda={lmbda}',
        mode='markers',
        marker={'size': 10, 'color': 'green'},
        x=df_thetas_per_lambda_lmbda['theta_1'],
        y=df_thetas_per_lambda_lmbda['theta_2'],
    )
    fig_cont_plot.add_trace(closest_theta_marker)

fig_cont_plot.show()

# Plotting the results

In [85]:

# L2 norm of the fitted coefficients against lambda, with a logarithmic y axis
fig = px.scatter(
    data_frame=df_thetas_per_lambda,
    x='lambda',
    y=['L2_norm'],
    log_y=True,
    labels={'value': 'Norm', 'variable': 'Lambda'},
    title='L2 norm of the coefficients (huge range for lambdas)',
)
fig.show()

In [None]:
# Zoom the existing L2-norm figure in on small lambdas (x in [0, 1]).
# A stray `s` statement that raised NameError has been removed from this cell.
fig.update_layout(title='L2 norm of the coefficients (small range for lambdas)')

fig.update_xaxes(range=[0, 1])
# The y axis is logarithmic (the figure was created with log_y=True), and
# Plotly reads the range of a log axis as log10 exponents. The desired bounds
# 3.6 .. 3.65 are therefore converted; passing them raw would show
# 10^3.6 .. 10^3.65 and leave the plot empty.
fig.update_yaxes(range=[np.log10(3.6), np.log10(3.65)])
fig.show()

# Conclusion

This DGP doesn't really let us observe any interesting results. I'll try
more complex DGPs next and see whether anything interesting shows up.