# 01 Import libraries

In [1]:
import numpy as np
import torch

# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# and finally plain CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [2]:
import pandas as pd

In [3]:
%reload_ext autoreload
%autoreload 2

In [4]:
import importlib

In [5]:
import lib
importlib.reload(lib)

<module 'lib' from 'c:\\Users\\dongq\\OneDrive\\Desktop\\New results\\Learning-the-Optimal-Solution-Path\\lib\\__init__.py'>

In [169]:
from lib.lsp.basis_generator import customized_jacobi
from lib.lsp.basis_tf_module import Basis_TF_SGD
from lib.lsp.learn_solution_path import learn_solution_path
from lib.lsp.loss_fn_lsp import reg_unif_weighted_logit
from lib.fast_tensor_data_loader import FastTensorDataLoader
from lib.lsp.utils_lsp import get_errs_lsp

# 02 Instantiate dataset

In [170]:
# Load the pre-processed features and labels. Both paths are relative to the
# notebook's working directory.
X_df, y_df = (
    pd.read_csv(csv_name) for csv_name in ('X_processed.csv', 'y_processed.csv')
)

In [8]:
# Plain NumPy copies of the two DataFrames; squeeze() removes any length-1
# axes from the label array (presumably leaving a 1-D vector — confirm
# against y_processed.csv).
X, y = np.array(X_df), np.array(y_df).squeeze()

In [9]:
# float32 tensor copies of the feature matrix and the labels.
train_X, train_y = (torch.tensor(arr, dtype=torch.float32) for arr in (X, y))

In [10]:
# Training loader: shuffled batches with batch_size=1000.
# NOTE(review): this is "full" gradient descent only if the data set has at
# most 1000 rows — confirm against X_processed.csv.
GD_data_loader = FastTensorDataLoader(
    train_X, train_y,
    batch_size=1000,
    shuffle=True,
)
# Evaluation loader: wraps the same training tensors (there is no held-out
# split in this notebook), visited in fixed order.
test_data_loader = FastTensorDataLoader(
    train_X, train_y,
    batch_size=1000,
    shuffle=False,
)

In [11]:
# End points of the lambda range, kept as one-element lists; later cells
# read them as lam_min[0] / lam_max[0].
lam_min, lam_max = [0], [1]
# Number of feature columns in X.
input_dim = X.shape[1]
# Loss used both for training and for evaluating the learned path.
loss_fn = reg_unif_weighted_logit

In [12]:
# Load the pre-computed reference ("exact") solution path. As the rendered
# table shows, it holds a 'losses' column plus coefficient columns
# 'theta_0' ... 'theta_45'.
truth = pd.read_csv('exact_soln_list_jacobi.csv')

# Display the DataFrame (pandas truncates the view automatically)
truth

Unnamed: 0,losses,theta_0,theta_1,theta_2,theta_3,theta_4,theta_5,theta_6,theta_7,theta_8,...,theta_36,theta_37,theta_38,theta_39,theta_40,theta_41,theta_42,theta_43,theta_44,theta_45
0,0.190793,-0.373029,0.186622,0.227443,-0.023583,0.164822,0.142992,0.168018,0.026305,0.142661,...,-0.018010,0.054908,-0.005629,0.000000,0.000000,-0.031270,-0.018010,-0.139608,-0.116840,-0.061673
1,0.192130,-0.371212,0.186634,0.227586,-0.023679,0.164875,0.142262,0.167428,0.025960,0.142993,...,-0.017600,0.054199,-0.005657,0.000069,0.000098,-0.031115,-0.017600,-0.139385,-0.116402,-0.061226
2,0.193463,-0.369401,0.186645,0.227729,-0.023775,0.164927,0.141535,0.166842,0.025615,0.143323,...,-0.017191,0.053493,-0.005685,0.000137,0.000195,-0.030961,-0.017191,-0.139162,-0.115965,-0.060781
3,0.194792,-0.367595,0.186656,0.227870,-0.023870,0.164977,0.140811,0.166258,0.025271,0.143653,...,-0.016784,0.052790,-0.005713,0.000205,0.000293,-0.030808,-0.016784,-0.138939,-0.115529,-0.060337
4,0.196117,-0.365794,0.186665,0.228009,-0.023964,0.165027,0.140089,0.165677,0.024927,0.143982,...,-0.016378,0.052089,-0.005741,0.000273,0.000390,-0.030655,-0.016378,-0.138717,-0.115094,-0.059894
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,0.327684,0.692472,-0.029101,-0.033382,-0.017462,-0.005831,-0.225541,-0.146453,-0.051680,0.070009,...,0.105324,-0.201982,0.000913,0.013100,0.018598,0.063129,0.105324,0.157442,0.161747,0.171301
1020,0.325866,0.693930,-0.029932,-0.034521,-0.017548,-0.006565,-0.226194,-0.148351,-0.051246,0.068693,...,0.105340,-0.202186,0.000990,0.013040,0.018537,0.063366,0.105340,0.158131,0.162149,0.171464
1021,0.324032,0.695399,-0.030769,-0.035668,-0.017634,-0.007304,-0.226853,-0.150274,-0.050809,0.067366,...,0.105357,-0.202393,0.001066,0.012980,0.018475,0.063604,0.105357,0.158825,0.162554,0.171627
1022,0.322184,0.696878,-0.031613,-0.036822,-0.017720,-0.008050,-0.227517,-0.152223,-0.050369,0.066028,...,0.105374,-0.202601,0.001143,0.012919,0.018415,0.063843,0.105374,0.159523,0.162962,0.171792


In [13]:
# Names of the 46 coefficient columns: 'theta_0' through 'theta_45'.
selected_columns = [f'theta_{idx}' for idx in range(46)]

# Split the reference table into the coefficient matrix and the loss vector.
true_thetas = truth[selected_columns].to_numpy()
true_losses = truth['losses'].to_numpy()

# 03 SGD with Diminishing LR by Distance Diagnostic

Recall that our method runs SGD over random $\tilde\lambda$'s with a linear basis $\Phi(\tilde\lambda)$ of our choice. We want to approximate $\theta$ with $\Phi(\lambda)\beta$, so the objective function is $\min_\beta h(\Phi(\tilde\lambda)\beta, \tilde\lambda) = (1-\tilde\lambda)\,\mathrm{BCE}(X_\text{pass}\Phi(\tilde\lambda)\beta,\ y_\text{pass}) + \tilde\lambda\,\mathrm{BCE}(X_\text{fail}\Phi(\tilde\lambda)\beta,\ y_\text{fail})$. For each batch of the training data set, we draw a new random $\tilde\lambda$. If the batch size is 1, this is equivalent to standard SGD.

We use a diminishing learning rate to better demonstrate convergence. If we use a constant learning rate, the solution path error will eventually perform a random walk after descending to a certain threshold value.

Set `weighted_avg` to `False` to see this random walk.

In [14]:
# Epoch budget passed to every learn_solution_path call in this notebook.
max_epochs = 2000

In [171]:
# Jacobi parameters [alpha, beta]. They are used to build the basis, passed
# to learn_solution_path together with distribution='beta', and embedded in
# the output file names.
alpha_beta = [-0.3, -0.7]

In [172]:
# Basis generator built from the Jacobi parameters; it is handed to both
# learn_solution_path and Basis_TF_SGD in the cells below.
phi_lam = customized_jacobi(alpha_beta)

In [173]:
def thresh_basis(basis_dim):
    """Return the constant threshold -1, whatever ``basis_dim`` is.

    The argument is accepted only so the function matches the callback
    signature expected by ``learn_solution_path(thresh_basis=...)``.
    NOTE(review): -1 presumably disables threshold-based stopping so every
    run uses the full epoch budget — confirm in lib.lsp.learn_solution_path.
    """
    return -1

## num basis func = 5

In [174]:
# Number of basis functions for this run.
basis_dim = 5

# Initial learning rate for basis_dim = 5. Values recorded for each
# parameter pair (the labels appear to be alpha_beta settings):
#   [-.5, -.5] -> 0.5
#   [-.3, -.7] -> 1        <- in use
#   [.3, -.7]  -> .5
#   [0, 0]     -> 0.25
#   [0, 1]     -> 0.25
#   [1, 1]     -> 0.125
# (an unlabeled, commented-out alternative was init_lr = .015625)
init_lr = 1


In [175]:
# Seed both NumPy and PyTorch so the run is repeatable.
np.random.seed(8675309)
torch.manual_seed(8675309)

# Train the solution path. The printed trace appears every 100 epochs
# (trace_frequency) and the error history is recorded every 10
# (record_frequency). `weight` is reused below to rebuild the model.
(num_itr_history, sup_err_history_last_itr, weight, lr, itr) = learn_solution_path(
    input_dim, basis_dim, phi_lam, max_epochs,
    GD_data_loader, test_data_loader, loss_fn,
    lam_min, lam_max, true_losses,
    init_lr=init_lr,
    diminish=True,
    thresh_basis=thresh_basis,
    weighted_avg=False,
    record_frequency=10,
    distribution='beta',
    alpha_beta=alpha_beta,
    device=device,
    trace_frequency=100,
)

# Convert the recorded history to an ndarray for the tabular export below.
sup_err_history_last_itr = np.array(sup_err_history_last_itr)

--------approximate solution path for # itr = 100 complete--------
# epoch: 100	 sup error: 0.002410292625427246
--------approximate solution path for # itr = 200 complete--------
# epoch: 200	 sup error: 0.001023411750793457
--------approximate solution path for # itr = 300 complete--------
# epoch: 300	 sup error: 0.0007383823394775391
--------approximate solution path for # itr = 400 complete--------
# epoch: 400	 sup error: 0.0010739564895629883
--------approximate solution path for # itr = 500 complete--------
# epoch: 500	 sup error: 0.0008317530155181885
--------approximate solution path for # itr = 600 complete--------
# epoch: 600	 sup error: 0.0007255673408508301
--------approximate solution path for # itr = 700 complete--------
# epoch: 700	 sup error: 0.0015810728073120117
--------approximate solution path for # itr = 800 complete--------
# epoch: 800	 sup error: 0.0007995069026947021
--------approximate solution path for # itr = 900 complete--------
# epoch: 900	 sup error

In [176]:
# Results file for this alpha_beta pair. The basis_dim = 7 and 9 sections
# re-open it and add their own sup_err_* columns.
file_path = f'LSP_results_exact_fixed_basis_jacobi_{alpha_beta[0]}_{alpha_beta[1]}_last_iterate_shrink_lr.csv'

# One row per recorded checkpoint: iteration count and sup error (basis_dim = 5).
LSP_results_exact_fixed_basis = pd.DataFrame(
    np.column_stack((num_itr_history, sup_err_history_last_itr)),
    columns=['num_itr', 'sup_err_5'],
)
LSP_results_exact_fixed_basis.to_csv(file_path, index=False)

# Read the file back so the displayed table is exactly what was written.
df = pd.read_csv(file_path)
df

Unnamed: 0,num_itr,sup_err_5
0,10.0,0.560709
1,20.0,0.046058
2,30.0,0.037667
3,40.0,0.013352
4,50.0,0.020748
...,...,...
195,1960.0,0.002105
196,1970.0,0.001708
197,1980.0,0.001193
198,1990.0,0.001472


In [177]:
# Rebuild the basis model from the weights returned by the training run above
# (init_weight=weight), then place it on the selected device.
model = Basis_TF_SGD(input_dim, basis_dim, phi_lam, init_weight=weight, intercept=True).to(device)

In [178]:
# Errors of the fitted model against true_losses over [lam_min, lam_max],
# computed on the evaluation loader with the same loss function.
errs = get_errs_lsp(lam_min[0], lam_max[0], true_losses, model, test_data_loader, loss_fn)

In [179]:
# Evenly spaced lambda grid running from lam_max DOWN to lam_min, with one
# point per entry of true_losses.
# NOTE(review): assumes get_errs_lsp reports errors in this same
# descending-lambda order — confirm before pairing the two.
lambdas = np.linspace(lam_max[0], lam_min[0], len(true_losses))

In [180]:
# Persist the per-lambda errors for basis_dim = 5 next to their lambda grid.
# The file name uses lowercase 'jacobi' so that it matches, character for
# character, the name the basis_dim = 7 and 9 cells re-open. The previous
# 'Jacobi' spelling only resolved on case-insensitive filesystems and caused
# FileNotFoundError elsewhere.
pd.DataFrame(
    np.column_stack((errs, lambdas)),
    columns=['err_5', 'lambdas'],
).to_csv(f'LSP_errs_jacobi_{alpha_beta[0]}_{alpha_beta[1]}.csv', index=False)

## num basis func = 7

In [181]:
# Number of basis functions for this run.
basis_dim = 7

# Initial learning rate for basis_dim = 7. Values recorded for each
# parameter pair (the labels appear to be alpha_beta settings):
#   [-.5, -.5] -> 0.5
#   [-.3, -.7] -> 0.5      <- in use
#   [.3, -.7]  -> .25
#   [0, 0]     -> 0.125
#   [0, 1]     -> 0.125
#   [1, 1]     -> 0.0625
# (an unlabeled, commented-out alternative was init_lr = .015625)
init_lr = 0.5

In [182]:
# Re-seed both NumPy and PyTorch so this run starts from the same random
# state as the other basis sizes.
np.random.seed(8675309)
torch.manual_seed(8675309)

# Train the solution path with the current basis_dim / init_lr. The printed
# trace appears every 100 epochs and the history is recorded every 10.
(num_itr_history, sup_err_history_last_itr, weight, lr, itr) = learn_solution_path(
    input_dim, basis_dim, phi_lam, max_epochs,
    GD_data_loader, test_data_loader, loss_fn,
    lam_min, lam_max, true_losses,
    init_lr=init_lr,
    diminish=True,
    thresh_basis=thresh_basis,
    weighted_avg=False,
    record_frequency=10,
    distribution='beta',
    alpha_beta=alpha_beta,
    device=device,
    trace_frequency=100,
)

# Convert the recorded history to an ndarray so it can be stored as a column.
sup_err_history_last_itr = np.array(sup_err_history_last_itr)

--------approximate solution path for # itr = 100 complete--------
# epoch: 100	 sup error: 0.008315816521644592
--------approximate solution path for # itr = 200 complete--------
# epoch: 200	 sup error: 0.003832399845123291
--------approximate solution path for # itr = 300 complete--------
# epoch: 300	 sup error: 0.0009881556034088135
--------approximate solution path for # itr = 400 complete--------
# epoch: 400	 sup error: 0.00033983588218688965
--------approximate solution path for # itr = 500 complete--------
# epoch: 500	 sup error: 0.00016671419143676758
--------approximate solution path for # itr = 600 complete--------
# epoch: 600	 sup error: 0.00012487173080444336
--------approximate solution path for # itr = 700 complete--------
# epoch: 700	 sup error: 0.00013592839241027832
--------approximate solution path for # itr = 800 complete--------
# epoch: 800	 sup error: 7.31050968170166e-05
--------approximate solution path for # itr = 900 complete--------
# epoch: 900	 sup er

In [183]:
file_path = f'LSP_results_exact_fixed_basis_jacobi_{alpha_beta[0]}_{alpha_beta[1]}_last_iterate_shrink_lr.csv'

# Re-open the shared results file, attach this run's history as 'sup_err_7',
# and write the table back in place. The history must have one entry per
# existing row, otherwise pandas raises ValueError.
df = pd.read_csv(file_path).assign(sup_err_7=sup_err_history_last_itr)
df.to_csv(file_path, index=False)

In [184]:
# Rebuild the basis model from the weights of the basis_dim = 7 run
# (init_weight=weight), then place it on the selected device.
model = Basis_TF_SGD(input_dim, basis_dim, phi_lam, init_weight=weight, intercept=True).to(device)

In [185]:
# Errors of the basis_dim = 7 model against true_losses over
# [lam_min, lam_max], computed on the evaluation loader.
errs = get_errs_lsp(lam_min[0], lam_max[0], true_losses, model, test_data_loader, loss_fn)

In [186]:
# Per-lambda error file started by the basis_dim = 5 section.
# NOTE: on a case-sensitive filesystem this name must match the name used
# when the file was first written, character for character.
file_path = f'LSP_errs_jacobi_{alpha_beta[0]}_{alpha_beta[1]}.csv'

# Attach this run's errors as 'err_7' and write the table back in place.
df = pd.read_csv(file_path).assign(err_7=errs)
df.to_csv(file_path, index=False)

## num basis func = 9

In [187]:
# Number of basis functions for this run.
basis_dim = 9

# Initial learning rate for basis_dim = 9. Values recorded for each
# parameter pair (the labels appear to be alpha_beta settings):
#   [-.5, -.5] -> 0.25
#   [-.3, -.7] -> .5       <- in use
#   [.3, -.7]  -> .25
#   [0, 0]     -> 0.0625
#   [0, 1]     -> 0.125
#   [1, 1]     -> 0.0625
# (an unlabeled, commented-out alternative was init_lr = .015625)
init_lr = .5

In [188]:
# Re-seed both NumPy and PyTorch so this run starts from the same random
# state as the other basis sizes.
np.random.seed(8675309)
torch.manual_seed(8675309)

# Train the solution path with the current basis_dim / init_lr. The printed
# trace appears every 100 epochs and the history is recorded every 10.
(num_itr_history, sup_err_history_last_itr, weight, lr, itr) = learn_solution_path(
    input_dim, basis_dim, phi_lam, max_epochs,
    GD_data_loader, test_data_loader, loss_fn,
    lam_min, lam_max, true_losses,
    init_lr=init_lr,
    diminish=True,
    thresh_basis=thresh_basis,
    weighted_avg=False,
    record_frequency=10,
    distribution='beta',
    alpha_beta=alpha_beta,
    device=device,
    trace_frequency=100,
)

# Convert the recorded history to an ndarray so it can be stored as a column.
sup_err_history_last_itr = np.array(sup_err_history_last_itr)

--------approximate solution path for # itr = 100 complete--------
# epoch: 100	 sup error: 0.008001536130905179
--------approximate solution path for # itr = 200 complete--------
# epoch: 200	 sup error: 0.002504140138626154
--------approximate solution path for # itr = 300 complete--------
# epoch: 300	 sup error: 0.0005485713481903631
--------approximate solution path for # itr = 400 complete--------
# epoch: 400	 sup error: 0.00020176172256475278
--------approximate solution path for # itr = 500 complete--------
# epoch: 500	 sup error: 8.507072925567627e-05
--------approximate solution path for # itr = 600 complete--------
# epoch: 600	 sup error: 3.702938556676694e-05
--------approximate solution path for # itr = 700 complete--------
# epoch: 700	 sup error: 4.227459430702907e-05
--------approximate solution path for # itr = 800 complete--------
# epoch: 800	 sup error: 1.5854835510253906e-05
--------approximate solution path for # itr = 900 complete--------
# epoch: 900	 sup err

--------approximate solution path for # itr = 1700 complete--------
# epoch: 1700	 sup error: 4.3213367462158203e-07
--------approximate solution path for # itr = 1800 complete--------
# epoch: 1800	 sup error: 5.364418029785156e-07
--------approximate solution path for # itr = 1900 complete--------
# epoch: 1900	 sup error: 4.76837158203125e-07
--------approximate solution path for # itr = 2000 complete--------
# epoch: 2000	 sup error: 6.556510925292969e-07


In [189]:
file_path = f'LSP_results_exact_fixed_basis_jacobi_{alpha_beta[0]}_{alpha_beta[1]}_last_iterate_shrink_lr.csv'

# Re-open the shared results file, attach this run's history as 'sup_err_9',
# and write the table back in place. The history must have one entry per
# existing row, otherwise pandas raises ValueError.
df = pd.read_csv(file_path).assign(sup_err_9=sup_err_history_last_itr)
df.to_csv(file_path, index=False)

In [190]:
# Rebuild the basis model from the weights of the basis_dim = 9 run
# (init_weight=weight), then place it on the selected device.
model = Basis_TF_SGD(input_dim, basis_dim, phi_lam, init_weight=weight, intercept=True).to(device)

In [191]:
# Errors of the basis_dim = 9 model against true_losses over
# [lam_min, lam_max], computed on the evaluation loader.
errs = get_errs_lsp(lam_min[0], lam_max[0], true_losses, model, test_data_loader, loss_fn)

In [192]:
# Per-lambda error file started by the basis_dim = 5 section.
# NOTE: on a case-sensitive filesystem this name must match the name used
# when the file was first written, character for character.
file_path = f'LSP_errs_jacobi_{alpha_beta[0]}_{alpha_beta[1]}.csv'

# Attach this run's errors as 'err_9' and write the table back in place.
df = pd.read_csv(file_path).assign(err_9=errs)
df.to_csv(file_path, index=False)