# 01 Import libraries

In [None]:
import numpy as np
import torch


# Pick the compute backend in order of preference: CUDA first, then MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [None]:
import pandas as pd

In [None]:
import importlib

## Import our own modules

In [None]:
!rm -r Learning-the-Optimal-Solution-Path
!git clone https://github.com/Cumberkid/Learning-the-Optimal-Solution-Path.git

rm: cannot remove 'Learning-the-Optimal-Solution-Path': No such file or directory
Cloning into 'Learning-the-Optimal-Solution-Path'...
remote: Enumerating objects: 1664, done.
remote: Counting objects: 100% (634/634), done.
remote: Compressing objects: 100% (275/275), done.
remote: Total 1664 (delta 481), reused 479 (delta 359), pack-reused 1030
Receiving objects: 100% (1664/1664), 13.23 MiB | 20.72 MiB/s, done.
Resolving deltas: 100% (1118/1118), done.


(The cells below assume the notebook runs on Google Colab, where the repository is cloned into `/content`.)

In [None]:
import sys

In [None]:
# Make the cloned repository importable by adding its root to the module search path.
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
repo_root = '/content/Learning-the-Optimal-Solution-Path'
if repo_root not in sys.path:
    sys.path.append(repo_root)

In [None]:
import lib
# Re-execute the already-imported `lib` package in this kernel — presumably so a
# freshly cloned copy is picked up without restarting the runtime.
# NOTE(review): importlib.reload reloads only the module it is given, not
# submodules that were imported earlier.
importlib.reload(lib)

<module 'lib' from '/content/Learning-the-Optimal-Solution-Path/lib/__init__.py'>

In [None]:
from lib.fast_tensor_data_loader import FastTensorDataLoader
from lib.ngs.naive_grid_search import naive_grid_search
from lib.ngs.utils_ngs import get_losses
from lib.ngs.loss_fn import reg_unif_weighted_logit

# 02 Load data

In [None]:
# Directory that holds the processed data inside the cloned repository.
# This is the Colab location; change this single constant when running elsewhere.
DATA_DIR = '/content/Learning-the-Optimal-Solution-Path/experiments/fair-regression/data'

X_df = pd.read_csv(f'{DATA_DIR}/X_processed.csv')  # inputs (X)
y_df = pd.read_csv(f'{DATA_DIR}/y_processed.csv')  # targets (y)

In [None]:
# Convert both frames to NumPy arrays; squeezing drops any length-1 axis of the targets.
X, y = np.array(X_df), np.squeeze(np.array(y_df))

In [None]:
# Copy the NumPy inputs and targets into float32 tensors.
train_X, train_y = (torch.tensor(arr, dtype=torch.float32) for arr in (X, y))

In [None]:
# Number of samples; used as the batch size so that "full" gradient descent really
# sees every data point in one batch, whatever the size of the dataset.
num_samples = train_X.shape[0]

# full gradient descent uses all data points (a single batch spanning the dataset)
GD_data_loader = FastTensorDataLoader(train_X, train_y, batch_size=num_samples, shuffle=True)
# stochastic gradient descent uses mini-batches of 20 samples
SGD_data_loader = FastTensorDataLoader(train_X, train_y, batch_size=20, shuffle=True)
# "test" data: the same training tensors, read in fixed order (no shuffling)
test_data_loader = FastTensorDataLoader(train_X, train_y, batch_size=1000, shuffle=False)

# 03 Compute the true solution path

We use Naive Grid Search on a grid of $2^{10}$ points, trained by full gradient descent with a tuned learning rate of $1$ and 5000 iterations at each grid point, to generate the reference ("true") solution path.

In [None]:
# End points of the hyperparameter range handed to the grid search
# (presumably the regularization weight — confirm against naive_grid_search).
lam_min, lam_max = 0, 1
# Objective minimized at every grid point.
loss_fn = reg_unif_weighted_logit
# Number of input features = number of columns of X.
input_dim = X.shape[1]

In [None]:
num_grid = 1 << 10  # 2**10 = 1024 grid points
# Spacing between neighbouring grid points.
fine_delta_lam = (lam_max - lam_min) / (num_grid - 1)
# Evenly spaced grid running from lam_max down to lam_min, end points included.
lambdas = np.linspace(start=lam_max, stop=lam_min, num=num_grid)

In [None]:
# Iteration budget and learning rate (previously tuned) for the grid search.
epochs, lr = 5000, 1

In [None]:
# Run the naive grid search with the full-batch loader, then unpack its five outputs.
ngs_output = naive_grid_search(
    lam_min, lam_max, num_grid, epochs, loss_fn, GD_data_loader, input_dim,
    lr=lr, device=device,
)
total_itr, reg_params, intercepts, weights, grid_pass_error = ngs_output

In [None]:
# Evaluate the fitted intercepts/weights returned by the grid search on test_data_loader.
losses = get_losses(
    lam_min, lam_max, fine_delta_lam,
    intercepts, weights, reg_params,
    test_data_loader, loss_fn, device,
)

In [None]:
# Collect the weights returned by the grid search into a single NumPy array.
# The recorded output shows shape (1024, 45): one row per grid point.
thetas = np.array(weights)
print(thetas.shape)

(1024, 45)


In [None]:
# Column names: 'losses', then 'theta_0' for the intercept column, then
# 'theta_1' ... 'theta_p' for the p weight columns of `thetas`.
# Deriving the names from the array shape keeps the header in sync with the
# data if the number of features changes.
num_coefs = thetas.shape[1] + 1  # weight columns plus the intercept
headers = ['losses'] + [f'theta_{i}' for i in range(num_coefs)]

# One row per grid point: loss, intercept, weights.
exact_soln_list = pd.DataFrame(np.column_stack((losses, intercepts, thetas)), columns=headers)

# Save the DataFrame to a CSV file
exact_soln_list.to_csv('exact_soln_list.csv', index=False)

In [None]:
# Read the saved solution path ('exact_soln_list.csv') back into a DataFrame
truth = pd.read_csv('exact_soln_list.csv')

# Display the DataFrame (the bare last expression is rendered as the cell output)
truth

In [None]:
# `truth` was already loaded from 'exact_soln_list.csv' by the preceding cell,
# so the file is not read a second time here.

# Display the DataFrame
truth

Unnamed: 0,losses,theta_0,theta_1,theta_2,theta_3,theta_4,theta_5,theta_6,theta_7,theta_8,...,theta_36,theta_37,theta_38,theta_39,theta_40,theta_41,theta_42,theta_43,theta_44,theta_45
0,0.000033,-2.734417,0.965609,1.547809,-0.461127,0.750339,0.785537,0.903840,0.609724,0.245244,...,-0.092991,0.353215,-0.022834,0.000000,0.000000,-0.237390,-0.092991,-0.696193,-0.774274,-0.910738
1,0.003984,-1.237352,0.868619,2.217027,-1.023908,0.577028,0.055938,-0.289084,-0.193347,0.491067,...,0.791513,-0.655901,-0.348404,0.159421,0.240847,-0.197884,0.791513,-1.152830,-0.358285,-0.382138
2,0.006419,-0.731118,0.968288,2.944446,-1.317535,0.325186,-0.051009,-0.400514,-1.247546,0.722822,...,1.094857,-0.983527,-0.912177,0.423760,0.587731,-0.237524,1.094857,-1.540321,0.028430,-0.202753
3,0.008501,-0.387372,1.302629,3.495369,-1.586220,0.168759,0.024970,-0.545149,-1.983109,0.889881,...,1.014987,-1.043256,-1.448549,0.780646,0.950633,-0.306635,1.014987,-1.490043,0.220487,-0.161065
4,0.010391,-0.077976,1.779688,3.941692,-1.796844,0.077043,0.097947,-0.660573,-2.422422,0.961997,...,0.867634,-1.124592,-1.918373,1.172459,1.296186,-0.378062,0.867634,-1.346151,0.244212,-0.100623
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,0.017910,7.075102,1.749114,-2.494762,0.430425,1.145204,0.126876,-0.533614,-1.153567,0.292879,...,-2.301923,-5.683259,-3.637044,4.570588,4.259853,-2.773711,-2.301923,0.088017,0.576384,0.638555
1020,0.014202,7.312490,1.760152,-2.559923,0.448954,1.160237,0.125508,-0.543709,-1.173318,0.295185,...,-2.281472,-5.710746,-3.642248,4.570588,4.259853,-2.761513,-2.281472,0.162650,0.660690,0.689425
1021,0.010200,7.626506,1.760365,-2.625994,0.467065,1.169712,0.119306,-0.551297,-1.194783,0.299307,...,-2.252983,-5.749014,-3.646690,4.570588,4.259853,-2.747193,-2.252983,0.259606,0.771384,0.757299
1022,0.005740,8.071383,1.735432,-2.705320,0.463105,1.163599,0.093488,-0.552707,-1.209416,0.308961,...,-2.206589,-5.814081,-3.646463,4.570588,4.259853,-2.728747,-2.206589,0.390584,0.925827,0.851714


In [None]:
# All coefficient columns ('theta_0', 'theta_1', ...) in the order they appear in
# the file, selected by prefix so the list follows the number of columns in the CSV.
selected_columns = [col for col in truth.columns if col.startswith('theta_')]
# Coefficients and losses of the reference solution path as NumPy arrays.
true_thetas = truth[selected_columns].to_numpy()
true_losses = truth['losses'].to_numpy()