In [943]:
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import time
from sklearn.datasets import fetch_openml
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

from css.utility import seed_everything, frobenius_norm_sq, residual_error
from load_data import load_dataset
from svd import svd_error
from css_solver import CSSProblemSolver

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load data from dataset

In [944]:
dataset_name = "sonar"
dataset_dir = "datasets"

config_path = os.path.join(dataset_dir, dataset_name, "detail.yaml")
data_matrix = load_dataset(dataset_name, config_path, read_label=False)

Reading data from datasets/sonar/sonar.csv...
Data read done with shape (208, 60)


## Baseline -- best rank-k approximation using SVD

In [None]:
k = 5
t = 2 * k

baseline = svd_error(data_matrix, k)
baseline

np.float64(100.77146899491107)

In [946]:
solver = CSSProblemSolver()

## Random

In [947]:
indices_random = solver.solve('random', data_matrix, k)
error_random = solver.get_objective(data_matrix, indices_random)
print(f"Selected Indices: {indices_random}\nResidual Error: {error_random}\nError Ratio: {error_random / baseline}")

Selected Indices: [10, 3, 44, 16, 21]
Residual Error: 307.2932081196531
Error Ratio: 3.049406852798497


## Greedy

In [948]:
indices_greedy = solver.solve('greedy', data_matrix, k)
error_greedy = solver.get_objective(data_matrix, indices_greedy)
print(f"Selected Indices: {indices_greedy}\nResidual Error: {error_greedy}\nError Ratio: {error_greedy / baseline}")

indices_greedy_recursive = solver.solve('greedy_rec', data_matrix, k)
error_greedy_recursive = residual_error(data_matrix, indices_greedy_recursive)
print(f"Selected Indices: {indices_greedy_recursive}\nResidual Error: {error_greedy_recursive}\nError Ratio: {error_greedy_recursive / baseline}")

indices_greedy_partition = solver.solve('greedy_par', data_matrix, k, k)
error_greedy_partition = residual_error(data_matrix, indices_greedy_partition)
print(f"Selected Indices: {indices_greedy_partition}\nResidual Error: {error_greedy_partition}\nError Ratio: {error_greedy_partition / baseline}")

Selected Indices: [25, 18, 33, 29, 37]
Residual Error: 151.83737430295716
Error Ratio: 1.5067496367511015
Selected Indices: [np.int64(25), np.int64(18), np.int64(33), np.int64(29), np.int64(37)]
Residual Error: 151.83737430295716
Error Ratio: 1.5067496367511015
Selected Indices: [np.int64(25), np.int64(37), np.int64(13), np.int64(30), np.int64(20)]
Residual Error: 154.76230157335456
Error Ratio: 1.5357749878705251


## LSCSS

In [952]:
selected_indices_lscss = solver.solve('lscss', data_matrix, k, t)
error_lscss = solver.get_objective(data_matrix, selected_indices_lscss)
print(f"selected: {selected_indices_lscss}\nerror: {error_lscss}\nerror ratio: {error_lscss/baseline}")

selected: [np.int64(35), np.int64(44), np.int64(18), np.int64(23), np.int64(28)]
error: 148.73296467650232
error ratio: 1.4759432025746622


## Brute Force

In [953]:
selected_indices_bf = solver.solve('bf', data_matrix, k)
error_bf = solver.get_objective(data_matrix, selected_indices_bf)
print(f"selected: {selected_indices_bf}\nerror: {error_bf}\nerror ratio: {error_bf / baseline}")

ValueError: Unknown method: bf. Methods should be within: ['random', 'greedy', 'greedy_rec', 'greedy_par', 'lscss']