In [842]:
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import time
from sklearn.datasets import fetch_openml
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

from utility import seed_everything, frobenius_norm_sq, residual_error
from load_data import load_dataset
from svd import svd_error
from random_css import random_css
from greedy_css import greedy_css, greedy_recursive_css, partition_greedy_css
from lscss import lscss_algorithm as lscss

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
# Call the seeding helper imported from utility before any stochastic step.
# NOTE(review): presumably seeds the random number generators; it is called with
# no arguments, so the seed is whatever default utility.seed_everything defines -- confirm.
# NOTE(review): this cell's execution counter (33) is far lower than those of the
# surrounding cells, i.e. the recorded outputs were not produced by a clean
# top-to-bottom run. Restart the kernel and run all cells before trusting them.
seed_everything()

## Load data from dataset

In [157]:
# Dataset to analyse; the same name is used as its sub-directory under dataset_dir.
dataset_name = "sonar"
dataset_dir = "datasets"

# Per-dataset config file: <dataset_dir>/<dataset_name>/detail.yaml
config_path = os.path.join(dataset_dir, dataset_name, "detail.yaml")
# data_matrix is the input every selection method below operates on
# (the recorded run reports shape (208, 60)).
# NOTE(review): read_label=False presumably drops the label column so only the
# feature values are returned -- confirm in load_data.load_dataset.
data_matrix = load_dataset(dataset_name, config_path, read_label=False)

Reading data from datasets/sonar/sonar.csv...
Data read done with shape (208, 60)


## Baseline -- best rank-k approximation using SVD

In [883]:
# Selection budget passed to every method below (the recorded random run
# returned 10 indices for k = 10); also the rank used for the SVD baseline.
k = 10

# Reference value that later cells compare against: per the section heading,
# the error of the best rank-k approximation obtained via SVD.
# NOTE(review): the exact norm is defined in svd.svd_error -- confirm that it is
# the same quantity residual_error reports, otherwise the ratios are not meaningful.
baseline = svd_error(data_matrix, k)
# Bare trailing expression so the notebook displays the value.
baseline

np.float64(44.369160352730574)

## Random

In [884]:
# Select k indices with random_css.
# NOTE(review): presumably a random draw of column indices, so the result depends
# on the RNG state at execution time -- confirm that seed_everything() covers the
# generator random_css uses, and re-run from a fresh kernel to reproduce.
indices_random = random_css(data_matrix, k)
# Score the selection with residual_error, the metric used for every method in this notebook.
error_random = residual_error(data_matrix, indices_random)
print(f"Selected Indices: {indices_random}, Residual Error: {error_random}")

Selected Indices: [29, 21, 13, 25, 10, 51, 18, 40, 36, 8], Residual Error: 99.56266372475791


## Greedy

In [885]:
# Run the three greedy variants imported from greedy_css on the same matrix
# with the same budget k, so their errors can be compared directly.
indices_greedy = greedy_css(data_matrix, k)
indices_greedy_recursive = greedy_recursive_css(data_matrix, k)
# NOTE(review): the third argument (also k here) is not explained anywhere in
# this notebook -- check partition_greedy_css for its meaning.
indices_greedy_partition = partition_greedy_css(data_matrix, k, k)

In [886]:
# Score each greedy selection with residual_error, in the same order as before
# (plain, recursive, partition), and unpack the three results in one statement.
error_greedy, error_greedy_recursive, error_greedy_partition = (
    residual_error(data_matrix, selection)
    for selection in (
        indices_greedy,
        indices_greedy_recursive,
        indices_greedy_partition,
    )
)

In [887]:
# Label each value so the output can be read on its own, matching the labeled
# output of the random and LSCSS result cells.
print(
    f"greedy: {error_greedy}\n"
    f"greedy (recursive): {error_greedy_recursive}\n"
    f"greedy (partition): {error_greedy_partition}"
)

71.60741860624888 71.60741860624888 74.47816305885642


## LSCSS

In [888]:
# Third argument passed to lscss in the next cell, set to twice the budget k.
# NOTE(review): presumably the iteration budget of the local search -- confirm
# against lscss.lscss_algorithm before tuning.
t =  2 * k

In [893]:
# Run LSCSS with budget k and parameter t, then score the selection with the
# same residual_error metric used for the other methods.
selected_indices_lscss1 = lscss(data_matrix, k, t)
error_lscss1 = residual_error(data_matrix, selected_indices_lscss1)
# Cast to built-in ints for display only, so the list prints as [34, 37, ...]
# instead of [np.int64(34), ...]; selected_indices_lscss1 itself is unchanged.
# The error ratio is relative to the SVD baseline computed earlier.
print(
    f"selected: {[int(i) for i in selected_indices_lscss1]}\n"
    f"error: {error_lscss1}\n"
    f"error ratio: {error_lscss1/baseline}"
)

selected: [np.int64(34), np.int64(37), np.int64(41), np.int64(15), np.int64(18), np.int64(20), np.int64(22), np.int64(25), np.int64(28), np.int64(30)]
error: 70.92179752454732
error ratio: 1.5984480427559553
