In [1]:
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from utils import apply_PCA, train_valid_test_split
from tuning import tune_model, save_results

# Notebook-level settings; every value here is forwarded to apply_PCA (and MAX_PCA_DIM also to tune_model).

# Limit for the PCA dimensionality sweep. NOTE(review): the recorded tuning run
# stepped through 21 settings (PCA = 2, 12, ..., 202), so the exact way this
# bound is applied should be confirmed in utils/tuning.
MAX_PCA_DIM = 200
# Presumably the fraction of the full dataset that is kept — TODO confirm in utils.apply_PCA.
DATASET_PERC = 0.6
# Presumably the fraction held out as the test split — TODO confirm in utils.apply_PCA.
TEST_SIZE = 0.2
# Presumably the fraction held out as the validation split (unclear whether it is
# relative to the whole subset or to the non-test remainder) — TODO confirm.
VALIDATION_SIZE = 0.25

## Dataset

In [2]:
# Fetch MNIST from OpenML as a (features, labels) pair.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

# Divide every pixel column by 255 (presumably mapping 0-255 intensities into [0, 1]).
X = X / 255

# Cast the labels to integers.
y = y.astype(int)

In [3]:
# Display the rescaled feature matrix; the repr is abbreviated with "..." rows and columns.
X

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Display the integer-cast label Series.
y

0        5
1        0
2        4
3        1
4        9
        ..
69995    2
69996    3
69997    4
69998    5
69999    6
Name: class, Length: 70000, dtype: int64

In [5]:
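# Disabled preview of sample digits. NOTE(review): `plot_digits` is not among the
# names imported in the first cell — import it before re-enabling this call.
# plot_digits(10, X, y)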

## Pre-processing 

In [6]:
# Run the PCA pre-processing step on the scaled data using the notebook-level settings.
apply_PCA(
    X,
    y,
    MAX_PCA_DIM,
    DATASET_PERC,
    TEST_SIZE,
    VALIDATION_SIZE,
)

Applying PCA transformation..: 100%|██████████| 21/21 [03:23<00:00,  9.70s/it]


## Tuning

In [7]:
# Tune each clustering model in turn and hand everything tune_model returns to save_results.
for name in ("GaussianMixture", "MeanShift", "NormalizedCut"):
    print(f"-- {name.upper()} --")
    (
        results,
        best_indexes,
        fitted_estimators,
        timings,
    ) = tune_model(name, MAX_PCA_DIM)
    save_results(name, results, best_indexes, fitted_estimators, timings)

-- GAUSSIANMIXTURE --


Loading the PCA datasets..: 100%|██████████| 21/21 [00:00<00:00, 43.63it/s]


  0%|          | 0/21 [00:00<?, ?it/s]

Tuning n_components with PCA = 2..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 12..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 22..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 32..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 42..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 52..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 62..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 72..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 82..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 92..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 102..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 112..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 122..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 132..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 142..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 152..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 162..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 172..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 182..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 192..:   0%|          | 0/11 [00:00<?, ?it/s]

Tuning n_components with PCA = 202..:   0%|          | 0/11 [00:00<?, ?it/s]

-- MEANSHIFT --


Loading the PCA datasets..: 100%|██████████| 21/21 [00:00<00:00, 34.29it/s]


  0%|          | 0/21 [00:00<?, ?it/s]

Tuning bandwidth with PCA = 2..:   0%|          | 0/10 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/nicolaaggio/Library/Python/3.10/lib/python/site-packages/IPython/core/interactiveshell.py", line 3433, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/6j/tkfndm3s74b0pqxwmwwbw3l40000gp/T/ipykernel_55571/949026165.py", line 3, in <module>
    results, best_indexes, fitted_estimators, timings = tune_model(name, MAX_PCA_DIM)
  File "/Users/nicolaaggio/Desktop/UNI/Clustering/tuning.py", line 153, in tune_model
    exit(1)
  File "/Users/nicolaaggio/Desktop/UNI/Clustering/tuning.py", line 114, in get_results
  File "/Users/nicolaaggio/Desktop/UNI/Clustering/tuning.py", line 70, in tune_hyperparameter
    for val in tqdm(hyperparameter_values, desc=desc):
  File "/Users/nicolaaggio/Desktop/UNI/Clustering/tuning.py", line 37, in evaluate_model
    mod = model.fit(X_train)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/sklearn/cluster/_mean_shift.py", line 441, in f