In [2]:
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
import tensorflow as tf
import pandas as pd
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape, Conv2D, MaxPooling2D, UpSampling2D

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

In [11]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train.shape

(50000, 1)

In [58]:
original_dimm = x_train.shape[1:]+(2,)
res = original_dimm if len(original_dimm) > 2 else original_dimm+(1,)
res

(28, 28, 2)

In [1]:

# Завантаження даних
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
origin_dim = x_train.shape[1]

# Нормалізація
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# reshape to flat
x_train_flat = x_train.reshape((len(x_train), -1))
x_test_flat = x_test.reshape((len(x_test), -1))

x_train_flat.shape

NameError: name 'tf' is not defined

In [5]:
class Autoencoder(BaseEstimator, TransformerMixin):
    def __init__(self, lat_dim_ae=30):
        self.lat_dim_ae = lat_dim_ae
        self.input_shape = (28, 28, 1)
        self.autoencoder = None
    
    def build_model(self):
        inputs = Input(shape=self.input_shape)
        x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
        x = MaxPooling2D((2, 2), padding='valid')(x)
        x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='valid')(x)
        x = Flatten()(x)
        encoded = Dense(self.lat_dim_ae)(x)

        x = Dense(7 * 7 * 64)(encoded)
        x = Reshape((7, 7, 64))(x)
        x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)
        decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

        self.autoencoder = Model(inputs, decoded)
        self.autoencoder.compile(optimizer=Adam(), loss=MeanSquaredError())
    
    def fit(self, X, y=None, **fit_params):
        self.build_model()
        self.autoencoder.fit(X, X, **fit_params)
        return self
    
    def transform(self, X):
        result_x = self.autoencoder.predict(X)
        return result_x.reshape((len(result_x), -1))


In [11]:
param_grid = {
    'lat_dim_ae': list(range(10,160,10))
}

grid_search = GridSearchCV(Autoencoder(), param_grid, cv=2, scoring='accuracy')
grid_search.fit(x_train, x_train)

# Виведення результатів
print("Best parameters found: ", grid_search.best_params_)
print("Best score found: ", grid_search.best_score_)

Best parameters found:  {'lat_dim_ae': 10}
Best score found:  nan


In [6]:
steps = [
         ('AE', Autoencoder()),
         ('MLP', MLPClassifier(solver="lbfgs"))]
pipeline = Pipeline(steps)

param_grid = {
    'AE__lat_dim_ae': list(range(10,110,10)),
}


grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=2, scoring='accuracy', return_train_score=True, verbose=2)
grid_search.fit(x_train, y_train)

results = pd.DataFrame(grid_search.cv_results_)
results

Fitting 2 folds for each of 10 candidates, totalling 20 fits
  8/938 [..............................] - ETA: 6s 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END ..................................AE__lat_dim_ae=10; total time=  28.3s
  8/938 [..............................] - ETA: 6s 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END ..................................AE__lat_dim_ae=10; total time=  27.8s
  8/938 [..............................] - ETA: 8s 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END ..................................AE__lat_dim_ae=20; total time=  28.1s
[CV] END ..................................AE__lat_dim_ae=20; total time=  26.8s
[CV] END ..................................AE__lat_dim_ae=30; total time=  25.9s
[CV] END ..................................AE__lat_dim_ae=30; total time=  24.6s
[CV] END ..................................AE__lat_dim_ae=40; total time=  24.6s
[CV] END ..................................AE__lat_dim_ae=40; total time=  25.5s
[CV] END ..................................AE__lat_dim_ae=50; total time=  27.4s
[CV] END ..................................AE__lat_dim_ae=50; total time=  24.3s
[CV] END ..................................AE__lat_dim_ae=60; total time=  24.4s
[CV] END ..................................AE__lat_dim_ae=60; total time=  23.3s
[CV] END ..................................AE__lat_dim_ae=70; total time=  26.1s
[CV] END ..................................AE__lat_dim_ae=70; total time=  23.8s
[CV] END ...................

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_AE__lat_dim_ae,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,mean_train_score,std_train_score
0,25.879206,0.279855,2.316259,0.027738,10,{'AE__lat_dim_ae': 10},0.923033,0.9274,0.925217,0.002183,10,0.956533,0.9654,0.960967,0.004433
1,25.295668,0.712479,2.214373,0.054369,20,{'AE__lat_dim_ae': 20},0.9562,0.958033,0.957117,0.000917,9,0.999967,1.0,0.999983,1.7e-05
2,23.003708,0.598582,2.325416,0.022403,30,{'AE__lat_dim_ae': 30},0.962767,0.961333,0.96205,0.000717,8,1.0,1.0,1.0,0.0
3,22.837138,0.486918,2.330524,0.066476,40,{'AE__lat_dim_ae': 40},0.9633,0.9633,0.9633,0.0,6,1.0,1.0,1.0,0.0
4,23.517577,1.591545,2.431969,0.039035,50,{'AE__lat_dim_ae': 50},0.962667,0.962,0.962333,0.000333,7,1.0,1.0,1.0,0.0
5,21.755777,0.558775,2.1725,0.008501,60,{'AE__lat_dim_ae': 60},0.964867,0.964833,0.96485,1.7e-05,2,1.0,1.0,1.0,0.0
6,22.796885,1.15032,2.2255,0.0005,70,{'AE__lat_dim_ae': 70},0.965433,0.9655,0.965467,3.3e-05,1,1.0,1.0,1.0,0.0
7,22.009773,0.004771,2.222999,0.041,80,{'AE__lat_dim_ae': 80},0.964067,0.965267,0.964667,0.0006,3,1.0,1.0,1.0,0.0
8,21.929716,0.24869,2.227517,0.007518,90,{'AE__lat_dim_ae': 90},0.962767,0.9648,0.963783,0.001017,5,1.0,1.0,1.0,0.0
9,22.776813,0.39571,2.334757,0.068759,100,{'AE__lat_dim_ae': 100},0.964933,0.9637,0.964317,0.000617,4,1.0,1.0,1.0,0.0


In [9]:
params = ['param_' + key for key in param_grid.keys() ]
results = pd.DataFrame(grid_search.cv_results_)[[*params, 'mean_test_score', 'mean_fit_time', 'mean_score_time']]
results

Unnamed: 0,param_AE__lat_dim_ae,mean_test_score,mean_fit_time,mean_score_time
0,10,0.925217,25.879206,2.316259
1,20,0.957117,25.295668,2.214373
2,30,0.96205,23.003708,2.325416
3,40,0.9633,22.837138,2.330524
4,50,0.962333,23.517577,2.431969
5,60,0.96485,21.755777,2.1725
6,70,0.965467,22.796885,2.2255
7,80,0.964667,22.009773,2.222999
8,90,0.963783,21.929716,2.227517
9,100,0.964317,22.776813,2.334757


In [32]:
myres = results.iloc[:, :-3]
myres = pd.concat([myres, results.iloc[:, :-3]], axis=1)

In [49]:
myres.apply(lambda x: "_".join(x.astype(str)), axis=1)
# df.apply(lambda row: row['col1'] + row['col2'], axis=1)

0      10_10
1      20_20
2      30_30
3      40_40
4      50_50
5      60_60
6      70_70
7      80_80
8      90_90
9    100_100
dtype: object