In [None]:
# Mount Google Drive inside the Colab runtime so files stored there can be read
from google.colab import drive

# Mount point in the runtime filesystem; reused later when building the dataset path
DRIVE_PATH = '/content/drive'
drive.mount(DRIVE_PATH)

# Libraries Installation

In [None]:
# Install pyclustering (source of the CLARANS model and the cluster visualizer imported below)
!pip install pyclustering

# Libraries Import

In [None]:
import warnings

# Basic libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm 
import copy
from matplotlib.cm import get_cmap


# Scaler
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler

# Encoder
from sklearn.preprocessing import LabelEncoder

# Cluster model
from sklearn.cluster import KMeans, DBSCAN, MeanShift, estimate_bandwidth
from pyclustering.cluster.clarans import clarans
from sklearn.mixture import GaussianMixture

# Evaluation
from sklearn.metrics import silhouette_score

# Timer
from multiprocessing import Process, Manager

# Visualization
from pyclustering.cluster import cluster_visualizer

# Tuning Classes


> TuningModel - A Basic(Parent) class for hyperparameter tuning

*   Public Methods:

 **Constructor(param)**
  - @param **param** - dictionary: parameters for tuning

 **fit(x)**
 
 - @param **x** - np.ndarray: Dataframe values for modeling

 **check_best()**
  - Check the best model with scores

 **plot_score(title)**
  - Plot the average score for each key parameters
  - @param **title** - str: Title of plot

* Protected Methods

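 **_model_execute(x, params, return_dict)**
  - @param **x** - np.ndarray: dataframe values
  - @param **params** - dictionary: one combination of values chosen from the whole parameter grid
  - @param **return_dict** - shared dictionary: the fitted model is stored under the key `'return'`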

 **_model_scoring(model, x)**
  - @param **model** - Any: Data model built in *_model_execute*
  - @param **x** - numpy.ndarray: dataframe values

 **_is_best(model)**
  - @param **model** - Any: Data model built in *_model_execute* compares with the best score

*   Members

 *params*

 - parameter input from constructor

 *model_list*

 - model list, available after using **fit**

 *best_params_*

 - best model's parameters, available after using **fit**

 *best_score_*

 - best model's score, available after using **fit**

 *best_model_*

 - best model, available after using **fit**


In [None]:
class TuningModel:
    """Base class for exhaustive hyperparameter tuning of a clustering model.

    Subclasses implement ``_model_execute`` (build and fit one model, storing
    it in the shared dictionary under ``'return'``) and ``_model_scoring``
    (score a fitted model). ``fit`` runs every combination of the candidate
    values in ``param``, each in its own process and bounded by ``timer``
    seconds.

    Members available after ``fit``:
        model_list   -- list of ``{'params', 'model', 'score'}`` dicts
        best_params_ -- parameters of the best-scoring model
        best_score_  -- score of the best-scoring model
        best_model_  -- the best-scoring model
    """

    def __init__(self, param: dict, timer=600):
        """
        :param param: mapping of parameter name -> sequence of candidate values
        :param timer: per-model time limit in seconds
        """
        self.params = param
        self.model_list = []

        self.best_params_ = None
        self.best_score_ = None
        self.best_model_ = None

        self._dataset = None
        self.__timer = timer

    def _model_execute(self, x, params, result):
        """Hook for subclasses: fit one model on ``x`` and store it in ``result['return']``."""
        pass

    def _model_scoring(self, model, x):
        """Hook for subclasses: return the score of ``model`` on ``x`` (higher is better)."""
        pass

    def _is_best(self, model):
        """Return True when ``model['score']`` beats the best score seen so far."""
        return self.best_score_ is None or self.best_score_ < model['score']

    def check_best(self):
        """Scan ``model_list`` and record the best score, parameters and model."""
        for result in self.model_list:
            if self._is_best(result):
                self.best_score_ = result['score']
                self.best_params_ = result['params']
                self.best_model_ = result['model']

    def _param_grid(self):
        """Return every parameter combination as a list of dicts.

        The combinations are ordered like a mixed-radix counter running
        backwards (first key = most significant digit, last combination
        first), which is the order in which ``fit`` evaluates them.
        """
        from itertools import product

        keys = list(self.params)
        grid = [dict(zip(keys, combo))
                for combo in product(*(self.params[key] for key in keys))]
        grid.reverse()
        return grid

    def fit(self, x):
        """Fit and score one model per parameter combination, then pick the best.

        :param x: dataset values handed to ``_model_execute`` / ``_model_scoring``
        :raises Exception: when ``params`` is None or any parameter has no candidates
        """
        if self.params is None:
            raise Exception('Invalid parameters input: None', 'ParameterError')

        self._dataset = x

        grid = self._param_grid()
        if not grid:
            raise Exception('Invalid parameters input: No parameters input', 'ParameterError')

        # Start from a clean slate so that calling fit() again does not mix
        # the results of two different runs.
        self.model_list = []
        self.best_params_ = None
        self.best_score_ = None
        self.best_model_ = None

        # One manager (one helper process) serves the whole run and is shut
        # down when the block exits.
        with Manager() as manager:
            for params in grid:
                return_dict = manager.dict()

                process = Process(target=self._model_execute, args=(x, params, return_dict))
                process.start()
                process.join(timeout=self.__timer)

                # Still alive after join() means the time limit was hit
                timed_out = process.is_alive()
                if timed_out:
                    process.terminate()
                    process.join()  # reap the terminated worker

                if 'return' not in return_dict:
                    if timed_out:
                        warnings.warn(
                            f'Timeout from {self}: Model with {params} exceed {self.__timer}. Model ignored.',
                            UserWarning)
                    else:
                        # The worker ended early without producing a model
                        warnings.warn(
                            f'Failure from {self}: Model with {params} returned no result. Model ignored.',
                            UserWarning)
                    continue

                model = return_dict['return']

                self.model_list.append({
                    'params': params,
                    'model': model,
                    'score': self._model_scoring(model, x)
                })

        self.check_best()

    def plot_score(self, title: str):
        """Plot, for every parameter, the average score obtained per candidate value.

        :param title: figure title
        """
        n_rows = 3
        n_cols = max(1, (len(self.params) + 2) // 3)

        # squeeze=False always yields a 2-D array, so flattening works for any
        # number of parameters (a 3x1 grid and a 3xN grid alike).
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(10, 10),
                                constrained_layout=True, squeeze=False)
        fig.suptitle(title)
        axes = axs.ravel()

        for ax, (key, values) in zip(axes, self.params.items()):
            value_list, score_list = [], []

            for value in values:
                scores = [entry['score'] for entry in self.model_list
                          if entry['params'][key] == value]

                # Values whose models were all ignored have nothing to average
                if not scores:
                    continue

                value_list.append(value)
                score_list.append(sum(scores) / len(scores))

            ax.set_title(f'Average score based on the value of {key}')
            ax.plot(value_list, score_list)
            ax.set_xlabel(str(key))
            ax.set_ylabel('score')

        # Hide the grid cells that have no parameter to show
        for ax in axes[len(self.params):]:
            ax.set_visible(False)

> ClaransTune - Tuning class for pyclustering.cluster.clarans.clarans
- uses **silhouette coefficient** for scoring
- **Constructor(method, param)**
  - @param **method**: distance metric used for the silhouette score - 'euclidean', 'manhattan', ..

In [None]:
class ClaransTune(TuningModel):
    """Tuning class for ``pyclustering.cluster.clarans.clarans``.

    Models are scored with the silhouette coefficient computed with the
    distance metric given to the constructor.
    """

    def __init__(self, method: str, param: dict):
        """
        :param method: metric name forwarded to ``silhouette_score``
        :param param: candidate values for 'number_clusters', 'numlocal' and 'maxneighbor'
        """
        super().__init__(param)
        self.__metric = method

    def _model_execute(self, x, params, return_dict):
        """Run CLARANS with one parameter combination and store the processed model."""
        # clarans takes its settings positionally, so order them explicitly
        param_input = (params['number_clusters'], params['numlocal'], params['maxneighbor'])

        model = clarans(x, *param_input)
        model.process()

        return_dict['return'] = model

    def _model_scoring(self, model: clarans, x):
        """Return the silhouette score of the clustering held by ``model``."""
        # Convert [[indices of cluster 1], [indices of cluster 2], ...] into
        # one label per row; rows listed in no cluster keep the label 0.
        y = np.zeros(len(x))
        for label, members in enumerate(model.get_clusters(), start=1):
            for index in members:
                y[index] = label

        return silhouette_score(x, y, metric=self.__metric)

    def plot_score(self, title: str):
        """Show the best clustering (low-dimensional data only), then the score plots.

        :param title: title of the score figure
        """
        data = np.asarray(self._dataset)
        n_features = data.shape[1] if data.ndim == 2 else 0

        # Data with 4 or more dimensions cannot be visualized; there is also
        # nothing to draw when no model finished.
        if self.best_model_ is not None and 0 < n_features < 4:
            # Too many models to draw them all, so only the best one is shown,
            # on a single canvas.
            visualizer = cluster_visualizer()

            model = self.best_model_

            # Obtain medoids and clusters from the model
            medoids = model.get_medoids()
            clusters = model.get_clusters()

            k = self.best_params_['number_clusters']

            visualizer.append_clusters(clusters, self._dataset)  # cluster points
            visualizer.append_cluster(medoids, self._dataset, marker='x')  # medoid positions
            visualizer.set_canvas_title(text=f'Clarans Cluster : {k}')
            visualizer.show(figure=plt.figure(figsize=(10, 10)))

        super(ClaransTune, self).plot_score(title)


Other clusters
> DbscanTune - Tuning class for sklearn.cluster.DBSCAN

> KMeansTune - Tuning class for sklearn.cluster.KMeans

> GMMTune - Tuning class for sklearn.mixture.GaussianMixture

> MeanShiftTune - Tuning class for sklearn.cluster.MeanShift

- uses **silhouette coefficient** for scoring
- **Constructor(method, param)**
  - @param **method**: distance metric used for the silhouette score - 'euclidean', 'manhattan', ...

In [None]:
class DbscanTune(TuningModel):
    """Tuning class for ``sklearn.cluster.DBSCAN``, scored with the silhouette coefficient."""

    def __init__(self, method: str, param: dict):
        # `method` is kept for later use as the `metric` of silhouette_score
        super(DbscanTune, self).__init__(param)
        self.__metric = method

    def _model_execute(self, x, params, return_dict):
        # The chosen combination is passed straight through as DBSCAN keyword arguments
        fitted = DBSCAN(**params)
        fitted.fit(x)
        return_dict['return'] = fitted

    def _model_scoring(self, model: DBSCAN, x):
        # DBSCAN does not fix the number of clusters in advance, so a run can
        # end with a labelling that silhouette_score rejects with ValueError
        # (e.g. a single cluster). Such a model gets the worst score, -1.
        try:
            return silhouette_score(x, model.labels_, metric=self.__metric)
        except ValueError:
            return -1.0


class KMeansTune(TuningModel):
    """Tuning class for ``sklearn.cluster.KMeans``, scored with the silhouette coefficient."""

    def __init__(self, method: str, param: dict):
        """
        :param method: metric name forwarded to ``silhouette_score``
        :param param: candidate values for the KMeans keyword arguments
        """
        super().__init__(param)
        self.__metric = method

    def _model_execute(self, x, params, return_dict):
        """Fit KMeans with one parameter combination and store the fitted model."""
        model = KMeans(**params)
        model.fit(x)

        return_dict['return'] = model

    def _model_scoring(self, model: KMeans, x):
        """Return the silhouette score of ``model.labels_``, or -1.0 when it is undefined."""
        # silhouette_score raises ValueError when the labelling is unusable.
        # Score such a model as worst (-1) instead of aborting the whole
        # tuning run, as the DBSCAN and MeanShift tuners do.
        try:
            return silhouette_score(x, model.labels_, metric=self.__metric)
        except ValueError:
            return -1.0


class GMMTune(TuningModel):
    """Tuning class for ``sklearn.mixture.GaussianMixture``, scored with the silhouette coefficient."""

    def __init__(self, method: str, param: dict):
        """
        :param method: metric name forwarded to ``silhouette_score``
        :param param: candidate values for the GaussianMixture keyword arguments
        """
        super().__init__(param)
        self.__metric = method

    def _model_execute(self, x, params, return_dict):
        """Fit a GaussianMixture with one parameter combination and store the fitted model."""
        model = GaussianMixture(**params)
        model.fit(x)

        return_dict['return'] = model

    def _model_scoring(self, model: GaussianMixture, x):
        """Return the silhouette score of the predicted components, or -1.0 when it is undefined."""
        # The predicted assignment may be unusable for the silhouette (it
        # raises ValueError, e.g. when every row lands in one component).
        # Score such a model as worst (-1) instead of aborting the whole
        # tuning run, as the DBSCAN and MeanShift tuners do.
        try:
            return silhouette_score(x, model.predict(x), metric=self.__metric)
        except ValueError:
            return -1.0


class MeanShiftTune(TuningModel):
    """Tuning class for ``sklearn.cluster.MeanShift``, scored with the silhouette coefficient."""

    def __init__(self, method: str, param: dict):
        # `method` is kept for later use as the `metric` of silhouette_score
        super(MeanShiftTune, self).__init__(param)
        self.__metric = method

    def _model_execute(self, x, params, return_dict):
        # The chosen combination is passed straight through as MeanShift keyword arguments
        fitted = MeanShift(**params)
        fitted.fit(x)
        return_dict['return'] = fitted

    def _model_scoring(self, model: MeanShift, x):
        # A labelling that silhouette_score rejects with ValueError is scored
        # with the worst possible value, -1.
        try:
            return silhouette_score(x, model.labels_, metric=self.__metric)
        except ValueError:
            return -1.0

# AutoML - A major function for one operation

- Only supports K-Means, K-Medoids(CLARANS), DBSCAN, Gaussian-Mixture, and MeanShift

In [None]:
# Declare supported_model
# Maps each supported cluster type to the tuning class that wraps it;
# major_function looks the requested cluster types up here and skips
# (with a warning) any type that is not a key of this dictionary.
supported_model = {
    KMeans: KMeansTune,
    clarans: ClaransTune,
    DBSCAN: DbscanTune,
    GaussianMixture: GMMTune,
    MeanShift: MeanShiftTune
}

> Major function

*  Parameters

 @param **x** - pandas.DataFrame: A dataframe to use

 @param **kwargs**: keyword arguments for clustering

*  **Argument kwargs contains**:
 
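 *scaler*: Scaler **types** to use - the modeling is executed on the unscaled dataset plus one scaled dataset per scaler

 *cluster*: Clusters to use - a **dictionary** with the **Cluster Type** as key and **its hyperparameters** (name → list of candidate values) as value.

 *metric*: Distance metrics (e.g. 'euclidean', 'manhattan') used for the silhouette score - every cluster/dataset pair is tuned once per metric.

* **Returns**: *list of (str, TuningModel)*

  A list of tuples, each holding a label and the fitted *TuningModel* with its calculated scores.
  
  The label is the string `'{scaler type},{cluster type},{metric}'`; the scaler type is `None` for the unscaled dataset.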


    scaler_list = {StandardScaler, MinMaxScaler}
    cluster_list = {
        KMeans: {
            'n_clusters': [2, 3, 4],
            'init': ['k-means++', 'random']
        },
        clarans: {
            'number_clusters': [2, 3, 4],
            'numlocal': [2, 4, 6],
            'maxneighbor': [3, 5, 7]
        },
        DBSCAN: {
            'eps': [0.01, 0.05, 0.1],
            'min_samples': range(2, 6)
        },
        GaussianMixture: {
            'n_components': [2, 3, 4]
        },
        MeanShift:{
         'bandwidth' : [0.8,1.6,3.0]
        }
    }

In [None]:
def major_function(x: pd.DataFrame, **kwargs):
    """Tune every requested cluster type on every (scaled) version of ``x``.

    Keyword arguments:
        scaler  -- iterable of scaler types; each is instantiated and applied
                   to ``x``. The unscaled values are always used as well.
        cluster -- dict mapping a cluster type (a key of ``supported_model``)
                   to its parameter grid. Required.
        metric  -- iterable of metric names (or a single name) for the
                   silhouette score. Defaults to ``['euclidean']`` when omitted.

    :return: list of ``(label, tuning_model)`` tuples, where ``label`` is the
             string ``'{scaler type},{cluster type},{metric}'`` and
             ``tuning_model`` has already been fitted.
    :raises Exception: when no cluster is given.
    """
    # Scalers: Set
    scalers = kwargs.get('scaler')

    # Clusters: Dict (Cluster Type: Parameters)
    clusters = kwargs.get('cluster')

    # Metric methods: Set (str)
    methods = kwargs.get('metric')

    if clusters is None or len(clusters) == 0:
        raise Exception('InputError: No cluster input')

    if not methods:
        # Without an explicit metric fall back to the euclidean distance
        methods = ['euclidean']
    elif isinstance(methods, str):
        # A bare string would otherwise be iterated character by character
        methods = [methods]

    # Datasets to model, keyed by the scaler type that produced them
    # (None = unscaled values)
    dataframes = {None: x.values}
    if scalers is not None:
        for scaler in scalers:
            dataframes[scaler] = scaler().fit_transform(x)

    output = []

    for method in methods:
        for cluster, param in clusters.items():
            if cluster not in supported_model:
                warnings.warn(f'Model {cluster} is not supported.', UserWarning)
                continue

            tuner_type = supported_model[cluster]
            for key, value in dataframes.items():
                model = tuner_type(method, param)
                model.fit(value)
                output.append((f'{key},{cluster},{method}', model))

    return output

# Main

In [None]:
# an alternative dataframe reads: open file in runtime

#ROOT_PATH = '/content'
#DRIVE_PATH = '/drive'
#FILE_NAME = 'housing.csv'

#df = pd.read_csv(f'{ROOT_PATH}/{FILE_NAME}')

In [None]:
# Import dataset
# Folder and file name of the CSV below the Drive mount point (DRIVE_PATH).
# Note: FILE_PATH already starts and ends with '/', so the path string built
# below contains doubled separators.
FILE_PATH = "/MyDrive/machine_learning/data/"
FILE_NAME = "housing.csv"

df = pd.read_csv(f"{DRIVE_PATH}/{FILE_PATH}/{FILE_NAME}")

# Data Exploration

In [None]:
# Column names, dtypes and non-null counts of the loaded frame
df.info()

In [None]:
# Number of missing values per column
df.isna().sum()

In [None]:
# Summary statistics of the numeric columns
df.describe()

# Data Preprocessing

In [None]:
# Keep an independent copy of the loaded data as the untouched source.
# A plain `df_src = df` would only bind a second name to the same object, so
# any in-place change made through `df` would also alter `df_src`.
df_src = df.copy()

In [None]:
# Work on a fresh copy; the following cells modify df in place
df = df_src.copy()

In [None]:
# Drop rows with any NaN first, so that the features and the split-off
# target below cover exactly the same rows
df.dropna(how='any', inplace=True)

# Split median_house_value (for clustering)
df_median = df['median_house_value']

# Drop median_house_value from the feature frame
df.drop(['median_house_value'], axis=1, inplace=True)

# Check the split-off column
df_median

In [None]:
# Check NaN again: count the missing values left per column after the drop above
df.isna().sum()

In [None]:
# Check categorical data counts: number of rows per ocean_proximity category
df['ocean_proximity'].value_counts()

In [None]:
# Method 1 - LabelEncoder, switch the categorical feature into numeric one
# NOTE(review): the integer codes put the categories in an arbitrary order that
# distance-based clustering will treat as meaningful - confirm this is acceptable
encoder = LabelEncoder()
df['ocean_proximity'] = encoder.fit_transform(df['ocean_proximity'])
# Row count per encoded category
print(df['ocean_proximity'].value_counts())

In [None]:
# Display histogram of features (one 50-bin histogram per column)
df.hist(bins=50, figsize=(20, 15))
plt.show()

# Data Modeling

> Sample Clusterings - do it without AutoML
- Arbitrarily chosen parameters
- Standard-scaled data
- Silhouette index for scoring (euclidean)

> K-Means (k=4)

In [None]:
# Baseline K-Means (k=4) on standard-scaled features
X = StandardScaler().fit_transform(df)
model = KMeans(n_clusters=4, random_state=1)

# Do analysis
cluster_labels = model.fit_predict(X)

# Print score
print("Model score: ", silhouette_score(X, cluster_labels))

# Plot the cluster of every row at its geographic position
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.loc[:, 'longitude'], df.loc[:, 'latitude'], c=cluster_labels, s=50, cmap='viridis')
ax.set(title='K-Means (k=4)', xlabel='longitude', ylabel='latitude')
plt.show()

> DBSCAN (eps=0.5, minSamples=5)

In [None]:
# Baseline DBSCAN (eps=0.5, min_samples=5) on standard-scaled features
X = StandardScaler().fit_transform(df)
model = DBSCAN(eps=0.5, min_samples=5)

# Do analysis
cluster_labels = model.fit_predict(X)

# Print score
print("Model score: ", silhouette_score(X, cluster_labels))

# Plot the cluster of every row at its geographic position
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.loc[:, 'longitude'], df.loc[:, 'latitude'], c=cluster_labels, s=50, cmap='viridis')
ax.set(title='DBSCAN (eps=0.5, min_samples=5)', xlabel='longitude', ylabel='latitude')
plt.show()

> GaussianMixture (n_components=4)

In [None]:
# Baseline Gaussian mixture (4 components) on standard-scaled features
X = StandardScaler().fit_transform(df)
model = GaussianMixture(n_components=4, random_state=10)

# Do analysis
cluster_labels = model.fit_predict(X)

# Print score
print("Model score: ", silhouette_score(X, cluster_labels))

# Plot the component of every row at its geographic position
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.loc[:, 'longitude'], df.loc[:, 'latitude'], c=cluster_labels, s=50, cmap='viridis')
ax.set(title='GaussianMixture (n_components=4)', xlabel='longitude', ylabel='latitude')
plt.show()

> MeanShift (bandwidth=0.6)

In [None]:
# Baseline MeanShift (bandwidth=0.6) on standard-scaled features
X = StandardScaler().fit_transform(df)
model = MeanShift(bandwidth=0.6)

# Do analysis
cluster_labels = model.fit_predict(X)

# Print score
print("Model score: ",silhouette_score(X, cluster_labels))

# Plot the cluster of every row at its geographic position
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.loc[:, 'longitude'], df.loc[:, 'latitude'], c=cluster_labels, s=50, cmap='viridis')
ax.set(title='MeanShift (bandwidth=0.6)', xlabel='longitude', ylabel='latitude')
plt.show()

> CLARANS (n_clusters=4, numlocal=1, maxneighbor=1)

In [None]:
# Baseline CLARANS (4 clusters, numlocal=1, maxneighbor=1) on standard-scaled features
X = StandardScaler().fit_transform(df)

# Do analysis
model = clarans(X.tolist(), number_clusters=4, numlocal=1, maxneighbor=1)
model.process()

# CLARANS reports each cluster as a list of row indices; turn that into one
# label per row (rows listed in no cluster keep the label 0)
cluster_labels = np.zeros(len(X))
for label, members in enumerate(model.get_clusters(), start=1):
    for index in members:
        cluster_labels[index] = label

# Print score
print("Model score: ",silhouette_score(X, cluster_labels))

# Plot the cluster of every row at its geographic position
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.loc[:, 'longitude'], df.loc[:, 'latitude'], c=cluster_labels, s=50, cmap='viridis')
ax.set(title='CLARANS (number_clusters=4, numlocal=1, maxneighbor=1)', xlabel='longitude', ylabel='latitude')
plt.show()

> Processes with AutoML

In [None]:
# Candidate numbers of clusters / components shared by the grids below: 2..12
k_range = range(2, 13)

# Scaler types; major_function also models the unscaled data
scaler_list = {StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler}
# Parameter grid per cluster type: every combination of the listed values is tried
cluster_list = {
    KMeans: {
        'n_clusters': k_range,
        'init': ['k-means++', 'random'],
        'random_state': [1]
    },
    clarans: {
       'number_clusters': k_range,
       'numlocal': [1, 2, 3],
       'maxneighbor': [1, 2, 3]
    },
    DBSCAN: {
        'eps': [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1],
        'min_samples': range(2, 6)
    },
    GaussianMixture: {
        'n_components': k_range,
        'random_state': [1]
    },
    MeanShift:{
        'bandwidth' : [8e-1, 16e-1, 3]
    }
}
# Distance metrics used for the silhouette score
method_list = {'euclidean', 'manhattan'}

# One fitted tuning model per (metric, cluster type, dataset) combination
output = major_function(df, scaler=scaler_list, cluster=cluster_list, metric=method_list)
print(output)

# Evaluation

In [None]:
# Rank the tuning models by their best score, highest first. A tuner whose
# candidates were all ignored has best_score_ None; it is ranked last instead
# of breaking the sort with a None/float comparison.
output.sort(key=lambda i: float('-inf') if i[1].best_score_ is None else i[1].best_score_,
            reverse=True)
for result in output:
  print(f'TuningModel: {result[0]}: {result[1]}')
  print(f'Best parameters: {result[1].best_params_}')
  print(f'Best Model: {result[1].best_model_}')
  print(f'Best Score: {result[1].best_score_}', end='\n\n')

  # Average score per parameter value for this tuning model
  result[1].plot_score(f'Plot of {result[0]}')
  plt.show()

# Comparison
- Build reference clusters from the median_house_value attribute and compare them with the result of the AutoML function

In [None]:
def result_vs_cluster_with_median_house_value():
  """Compare a price-based 3-way grouping with the best AutoML clustering.

  Reads the notebook globals ``df_src`` (the loaded housing frame) and
  ``output`` (AutoML results; the first entry is used as the best one, so
  ``output`` is expected to be sorted already). Prints the silhouette score of
  both groupings and plots both on longitude/latitude. Returns nothing and
  leaves the notebook globals unchanged.
  """
  # ---- Reference grouping: cheap / normal / expensive by house value ----
  df_with_median = df_src.copy()

  # Drop any with how=any
  df_with_median.dropna(how='any', inplace=True)
  df_with_median.reset_index(drop=True, inplace=True)
  prices = df_with_median['median_house_value']

  # Positions of the one-third and two-thirds points in the sorted prices,
  # derived from the actual number of rows
  n_df = prices.sort_values()
  n_df.reset_index(drop=True, inplace=True)
  n = len(n_df)
  q1 = int((0.3333 * (n + 1)) - 1)
  q2 = int((0.6666 * (n + 1)) - 1)
  cheap_limit = n_df[q1]
  expensive_limit = n_df[q2]
  print(q1)
  print(q2)
  print(cheap_limit)
  print(expensive_limit)

  # Create new cluster by median_house_value attribute
  df_with_median['target'] = 'normal'
  df_with_median.loc[prices < cheap_limit, 'target'] = 'cheap'
  df_with_median.loc[prices > expensive_limit, 'target'] = 'expensive'
  df_with_median.drop(['median_house_value'], axis=1, inplace=True)

  # Encode the categorical data with local encoders, so that the notebook-level
  # encoder is not refitted as a side effect
  df_with_median['ocean_proximity'] = LabelEncoder().fit_transform(df_with_median['ocean_proximity'])
  df_with_median['target'] = LabelEncoder().fit_transform(df_with_median['target'])
  print(df_with_median['ocean_proximity'].value_counts())
  print(df_with_median['target'].value_counts())

  # Scale the dataset
  columns = df_with_median.columns
  data = MaxAbsScaler().fit_transform(df_with_median)
  data = pd.DataFrame(data, columns=columns)
  ids = data['target']

  # Calculate silhouette score
  # NOTE(review): `data` still contains the scaled 'target' column, so the
  # labels are part of the features being scored - confirm this is intended
  score = silhouette_score(data, ids)
  print("silhouette score for clustering with median house value %f"%(score))

  # Plot the clustering result and compare it with result of AutoML function
  fig, (ax_price, ax_automl) = plt.subplots(2, 1, figsize=(12, 12))
  ax_price.set_title("clustering with median house value")
  ax_price.set_xlabel('longitude')
  ax_price.set_ylabel('latitude')
  ax_price.scatter(data['longitude'], data['latitude'], c=ids, cmap='plasma')

  # ---- Clustering of the AutoML function ----
  df_original = df_src.copy()
  df_original.dropna(how='any', inplace=True)
  df_original['ocean_proximity'] = LabelEncoder().fit_transform(df_original['ocean_proximity'])

  # Scale the dataset
  columns = df_original.columns
  data = MaxAbsScaler().fit_transform(df_original)
  data = pd.DataFrame(data, columns=columns)

  # NOTE(review): the best model is refitted here on data that still includes
  # median_house_value, and it must offer fit_predict - confirm both
  ids = output[0][1].best_model_.fit_predict(data)
  score = silhouette_score(data, ids)
  print("silhouette score for clustering with AutoML function %f"%(score))

  ax_automl.set_title("clustering with AutoML")
  ax_automl.set_xlabel('longitude')
  ax_automl.set_ylabel('latitude')
  ax_automl.scatter(data['longitude'], data['latitude'], c=ids, cmap='plasma')

  fig.tight_layout()
  plt.show()


In [None]:
# Run the comparison; it uses output[0] as the best AutoML result
result_vs_cluster_with_median_house_value()

# Clustering#2 (sub dataset)
> Correlation matrix heatmap - drop the 4 features least correlated with median_house_value

In [None]:
# Start the second experiment from a fresh copy of the source frame
df = df_src.copy()

> Preprocessing

In [None]:
# Drop every row that has at least one missing value
df.dropna(how='any', inplace=True)

# Method 1 - LabelEncoder, switch the categorical feature into numeric one
# (a new encoder is created here, replacing the notebook-level `encoder`)
encoder = LabelEncoder()
df['ocean_proximity'] = encoder.fit_transform(df['ocean_proximity'])

In [None]:
# Display heatmap for feature engineering:
# pairwise correlation of all columns, annotated with the coefficient values
fig, ax = plt.subplots(figsize=(15, 15))
ax = sns.heatmap(df.corr(), annot=True, ax=ax)
plt.show()

In [None]:
# Select the 4 attributes with the weakest correlation to median_house_value.
# The ranking uses the absolute coefficient: a strongly negative correlation
# is still a strong relationship and must not be mistaken for a weak one.
# The target itself is left out of the ranking.
target_corr = df.corr()['median_house_value'].drop('median_house_value')
drop_target = target_corr.abs().sort_values().iloc[:4].index
print(drop_target)

In [None]:
# Remove the attributes selected in drop_target
df.drop(list(drop_target), axis=1, inplace=True)

# Drop median_house_value (for clustering)
df.drop(['median_house_value'], axis=1, inplace=True)

> Data analysis

In [None]:
# Same conditions as the first run (same scalers, grids and metrics),
# but on the reduced feature set in df
output = major_function(df, scaler=scaler_list, cluster=cluster_list, metric=method_list)
print(output)

> Evaluation



In [None]:
# Rank the tuning models by their best score, highest first. A tuner whose
# candidates were all ignored has best_score_ None; it is ranked last instead
# of breaking the sort with a None/float comparison.
output.sort(key=lambda i: float('-inf') if i[1].best_score_ is None else i[1].best_score_,
            reverse=True)
for out in output:
  print(out[0])                # label: scaler type, cluster type, metric
  print(out[1])                # tuning model
  print(out[1].best_params_)
  print(out[1].best_model_)
  print(out[1].best_score_)
  print()