<a href="https://colab.research.google.com/github/Titashmkhrj/Credit-card-fraud-detection/blob/master/HPO_MS_TEST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# importing the required libraries
import numpy as np
import pandas as pd

import imblearn 
from imblearn.over_sampling import SMOTE

from sklearn.linear_model import (LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import (RandomizedSearchCV, train_test_split, cross_val_score)


# Estimators to tune. The order matches the entries of the parameter grid:
# three LogisticRegression instances (one per penalty-specific search space),
# followed by the remaining classifiers.
model_objects = [
    estimator_cls()
    for estimator_cls in (
        LogisticRegression,
        LogisticRegression,
        LogisticRegression,
        PassiveAggressiveClassifier,
        RidgeClassifier,
        KNeighborsClassifier,
        SVC,
        DecisionTreeClassifier,
        RandomForestClassifier,
    )
]



# Hyper-parameter search spaces used for tuning the models.
# The entries must stay in the same order as `model_objects`: the search loop
# pairs them positionally with zip(). Every key inside a search space has to be
# an exact constructor argument name of the paired estimator (names are
# case-sensitive, e.g. `C`, `n_neighbors`), and boolean options must be real
# booleans -- the strings 'True' and 'False' are both truthy.
parameter_grid = {
    'LR_l1': {
        'penalty': ['l1'],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['liblinear', 'saga'],
        'max_iter': [100000],
    },

    'LR_l2': {
        'penalty': ['l2'],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
        'max_iter': [100000],
    },

    'LR_ElNet': {
        'penalty': ['elasticnet'],
        'l1_ratio': [0.3, 0.5, 0.7],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['saga'],
        'max_iter': [100000],
    },

    'Pass_Agg_clif': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'fit_intercept': [True, False],
        'random_state': [42],
        'loss': ['hinge', 'squared_hinge'],
        'class_weight': ['balanced', None],
    },

    'Ridge_clif': {
        'alpha': [500.0, 50.0, 5.0, 0.5, 0.05, 0.005],
        'fit_intercept': [True, False],
        # NOTE(review): `normalize` was removed from RidgeClassifier in
        # scikit-learn 1.2; drop this entry when running on a newer release.
        'normalize': [True, False],
        'class_weight': ['balanced', None],
        'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg'],
    },

    'KN_classif': {
        'n_neighbors': [2, 4, 6, 8, 10],
        'p': [2, 3, 5],
    },

    'SVC': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'gamma': ['scale', 'auto'],
        'class_weight': ['balanced', None],
    },

    'DT_clif': {
        'criterion': ['gini', 'entropy'],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 5, 10],
        # An integer min_samples_split must be >= 2, so 1 is not offered.
        'min_samples_split': [2, 5, 10, 15, 100],
        'max_depth': [5, 8, 15, 25, 30, None],
    },

    'RF_clif': {
        'n_estimators': [120, 300, 500, 800, 1200],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 5, 10],
        # An integer min_samples_split must be >= 2, so 1 is not offered.
        'min_samples_split': [2, 5, 10, 15, 100],
        'max_depth': [5, 8, 15, 25, 30, None],
    },
}



# Load the feature space and the target space of the project from Drive.
x_data = pd.read_csv('/content/drive/My Drive/data/features_space.csv')
# The target file carries an extra 'Unnamed: 0' column that is dropped right away.
y_data = pd.read_csv('/content/drive/My Drive/data/target_space.csv').drop(columns='Unnamed: 0')


# Two successive 70/30 splits with the same seed:
#   1. full data      -> train / test
#   2. train portion  -> optimization / validation
split_options = dict(test_size=0.3, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, **split_options)
x_optimization, x_validation, y_optimization, y_vaildation = train_test_split(x_train, y_train, **split_options)


# Oversample the optimization split with SMOTE so that the models are not
# tuned towards the majority class of the target feature.
x_optimization_resampled, y_optimization_resampled = SMOTE(random_state=42).fit_resample(x_optimization, y_optimization)
# Standardize the features of the resampled optimization split.
x_optimization_scaled = StandardScaler().fit_transform(x_optimization_resampled)
# Flatten the target to 1-D. np.ravel() works for ndarrays and for pandas
# objects alike, whereas the `.ravel()` method does not exist on a DataFrame
# (which fit_resample may return when it is given a DataFrame).
y_optimization_flat = np.ravel(y_optimization_resampled)

print('Tunnin the hyper-parameter...............')

# Best estimator found for each model, in the order of `model_objects`.
hyper_parameter_optimized_models = []

# Run one randomized search per (search space, estimator) pair. The pairing is
# positional, so `parameter_grid` and `model_objects` must share the same order.
for grid, model in zip(parameter_grid.values(), model_objects):
    optimizer = RandomizedSearchCV(
        estimator=model,
        param_distributions=grid,
        random_state=42,
        cv=3,
        error_score=-1,  # a candidate whose fit fails is scored -1 instead of aborting the search
        verbose=10,
    )
    optimizer.fit(x_optimization_scaled, y_optimization_flat)
    # Keep the refitted best estimator of this search.
    hyper_parameter_optimized_models.append(optimizer.best_estimator_)

print('Hyper parameter tunning is finished.')




# Model selection: score every tuned model on the validation split.
print('Model selection .........')

# Oversample the validation split that is used for model selection.
x_validation_resampled, y_validation_resampled = SMOTE(random_state=42).fit_resample(x_validation, y_vaildation)
# Standardize the features of the resampled validation split.
x_validation_scaled = StandardScaler().fit_transform(x_validation_resampled)
# Flatten the target to 1-D. np.ravel() works for ndarrays and for pandas
# objects alike, whereas the `.ravel()` method does not exist on a DataFrame
# (which fit_resample may return when it is given a DataFrame).
y_validation_flat = np.ravel(y_validation_resampled)

# Mean 3-fold cross-validation score of each tuned model, in the same order as
# `hyper_parameter_optimized_models`.
optimized_model_validation_scores = []

for optimized_model in hyper_parameter_optimized_models:
    model_validation_scores = cross_val_score(optimized_model, x_validation_scaled, y_validation_flat, cv=3)
    optimized_model_validation_scores.append(np.mean(model_validation_scores))

# Collect the tuned models together with their validation scores.
results_dict = {
    'optimized_model': hyper_parameter_optimized_models,
    'validation_score': optimized_model_validation_scores,
}

optimized_model_results = pd.DataFrame(results_dict)
# Persist the tuning / model-selection report as a CSV file.
optimized_model_results.to_csv('/content/drive/My Drive/data/model_optimizaion_report.csv')
print('Model selection is finished')


print('Initiating the process of our final phase to judge the average out-of-sample performance of our best found optimized model.')
# Pick the model with the highest validation score. idxmax() returns an index
# *label*, so the row is looked up by label with .loc.
best_model_idx = optimized_model_results['validation_score'].idxmax()
best_model = optimized_model_results.loc[best_model_idx, 'optimized_model']

print('The best model to our finding is ', best_model)

# The whole training set is used for this final estimate.
# NOTE(review): the held-out x_test / y_test split is never used in the visible
# code, and resampling/scaling happens before cross-validation, so the folds
# are not independent of each other -- confirm this is the intended protocol.
# Oversample the training set with SMOTE to balance the target classes.
x_train_resampled, y_train_resampled = SMOTE(random_state=42).fit_resample(x_train, y_train)
# Standardize the features of the resampled training set.
x_train_scaled = StandardScaler().fit_transform(x_train_resampled)
# Flatten the target to 1-D. np.ravel() works for ndarrays and for pandas
# objects alike, whereas the `.ravel()` method does not exist on a DataFrame
# (which fit_resample may return when it is given a DataFrame).
y_train_flat = np.ravel(y_train_resampled)

# Average 3-fold cross-validation score of the selected model.
out_of_sample_cv_scores = cross_val_score(best_model, x_train_scaled, y_train_flat, cv=3)
avg_final_score = np.mean(out_of_sample_cv_scores)
# Report the value computed above (the previous code printed an undefined name).
print('The final averaeg out-of-sample performance score of our best optimized model is', avg_final_score)

  y = column_or_1d(y, warn=True)


Tunnin the hyper-parameter...............
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100, score=0.978, total= 1.1min
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.1min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100, score=0.978, total=  46.3s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.9min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=100, score=0.977, total=  46.3s
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10 .


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.7min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.978, total= 7.7min
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10 .


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 10.4min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.977, total= 9.3min
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10 .


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 19.7min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.977, total= 9.4min
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 29.1min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001, score=0.957, total=   1.1s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed: 29.1min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001, score=0.958, total=   1.2s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 29.1min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=0.001, score=0.958, total=   1.1s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 29.1min remaining:    0.0s


[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.977, total=  40.6s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10 
[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.978, total=  46.0s
[CV] solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10 
[CV]  solver=liblinear, random_state=42, penalty=l1, max_iter=100000, C=10, score=0.977, total=  46.0s
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1, score=0.977, total= 3.3min
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1, score=0.977, total= 8.4min
[CV] solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l1, max_iter=100000, C=0.1, score=0.976, total= 8.5min
[CV] solver=liblinear, random_s

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 106.6min finished


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.976, total=   7.1s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.1s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.976, total=   7.1s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   14.2s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.975, total=   7.6s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   21.8s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.978, total=  10.3s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   32.2s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.979, total=  12.8s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   45.0s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.978, total=  10.9s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   55.8s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001, score=0.962, total=   4.1s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   59.9s remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001, score=0.963, total=   4.2s
[CV] solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:  1.1min remaining:    0.0s


[CV]  solver=newton-cg, random_state=42, penalty=l2, max_iter=100000, C=0.001, score=0.962, total=   4.1s
[CV] solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10 ..


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  1.1min remaining:    0.0s


[CV]  solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.978, total= 4.0min
[CV] solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10 ..
[CV]  solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.978, total= 7.7min
[CV] solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10 ..
[CV]  solver=sag, random_state=42, penalty=l2, max_iter=100000, C=10, score=0.977, total= 7.2min
[CV] solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.976, total= 1.3min
[CV] solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.976, total= 4.3min
[CV] solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1 
[CV]  solver=saga, random_state=42, penalty=l2, max_iter=100000, C=0.1, score=0.975, total= 4.3min
[CV] solver=lbfgs, random_state=42, penalty=l2, max_iter=

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 31.5min finished


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001, score=0.962, total=   5.9s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.9s remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001, score=0.962, total=  21.4s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   27.4s remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.001, score=0.962, total=  21.9s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   49.2s remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001, score=0.962, total=   5.7s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   54.9s remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001, score=0.963, total=  19.7s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.2min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.5, C=0.001, score=0.963, total=  20.0s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  1.6min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1, score=0.976, total= 2.4min
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  4.0min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1, score=0.976, total= 7.0min
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 11.0min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.1, score=0.975, total= 6.9min
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 17.9min remaining:    0.0s


[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01, score=0.973, total=  42.2s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01 
[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01, score=0.973, total= 2.3min
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01 
[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.7, C=0.01, score=0.972, total= 2.5min
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.01 
[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.01, score=0.972, total=  31.2s
[CV] solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.01 
[CV]  solver=saga, random_state=42, penalty=elasticnet, max_iter=100000, l1_ratio=0.3, C=0.01, score=0.972, total= 1.8min
[CV] solver=sa

In [None]:
# Hyper-parameter search spaces, one per model. Every key inside a search space
# has to be an exact constructor argument name of the corresponding
# scikit-learn estimator (names are case-sensitive, e.g. `C`, `n_neighbors`),
# and boolean options must be real booleans -- the strings 'True' and 'False'
# are both truthy.
parameter_grid = {
    'LR_l1': {
        'penalty': ['l1'],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['liblinear', 'saga'],
        'max_iter': [100000],
    },

    'LR_l2': {
        'penalty': ['l2'],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
        'max_iter': [100000],
    },

    'LR_ElNet': {
        'penalty': ['elasticnet'],
        'l1_ratio': [0.3, 0.5, 0.7],
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'random_state': [42],
        'solver': ['saga'],
        'max_iter': [100000],
    },

    'Pass_Agg_clif': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'fit_intercept': [True, False],
        'random_state': [42],
        # Dict entries use ':'; the previous '=' here was a syntax error.
        'loss': ['hinge', 'squared_hinge'],
        'class_weight': ['balanced', None],
    },

    'Ridge_clif': {
        'alpha': [500.0, 50.0, 5.0, 0.5, 0.05, 0.005],
        'fit_intercept': [True, False],
        # NOTE(review): `normalize` was removed from RidgeClassifier in
        # scikit-learn 1.2; drop this entry when running on a newer release.
        'normalize': [True, False],
        'class_weight': ['balanced', None],
        'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg'],
    },

    'KN_classif': {
        'n_neighbors': [2, 4, 6, 8, 10],
        'p': [2, 3, 5],
    },

    'SVC': {
        'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'gamma': ['scale', 'auto'],
        'class_weight': ['balanced', None],
    },

    'DT_clif': {
        'criterion': ['gini', 'entropy'],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 5, 10],
        # An integer min_samples_split must be >= 2, so 1 is not offered.
        'min_samples_split': [2, 5, 10, 15, 100],
        'max_depth': [5, 8, 15, 25, 30, None],
    },

    'RF_clif': {
        'n_estimators': [120, 300, 500, 800, 1200],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 5, 10],
        # An integer min_samples_split must be >= 2, so 1 is not offered.
        'min_samples_split': [2, 5, 10, 15, 100],
        'max_depth': [5, 8, 15, 25, 30, None],
    },
}
# Disabled snippet, kept as a bare string literal so none of it is executed.
# It fits a single StandardScaler on `x_train_res` and reuses that scaler to
# transform both `x_train_res` and `x_test`.
# NOTE(review): `x_train_res` is not defined anywhere in the visible code --
# confirm the intended variable before re-enabling this.
'''
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x_train_res)
x_train_scaled = pd.DataFrame(scaler.transform(x_train_res), columns = x_train_res.columns)
x_test_scaled = pd.DataFrame(scaler.transform(x_test), columns = x_test.columns)
'''