# forked from dimstudio/SharpFlow
# grid_search.py — 70 lines (54 loc), 2.93 KB
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
import numpy as np
from sklearn.metrics import *
def grid_search(model, X_train, y_train, X_test, y_test):
    """Run a randomized hyperparameter search for a supported classifier.

    The estimator is wrapped in an ``imblearn`` Pipeline with SMOTE minority
    oversampling so that resampling happens inside each CV fold rather than
    leaking into the validation splits.

    Parameters
    ----------
    model : estimator
        An instance of ``SVC``, ``RandomForestClassifier`` or
        ``GradientBoostingClassifier`` (matched by substring of its type name).
    X_train, y_train : array-like
        Training data the search is fitted on.
    X_test, y_test : array-like
        Currently unused; kept for interface compatibility (the commented-out
        evaluation code at the bottom consumed them).

    Returns
    -------
    dict
        The best parameter set found, with the pipeline's ``clf__`` prefix
        stripped so it can be fed straight back into the bare estimator.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported classifier types.
    """
    check_model = str(type(model))
    model_name = type(model).__name__
    if "SVC" in check_model:
        param_grid = {'C': [0.1, 1, 10, 100],
                      'gamma': [1, 0.1, 0.01, 0.001],
                      'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}
        #param_grid = {'C': [0.1], 'gamma': [0.001], 'kernel': ['poly']} # best parameters SVC
    elif "RandomForestClassifier" in check_model:
        # NOTE(review): max_features='auto' was removed in scikit-learn 1.3;
        # confirm the pinned sklearn version still accepts it.
        param_grid = {'n_estimators': [10, 100, 120, 200, 300],
                      'max_features': ['auto', 'sqrt', 'log2'],
                      'max_depth': [4, 5, 6, 7],
                      'criterion': ['gini', 'entropy']}
    elif "GradientBoostingClassifier" in check_model:
        # NOTE(review): loss="deviance" and criterion="mae" are renamed/removed
        # in modern scikit-learn ("log_loss" / "absolute_error") — verify
        # against the project's sklearn version before widening support.
        param_grid = {"loss": ["deviance"],
                      "learning_rate": [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
                      "min_samples_split": np.linspace(0.1, 0.5, 12),
                      "min_samples_leaf": np.linspace(0.1, 0.5, 12),
                      "max_depth": [3, 5, 8],
                      "max_features": ["log2", "sqrt"],
                      "criterion": ["friedman_mse", "mae"],
                      "subsample": [0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
                      "n_estimators": [10, 100]}
    else:
        # Fail fast: previously execution fell through here and crashed with a
        # confusing NameError on the undefined param_grid a few lines below.
        print("Define grid_param in elif-loop for new model, which you added to models-set")
        raise ValueError("No parameter grid defined for model type: " + model_name)
    print(" ")
    print("Analysing of best parameters for", model, "...")
    # Oversample the minority class inside each CV fold (avoids leakage that
    # fitting SMOTE on the full training set before CV would cause).
    model = Pipeline([('sampling', SMOTE(sampling_strategy='minority')), ('clf', model)])
    # Re-key the grid onto the pipeline's estimator step.
    new_params = {'clf__' + key: param_grid[key] for key in param_grid}
    #grid = GridSearchCV(estimator=model, param_grid=new_params, scoring='accuracy', cv=10, refit=True,
    #                    n_jobs=-1,
    #                    return_train_score=True)
    # NOTE(review): no random_state is set, so repeated runs may sample
    # different candidate combinations.
    grid = RandomizedSearchCV(estimator=model, param_distributions=new_params, scoring='accuracy', cv=10, refit=True, n_jobs=-1,
                              return_train_score=True)
    grid_results = grid.fit(X_train, y_train)
    params = grid_results.best_params_
    # Strip the pipeline prefix so the result matches the bare estimator's API.
    params = {key.replace('clf__', ''): value for key, value in params.items()}
    print("Best score for {0}: {1}, using {2}".format(model_name, grid_results.best_score_, params))
    # y_pred = grid_results.predict(X_test)
    # scores = accuracy_score(y_test, y_pred)
    # scores = f1_score(y_test, y_pred)
    # print(scores, "score from grid_search")
    return params