<a href="https://colab.research.google.com/github/Marcel-Milosz/Dane/blob/Code-Review/Milosz_Marcel_04_niestacj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install hpsklearn

In [13]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import RandomizedSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from hpsklearn import HyperoptEstimator
from hpsklearn import any_classifier
from hpsklearn import any_preprocessing
from hyperopt import tpe

# Load the wine dataset and pull out the feature matrix and the class labels
wine = load_wine()
features, target = wine['data'], wine['target']

# Base estimator tuned by the searches below; the liblinear solver supports
# both the l1 and the l2 penalty that the search space tries
logreg = LogisticRegression(solver="liblinear")

# Candidate values for each hyperparameter
penalty = ["l1", "l2"]
C = np.logspace(start=0, stop=4, num=1000)  # 1000 log-spaced values, 1e0 .. 1e4

# Search space shared by GridSearch() and RandomizedSearch()
hparam = {"C": C, "penalty": penalty}

def GridSearch():
  """Grid-search `hparam` for `logreg` and print the winning penalty and C.

  Every combination in the module-level `hparam` space is evaluated with
  5-fold cross-validation on `features`/`target`, using all available
  cores (n_jobs=-1). The best penalty is printed first, then the best C.
  """
  searcher = GridSearchCV(
      estimator=logreg,
      param_grid=hparam,
      cv=5,
      verbose=2,
      n_jobs=-1,
  )
  fitted = searcher.fit(features, target)

  # Read the parameters of the refitted best estimator once, then report them
  best_params = fitted.best_estimator_.get_params()
  for key in ("penalty", "C"):
    print(best_params[key])

def RandomizedSearch():
  """Randomly sample `hparam` for `logreg` and print the winning penalty and C.

  Draws 1000 candidates from the module-level `hparam` space (seeded with
  random_state=1), scores each with 5-fold cross-validation on
  `features`/`target`, and prints the best penalty followed by the best C.
  """
  searcher = RandomizedSearchCV(
      estimator=logreg,
      param_distributions=hparam,
      random_state=1,
      n_iter=1000,
      cv=5,
      verbose=0,
      n_jobs=-1,
  )
  fitted = searcher.fit(features, target)

  # Read the parameters of the refitted best estimator once, then report them
  best_params = fitted.best_estimator_.get_params()
  for key in ("penalty", "C"):
    print(best_params[key])

def BestAlgorithm():
  """Grid-search over three classifier families and print the best one.

  A one-step pipeline is used so that the classifier itself becomes a
  searchable parameter. Logistic regression, random forest and k-nearest
  neighbours are each tried with their own small grid, scored with 5-fold
  cross-validation on the module-level `features`/`target`.
  """
  # One sub-grid per classifier family; "classifier__<name>" addresses a
  # parameter of whichever estimator currently occupies the pipeline step
  logreg_grid = {
      "classifier": [logreg],
      "classifier__penalty": ["l1", "l2"],
      "classifier__C": np.logspace(0, 4, 10),
  }
  forest_grid = {
      "classifier": [RandomForestClassifier()],
      "classifier__n_estimators": [10, 50, 100],
      "classifier__max_features": [1, 2, 3],
  }
  knn_grid = {
      "classifier": [KNeighborsClassifier()],
      "classifier__n_neighbors": range(1, 10),
      "classifier__leaf_size": [30, 60, 90],
  }

  # The initial estimator is only a placeholder; every grid replaces it
  pipeline = Pipeline(steps=[("classifier", RandomForestClassifier())])

  searcher = GridSearchCV(
      pipeline,
      [logreg_grid, forest_grid, knn_grid],
      cv=5,
      verbose=1,
      n_jobs=-1,
  )
  fitted = searcher.fit(features, target)

  # Report the estimator that ended up in the winning pipeline's only step
  print(fitted.best_estimator_.named_steps["classifier"])


def Hyperopt():
  """Let hyperopt-sklearn choose a preprocessing step and classifier, then report.

  Holds out 33% of the module-level `features`/`target` as a test set
  (random_state=42), fits a HyperoptEstimator on the remainder with the
  TPE algorithm (20 evaluations, 30 s timeout per trial), and prints the
  held-out score followed by the best model found.
  """
  split = train_test_split(features, target, test_size=0.33, random_state=42)
  X_train, X_test, y_train, y_test = split

  estimator = HyperoptEstimator(
      classifier=any_classifier("cla"),
      preprocessing=any_preprocessing("pre"),
      algo=tpe.suggest,
      max_evals=20,
      trial_timeout=30,
      n_jobs=-1,
  )
  estimator.fit(X_train, y_train)

  # Score on the held-out split before reporting the selected model
  held_out_score = estimator.score(X_test, y_test)
  print(f"Model accuracy: {held_out_score}")
  print(estimator.best_model())

# Run each search in turn, printing its header before its results
for header, run_search in (
    ("\nGridSearch function: ", GridSearch),
    ("\nRandomizedSearch function: ", RandomizedSearch),
    ("\nBest algorithm: ", BestAlgorithm),
):
  print(header)
  run_search()

# Only the header is printed for Hyperopt; the call itself is left disabled
print("\n\nHyperopt: ")
# Hyperopt()


GridSearch function: 
Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 246 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 2182 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 5430 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 9958 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  1.9min finished


l2
1.6758078645307677

RandomizedSearch function: 
l2
2.887090917359236

Best algorithm: 
Fitting 5 folds for each of 56 candidates, totalling 280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 265 tasks      | elapsed:    4.2s


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features=1,
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)


Hyperopt: 


[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:    4.6s finished
