In [45]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from tune_sklearn import TuneGridSearchCV
from sklearn.model_selection import cross_val_score
import time
import ray

In [46]:
#ray.init(address="192.168.2.253:6379")
@ray.remote
def f():
    """Ray task that reports the IP address of the node it runs on.

    Returns:
        Whatever Ray's ``get_node_ip_address`` helper returns for the
        executing node.
    """
    # Brief pause before answering; presumably this keeps each task busy long
    # enough for a large batch to spread over several workers -- TODO confirm.
    time.sleep(0.01)
    # `ray.services` is the legacy location of this helper and is not public
    # API in current Ray releases. Prefer the public `ray.util` entry point and
    # fall back to the legacy path only on installations that lack it.
    get_node_ip = getattr(getattr(ray, "util", None), "get_node_ip_address", None)
    if get_node_ip is None:
        get_node_ip = ray.services.get_node_ip_address
    return get_node_ip()

# Launch 1000 probe tasks, wait for all of them, and collapse the reported
# addresses into the set of distinct node IPs that executed at least one task.
probe_refs = [f.remote() for _ in range(1000)]
node_ips = set(ray.get(probe_refs))
node_ips



{'192.168.2.253'}

In [47]:
# Load the covertype dataset and draw fixed-size, seeded train/test subsets.
X, y = fetch_covtype(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=20000,
    test_size=5000,
    random_state=42,
)

# Baseline: a random forest with default hyperparameters (seed fixed).
rf0 = RandomForestClassifier(random_state=0).fit(X_train, y_train)
print(rf0.get_params())

# Per-fold scores from 5-fold cross-validation on the training subset.
print(cross_val_score(rf0, X_train, y_train, cv=5))

# Score on the held-out test subset; shown as the cell's result.
rf0.score(X_test, y_test)

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 0, 'verbose': 0, 'warm_start': False}
[0.83875 0.83525 0.841   0.8235  0.83675]


0.8404

In [48]:
# Grid-search random-forest hyperparameters with tune-sklearn and time the
# whole search.
#
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock intended for measuring elapsed durations; time() can
# jump if the system clock is adjusted while the search is running.
time_start = time.perf_counter()

# Every combination of these values is evaluated (3 * 4 * 3 = 36 candidates).
# NOTE(review): the grid caps max_depth at 3; the best model's test score
# recorded in this notebook (0.6688) is below the untuned baseline (0.8404),
# so the search space probably needs deeper trees -- confirm intent.
param_dist = {
    "max_depth": [3, 2, 1],
    "n_estimators": [100, 125, 150, 175],
    "ccp_alpha": [0.001, 0.01, 0.1],
}

# Hand the base estimator straight to the search so that `rf1` only ever
# refers to the search object (it used to be bound to the bare estimator
# first and then rebound).
rf1 = TuneGridSearchCV(RandomForestClassifier(random_state=0), param_dist)
rf1.fit(X_train, y_train)

time_end = time.perf_counter()
print('totally cost', time_end - time_start)
print(rf1.cv_results_)

totally cost 99.01611423492432
{'params': [{'max_depth': 3, 'n_estimators': 100, 'ccp_alpha': 0.001}, {'max_depth': 3, 'n_estimators': 100, 'ccp_alpha': 0.01}, {'max_depth': 3, 'n_estimators': 100, 'ccp_alpha': 0.1}, {'max_depth': 2, 'n_estimators': 100, 'ccp_alpha': 0.001}, {'max_depth': 2, 'n_estimators': 100, 'ccp_alpha': 0.01}, {'max_depth': 2, 'n_estimators': 100, 'ccp_alpha': 0.1}, {'max_depth': 1, 'n_estimators': 100, 'ccp_alpha': 0.001}, {'max_depth': 1, 'n_estimators': 100, 'ccp_alpha': 0.01}, {'max_depth': 1, 'n_estimators': 100, 'ccp_alpha': 0.1}, {'max_depth': 3, 'n_estimators': 125, 'ccp_alpha': 0.001}, {'max_depth': 3, 'n_estimators': 125, 'ccp_alpha': 0.01}, {'max_depth': 3, 'n_estimators': 125, 'ccp_alpha': 0.1}, {'max_depth': 2, 'n_estimators': 125, 'ccp_alpha': 0.001}, {'max_depth': 2, 'n_estimators': 125, 'ccp_alpha': 0.01}, {'max_depth': 2, 'n_estimators': 125, 'ccp_alpha': 0.1}, {'max_depth': 1, 'n_estimators': 125, 'ccp_alpha': 0.001}, {'max_depth': 1, 'n_estimato

In [49]:
# Show the selected hyperparameters and the corresponding estimator, then
# score that estimator on the held-out test subset (displayed as the result).
best_rf = rf1.best_estimator_
print(rf1.best_params_)
print(best_rf)
best_rf.score(X_test, y_test)

{'max_depth': 3, 'n_estimators': 150, 'ccp_alpha': 0.001}
RandomForestClassifier(ccp_alpha=0.001, max_depth=3, n_estimators=150,
                       random_state=0)


0.6688

In [None]:
# Final cleanup: shut down this session's Ray runtime once the experiments
# above have finished.
ray.shutdown()