In [None]:
!pip install ray[tune] tune-sklearn

In [None]:
import torch
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler

# NOTE(review): torch.load unpickles arbitrary Python objects -- only load files you trust.
data, y = torch.load('../input/titanic/data_v2.pkl')
# The first len(y) rows have a label in `y` (training set); the remaining rows form the test set.
train_data, test_data = data[:len(y)], data[len(y):]

# Seed NumPy's global RNG for any later code that draws from it.
np.random.seed(42)
# 80/20 hold-out split. Passing random_state explicitly makes the split
# reproducible on its own instead of depending on the current global RNG state.
X_train, X_test, y_train, y_test = train_test_split(
    train_data, y, train_size=.8, random_state=42
)

In [None]:
from sklearn.model_selection import cross_validate, cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier

# Baseline: standardise the features, then fit a decision tree with default hyper-parameters.
std = StandardScaler()
# A fixed random_state keeps the tree deterministic. The global NumPy seed is not
# a reliable substitute because cross-validation runs in worker processes (n_jobs=-1).
model = DecisionTreeClassifier(random_state=42)
pipe = Pipeline(
    [('std', std),
     ('model', model)]
)

# Mean score over 10 cross-validation folds on the full labelled set;
# left as the cell's last expression so the notebook displays it.
cross_val_score(pipe, train_data, y, cv=10, n_jobs=-1, verbose=1).mean()


In [None]:
# Hyper-parameter search
from ray import tune
# Import from the tune-sklearn package itself (the one installed at the top of the
# notebook) rather than through the legacy `ray.tune.sklearn` re-export.
from tune_sklearn import TuneSearchCV

# Search space for the 'model' step of the pipeline.
# tune.randint samples integers from [lower, upper), hence the "+ 1" on the upper bounds.
param = {
    'model__max_depth': tune.randint(3, 500),
    'model__min_samples_split': tune.randint(2, len(train_data) + 1),
    # NOTE(review): very large min_samples_leaf values leave the tree unable to split;
    # consider narrowing this range.
    'model__min_samples_leaf': tune.randint(1, len(train_data) + 1),
    'model__max_features': tune.randint(2, train_data.shape[1] + 1)}

tune_search = TuneSearchCV(
    pipe,
    param,
    search_optimization="bayesian",  # requires scikit-optimize to be installed
    n_trials=-1,  # presumably "no fixed trial count", leaving the time budget as the limit -- TODO confirm
    verbose=1,
    return_train_score=True,
    mode='max',
    time_budget_s=1800*9,  # 16200 seconds = 4.5 hours
    error_score=np.nan,  # a failing configuration is scored NaN instead of raising
    use_gpu=False,
    cv=10
)
tune_search.fit(train_data, y)
print(tune_search.best_params_)

In [None]:
# Use the public fitted attribute `best_estimator_` (scikit-learn naming convention)
# instead of the internal `best_estimator` attribute.
best_model = tune_search.best_estimator_
# Score on the same rows the search was fitted on, so this is a training score
# and an optimistic estimate of generalisation.
best_model.score(train_data, y)

In [None]:
import pandas as pd

# Use the public fitted attribute `best_estimator_` (scikit-learn naming convention).
best_model = tune_search.best_estimator_
# Predictions get their own name so the hold-out labels `y_test` from the
# train/test split are not overwritten. The frame is passed directly, matching
# the input type used when fitting and scoring the model.
test_pred = best_model.predict(test_data)
result = pd.DataFrame(test_pred, index=test_data.index, columns=['Survived'])
result.to_csv('tree.csv')
# Persist the winning hyper-parameters alongside the predictions.
torch.save(tune_search.best_params_, 'tree.pkl')