# Random Search를 사용한 하이퍼파라미터 최적화

## 데이터 가져오기

In [75]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [76]:
# Load the wine dataset from the remote CSV into a DataFrame.
wine = pd.read_csv("https://bit.ly/wine_csv_data")

# Feature matrix: the three measurement columns, as a NumPy array.
data = wine.loc[:, ["alcohol", "sugar", "pH"]].to_numpy()

# Label vector: the `class` column, as a NumPy array.
target = wine.loc[:, "class"].to_numpy()

In [77]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the
# split reproducible between runs.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

## 확률 분포 객체

In [78]:
from scipy.stats import uniform, randint
import numpy as np

In [79]:
# Frozen discrete-uniform distribution over the integers 0..9
# (`high` is exclusive, so 10 itself is never drawn).
rgen = randint(low=0, high=10)

In [80]:
# Draw 10 random integers in 0..9 from the frozen distribution.
ran_int = rgen.rvs(size=10)

In [81]:
# Draw a larger sample (1000 values) from the same distribution.
ran_int_1000 = rgen.rvs(size=1000)

In [82]:
# Show each distinct value alongside how many times it was drawn.
np.unique(ran_int_1000, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([107, 100, 118,  79,  99, 112,  91, 103, 101,  90]))

In [83]:
# Frozen continuous-uniform distribution on [loc, loc + scale] = [0, 1].
ugen = uniform(loc=0, scale=1)

In [84]:
# Display 10 real-valued samples drawn from the uniform distribution.
ugen.rvs(size=10)

array([0.89497783, 0.55863574, 0.91227607, 0.18294711, 0.23077149,
       0.70166042, 0.36725696, 0.71386089, 0.31881254, 0.33777682])

## RandomizedSearchCV를 사용한 하이퍼파라미터 최적화

In [85]:
# Sampling distributions for the randomized search, keyed by the
# DecisionTreeClassifier parameter each one feeds.
#
# NOTE: scipy's `uniform` takes (loc, scale) and samples from
# [loc, loc + scale], so `min_impurity_decrease` ranges over
# [0.0001, 0.0011] -- not [0.0001, 0.001].
# `randint(low, high)` excludes `high`: max_depth is 20..49,
# min_samples_split is 2..24 and min_samples_leaf is 1..24.
params = dict(
    min_impurity_decrease=uniform(loc=0.0001, scale=0.001),
    max_depth=randint(low=20, high=50),
    min_samples_split=randint(low=2, high=25),
    min_samples_leaf=randint(low=1, high=25),
)

In [86]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier

In [87]:
# Base estimator for the search; the fixed seed makes tree fitting
# reproducible.
dt = DecisionTreeClassifier(random_state=42)

In [88]:
# Randomized search: sample 100 parameter settings from `params`,
# evaluate each with the default cross-validation, in a single job.
rs = RandomizedSearchCV(
    estimator=dt,
    param_distributions=params,
    n_iter=100,
    n_jobs=1,
    random_state=42,
)
rs.fit(train_input, train_target)

# Display the best parameter setting found by the search.
rs.best_params_

{'max_depth': 39,
 'min_impurity_decrease': np.float64(0.00034102546602601173),
 'min_samples_leaf': 7,
 'min_samples_split': 13}

In [91]:
# Highest mean cross-validation score among all sampled candidates.
rs.cv_results_["mean_test_score"].max()

np.float64(0.8695428296438884)

In [90]:
# Rebind `dt` to the best estimator kept by the search, then report its
# score on the held-out test set.
dt = rs.best_estimator_
print(dt.score(test_input, test_target))

0.86
