<a href="https://colab.research.google.com/github/ManG0A2/MLP-class/blob/main/4_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Decision tree, random forest, extra trees, gradient boosting and
# histogram-based gradient boosting, each followed by hyperparameter tuning.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the wine table and extract three feature columns plus the label as arrays.
wine = pd.read_csv('https://bit.ly/wine_csv_data')
data = wine.loc[:, ['alcohol', 'sugar', 'pH']].to_numpy()
target = wine['class'].to_numpy()

# Hold out 20% of the rows as a test split, stratified on the label.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    stratify=target,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(train_input)
train_scaled = ss.transform(train_input)
test_scaled = ss.transform(test_input)

In [3]:
# Decision tree baseline, trained on the standardized features.
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42).fit(train_scaled, train_target)

print(dt.score(X=train_scaled, y=train_target))  # training set
print(dt.score(X=test_scaled, y=test_target))  # test set

0.9978833942659227
0.8646153846153846


In [4]:
# Decision tree tuning: sampling distributions for the randomized search.
from scipy.stats import randint, uniform

# scipy's uniform takes (loc, scale); randint takes (low, high) with high excluded.
params = dict(
    min_impurity_decrease=uniform(0.0001, 0.001),
    max_depth=randint(20, 50),
    min_samples_split=randint(2, 25),
    min_samples_leaf=randint(1, 25),
)


In [5]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search over the decision-tree distributions: 100 sampled
# candidates, all CPU cores, fitted on the unscaled training features.
gs = RandomizedSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_distributions=params,
    n_iter=100,
    n_jobs=-1,
    random_state=42,
)
gs.fit(train_input, train_target)

In [6]:
# Show the hyperparameter combination the decision-tree search selected.
print(gs.best_params_)

{'max_depth': 26, 'min_impurity_decrease': 0.0002743664290049914, 'min_samples_leaf': 1, 'min_samples_split': 9}


In [7]:
import numpy as np  # also used by later cells for np.mean / np.arange

# Highest mean cross-validation score among all sampled candidates.
print(gs.cv_results_['mean_test_score'].max())

0.8689625749611313


In [8]:
# Refit a decision tree with hyperparameter values copied from the
# best_params_ printed by the decision-tree search, then score both splits.
dt = DecisionTreeClassifier(
    max_depth=26,
    min_impurity_decrease=0.0002743664290049914,
    min_samples_leaf=1,
    min_samples_split=9,
    random_state=42,
)
dt.fit(train_scaled, train_target)

print(dt.score(X=train_scaled, y=train_target))
print(dt.score(X=test_scaled, y=test_target))

0.911487396574947
0.8630769230769231


In [9]:
# Random forest baseline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

rf = RandomForestClassifier(random_state=42, n_jobs=-1)

# Cross-validate on the training split, keeping the per-fold training scores too.
scores = cross_validate(
    estimator=rf,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9981720130385032 0.8937817428000298


In [20]:
# Random forest tuning: sampling distributions for the randomized search.
# scipy's uniform takes (loc, scale); randint takes (low, high) with high excluded.
params = dict(
    min_impurity_decrease=uniform(0.0001, 0.001),
    max_depth=randint(20, 50),
    min_samples_split=randint(2, 25),
    min_samples_leaf=randint(1, 25),
)

In [21]:
# Randomized search over the random-forest distributions: 100 sampled
# candidates evaluated on all CPU cores.
gs = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=params,
    n_iter=100,
    n_jobs=-1,
    random_state=42,
)
gs.fit(train_input, train_target)

In [22]:
# Show the hyperparameter combination the random-forest search selected.
print(gs.best_params_)

{'max_depth': 26, 'min_impurity_decrease': 0.0002743664290049914, 'min_samples_leaf': 1, 'min_samples_split': 9}


In [23]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

# Random forest configured with hyperparameter values copied from the
# best_params_ printed by the random-forest search.
rf = RandomForestClassifier(
    max_depth=26,
    min_impurity_decrease=0.0002743664290049914,
    min_samples_leaf=1,
    min_samples_split=9,
    random_state=42,
    n_jobs=-1,
)

scores = cross_validate(
    estimator=rf,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9056667456956127 0.8753107647886281


In [24]:
# Extra trees baseline
from sklearn.ensemble import ExtraTreesClassifier

et = ExtraTreesClassifier(random_state=42, n_jobs=-1)

# Cross-validate on the training split, keeping the per-fold training scores too.
scores = cross_validate(
    estimator=et,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9981720130385032 0.8924350336862368


In [25]:
# Extra trees tuning: sampling distributions for the randomized search.
# scipy's uniform takes (loc, scale); randint takes (low, high) with high excluded.
params = dict(
    min_impurity_decrease=uniform(0.0001, 0.001),
    max_depth=randint(20, 50),
    min_samples_split=randint(2, 25),
    min_samples_leaf=randint(1, 25),
)

In [26]:
# Randomized search over the extra-trees distributions: 100 sampled
# candidates evaluated on all CPU cores.
gs = RandomizedSearchCV(
    estimator=ExtraTreesClassifier(random_state=42),
    param_distributions=params,
    n_iter=100,
    n_jobs=-1,
    random_state=42,
)
gs.fit(train_input, train_target)

In [27]:
# Show the hyperparameter combination the extra-trees search selected.
print(gs.best_params_)

{'max_depth': 26, 'min_impurity_decrease': 0.0008965429868602329, 'min_samples_leaf': 15, 'min_samples_split': 12}


In [28]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra trees configured with hyperparameter values copied from the
# best_params_ printed by the extra-trees search.
# NOTE(review): the scores recorded for this cell are identical on the
# training and validation folds (~0.754) and well below the untuned
# baseline; worth confirming the tuned model has not collapsed to
# predicting a single class.
et = ExtraTreesClassifier(
    max_depth=26,
    min_impurity_decrease=0.0008965429868602329,
    min_samples_leaf=15,
    min_samples_split=12,
    random_state=42,
    n_jobs=-1,
)

scores = cross_validate(
    estimator=et,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.7538964799489216 0.75389649811209


In [11]:
# Gradient boosting baseline
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(random_state=42)

# Cross-validate on the training split, keeping the per-fold training scores too.
scores = cross_validate(
    estimator=gb,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.886136193834053 0.8714622047827053


In [30]:
# Gradient boosting tuning: exhaustive grid of learning rates and tree counts.

# The learning rates are written out explicitly rather than generated with a
# float-step np.arange, which produces 0.30000000000000004 instead of 0.3 and
# whose element count is sensitive to floating-point rounding.
params = {
    'learning_rate': [0.1, 0.2, 0.3, 0.4],
    'n_estimators': [100, 200, 300, 400],
}

In [31]:
# GridSearchCV is not imported anywhere else in this notebook, so import it
# here; without this the cell raises NameError on a fresh kernel.
from sklearn.model_selection import GridSearchCV

# Exhaustive search over the gradient-boosting grid on all CPU cores.
gs = GridSearchCV(GradientBoostingClassifier(random_state=42), params, n_jobs=-1)
gs.fit(train_input, train_target)

In [32]:
# Show the grid point the gradient-boosting search selected.
print(gs.best_params_)

{'learning_rate': 0.4, 'n_estimators': 400}


In [33]:
# Gradient boosting configured with the values copied from the best_params_
# printed by the grid search.
gb = GradientBoostingClassifier(
    learning_rate=0.4,
    n_estimators=400,
    random_state=42,
)

scores = cross_validate(
    estimator=gb,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9631035141499419 0.8795437550899534


In [17]:
# Histogram-based gradient boosting
try:
    # The estimator is a regular, stable import since scikit-learn 1.0.
    from sklearn.ensemble import HistGradientBoostingClassifier
except ImportError:
    # Older scikit-learn releases only expose it after the experimental
    # opt-in import, so fall back to that instead of importing it always
    # (the opt-in module is deprecated on current releases).
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
    from sklearn.ensemble import HistGradientBoostingClassifier

hgb = HistGradientBoostingClassifier(random_state=42)

# Cross-validate on the training split, keeping the per-fold training scores too.
scores = cross_validate(hgb, train_input, train_target,
                        return_train_score=True)

# Mean training score followed by mean validation score.
print(np.mean(scores['train_score']), np.mean(scores['test_score']))




0.930536541746549 0.8780021470348707


In [18]:
# XGBoost with its histogram-based tree method, scored with cross-validation.
# No hyperparameter search is run in this cell.
from xgboost import XGBClassifier

xgb = XGBClassifier(random_state=42, tree_method='hist')

scores = cross_validate(
    estimator=xgb,
    X=train_input,
    y=train_target,
    return_train_score=True,
)

# Mean training score followed by mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9524726282724838 0.8785790701117939
