In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error
from sklearn.metrics import f1_score, accuracy_score, precision_score

In [3]:
# Load the local iris CSV into a DataFrame for inspection.
# NOTE(review): `df` is only displayed in the next cell; the models further
# down are trained on sklearn's bundled `load_iris()` data, not on this frame.
df = pd.read_csv('iris.csv')

In [4]:
# Show the loaded frame as the cell's rich output.
df

Unnamed: 0,caseno,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,virginica
146,147,6.3,2.5,5.0,1.9,virginica
147,148,6.5,3.0,5.2,2.0,virginica
148,149,6.2,3.4,5.4,2.3,virginica


In [5]:
# Load scikit-learn's bundled iris dataset (features in `.data`, integer class
# labels in `.target`).
iris_data = load_iris()

# Hold out 20% of the samples as the test set. The fixed random_state keeps
# the split identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    iris_data.data,
    iris_data.target,
    test_size=0.2,
    random_state=121,
)

In [6]:
# Baseline random forest with default hyper-parameters.
# random_state is pinned (value is arbitrary) so the fitted forest, its
# predictions and the scores computed from them are reproducible after a
# kernel restart; an unseeded forest can differ from run to run.
rf = RandomForestClassifier(random_state=121)

rf.fit(X_train, y_train)

# Predicted class labels for the held-out samples, shown as the cell output.
pred = rf.predict(X_test)
pred

array([1, 2, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2,
       1, 1, 1, 1, 0, 0, 2, 2])

In [12]:
# Micro-averaged F1 of the baseline forest on the held-out test set.
# NOTE: for single-label multiclass targets, micro-averaged F1 (and
# micro-averaged precision) is identical to accuracy, so this value repeats
# accuracy_score; average='macro' would give a per-class-balanced view instead.
f1_score(y_test, pred, average='micro')

0.9666666666666667

In [14]:
# Fraction of held-out samples the baseline forest labelled correctly.
accuracy_score(y_test, pred)

0.9666666666666667

In [17]:
# Micro-averaged precision of the baseline forest's test predictions
# (pooled over all classes rather than averaged per class).
precision_score(y_test, pred, average='micro')

0.9666666666666667

In [28]:
# Hyper-parameter candidates for the forest: 2 * 4 * 3 * 3 = 72 combinations,
# each evaluated with 3-fold cross-validation on the training split.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[6, 8, 10, 12],
    min_samples_split=[8, 12, 16],
    min_samples_leaf=[8, 16, 20],
)

# Seeded, single-process forest used as the template estimator for the search.
rf_cv = RandomForestClassifier(n_jobs=1, random_state=124)

# Exhaustive search over the grid, run serially.
rf_grid = GridSearchCV(estimator=rf_cv, param_grid=param_grid, cv=3, n_jobs=1)
rf_grid.fit(X_train, y_train)

In [43]:
# Report the winning parameter combination and its mean cross-validated score.
# The Korean label in the second line reads "best validation score".
print(rf_grid.best_params_)
print("최고 검증 점수: {:.2f}".format(rf_grid.best_score_))

{'max_depth': 6, 'min_samples_leaf': 16, 'min_samples_split': 8, 'n_estimators': 100}
최고 검증 점수: 0.97


In [31]:
# Take the estimator the grid search refit with its best parameter
# combination and predict the held-out samples with it.
bm = rf_grid.best_estimator_
pred1 = bm.predict(X_test)
pred1

array([1, 2, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2,
       1, 1, 1, 1, 0, 0, 2, 2])

In [32]:
# Micro-averaged F1 of the grid-searched forest on the held-out test set.
f1_score(y_test, pred1, average='micro')

0.9666666666666667

In [33]:
# Test-set accuracy of the grid-searched forest.
accuracy_score(y_test, pred1)

0.9666666666666667

## GradientBoostingClassifier

In [35]:
# Baseline gradient-boosting classifier with default hyper-parameters.
# random_state is pinned (value is arbitrary) so the fitted model and the
# scores reported for it are reproducible after a kernel restart.
gb = GradientBoostingClassifier(random_state=121)

In [36]:
# Fit the baseline gradient-boosting model on the training split.
gb.fit(X_train, y_train)

In [38]:
# Predicted class labels of the baseline gradient-boosting model on the test split.
pred3 = gb.predict(X_test)

In [39]:
# Micro-averaged F1 of the baseline gradient-boosting model on the test split.
f1_score(y_test, pred3, average='micro')

0.9666666666666667

In [40]:
# Test-set accuracy of the baseline gradient-boosting model.
accuracy_score(y_test, pred3)

0.9666666666666667

In [44]:
# Template estimator for the gradient-boosting grid search.
# Seeded like the random-forest template (`rf_cv`, random_state=124) so the
# search result does not vary between runs.
gb_cv = GradientBoostingClassifier(random_state=124)

In [47]:
# Hyper-parameter candidates for gradient boosting: 2 * 3 * 3 = 18
# combinations, each evaluated with 3-fold cross-validation.
param_grid1 = dict(
    n_estimators=[100, 200],
    max_depth=[1, 3, 5],
    learning_rate=[0.01, 0.05, 0.1],
)

# Exhaustive search over the grid on the training split.
gb_grid = GridSearchCV(estimator=gb_cv, param_grid=param_grid1, cv=3)
gb_grid.fit(X_train, y_train)

In [48]:
# Parameter combination that achieved the best mean cross-validated score.
gb_grid.best_params_

{'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}

In [49]:
# Mean cross-validated score of that best combination (measured on the
# training split's folds, not on the held-out test set).
gb_grid.best_score_

0.975

In [50]:
# Estimator the grid search refit with its best parameter combination.
bm1 = gb_grid.best_estimator_

In [51]:
# Predicted class labels of the tuned gradient-boosting model on the test split.
pred4 = bm1.predict(X_test)

In [52]:
# Micro-averaged F1 of the tuned gradient-boosting model on the test split.
f1_score(y_test, pred4, average='micro')

0.9666666666666667

In [53]:
# Test-set accuracy of the tuned gradient-boosting model.
accuracy_score(y_test, pred4)

0.9666666666666667