# 평가 및 성능

## 교차검증

In [18]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import koreanize_matplotlib

from sklearn.cluster import KMeans, DBSCAN

import FinanceDataReader as fdr

In [19]:
iris = load_iris()
logreg = LogisticRegression(max_iter=1000)

In [20]:
scores = cross_val_score(logreg, iris.data, iris.target)
scores

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [21]:
scores.mean()

0.9733333333333334

In [24]:
res = cross_validate(logreg, iris.data, iris.target, return_train_score=True)
res

{'fit_time': array([0.01000333, 0.00548458, 0.01954103, 0.01045513, 0.00629902]),
 'score_time': array([0.        , 0.        , 0.00099707, 0.0010221 , 0.        ]),
 'test_score': array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ]),
 'train_score': array([0.96666667, 0.96666667, 0.98333333, 0.98333333, 0.975     ])}

In [23]:
res_df = pd.DataFrame(res)
res_df
print("평균 시간과 점수:\n", res_df.mean())

평균 시간과 점수:
 fit_time       0.010956
score_time     0.000199
test_score     0.973333
train_score    0.975000
dtype: float64


In [27]:
from sklearn.svm import SVC
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)

In [29]:
best_score = 0

In [30]:
for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train, y_train)
        score = svm.score(X_test, y_test)
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}
best_parameters

{'C': 100, 'gamma': 0.001}

In [31]:
param_grid = {"C": [0.001, 0.01, 0.1, 1, 10, 100],
              "gamma":[0.001, 0.01, 0.1, 1, 10, 100]}

In [40]:
from sklearn.model_selection import GridSearchCV

In [41]:
grid_search = GridSearchCV(SVC(), param_grid, cv=5, return_train_score=True)

In [42]:
grid_search_result = grid_search.fit(X_train, y_train)

In [43]:
grid_search_result.score(X_test, y_test)

1.0

In [44]:
grid_search_result.best_params_

{'C': 100, 'gamma': 0.01}