## Model Evaluation
### Cross-Validation

In [1]:
# Import
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Iris dataset bundled with scikit-learn (features in .data, labels in .target)
iris = load_iris()

# Estimator under evaluation; it is handed to cross_val_score unfitted
logreg = LogisticRegression(max_iter=1000)

# 5-fold cross-validation: the result holds one score per fold
scores = cross_val_score(
    estimator=logreg,
    X=iris.data,
    y=iris.target,
    cv=5,
)

# Report the per-fold scores, then their mean, one per line
print(
    "Cross-validation scores: {}".format(scores),
    "Average cross-validation score: {:.2f}".format(scores.mean()),
    sep="\n",
)

Cross-validation scores: [0.96666667 1.         0.93333333 0.96666667 1.        ]
Average cross-validation score: 0.97


### Imbalanced Datasets

In [2]:
# Import
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# Load the digits dataset and recast it as a binary task:
# the label is True for the digit 9 and False for every other digit
digits = load_digits()
y = digits.target == 9

# Split into train and test sets; random_state=0 fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(digits.data, y, random_state=0)

# Fit a logistic regression (C=0.1) on the training split
logreg = LogisticRegression(C=0.1, max_iter=1000)
logreg.fit(X_train, y_train)

# Predict the held-out samples and report the score on the test split
pred_logreg = logreg.predict(X_test)
print("logreg score: {:.2f}".format(logreg.score(X_test, y_test)))

# Confusion matrix: rows are the true labels, columns the predicted labels
confusion = confusion_matrix(y_true=y_test, y_pred=pred_logreg)
print("Confusion matrix:\n{}".format(confusion))

logreg score: 0.98
Confusion matrix:
[[402   1]
 [  6  41]]
