In [5]:
import time
import numpy as np

from sklearn.datasets import fetch_openml # MNIST data
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
import matplotlib.pyplot as pyplot
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report
from sklearn import metrics
import itertools

print(__doc__)

# Author: Arthur Mensch <arthur.mensch@m4x.org>
# License: BSD 3 clause

# Turn down for faster convergence
t0 = time.time()
train_samples = 60000

# Load data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence
clf = LogisticRegression(C=10, penalty='l1', solver='saga', multi_class='multinomial')
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
# score = clf.score(X_test, y_test)


###k fold###
kfold = KFold(n_splits=10)
cvs = cross_val_score(clf, X, y, cv=kfold)
print("k fold:",cvs)


# print("Sparsity with L1 penalty: %.2f%%" % sparsity)
# print("Test score with L1 penalty: %.4f" % score)

# coef = clf.coef_.copy()
# plt.figure(figsize=(10, 5))
# scale = np.abs(coef).max()
# for i in range(10):
#     l1_plot = plt.subplot(2, 5, i + 1)
#     l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',
#                    cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
#     l1_plot.set_xticks(())
#     l1_plot.set_yticks(())
#     l1_plot.set_xlabel('Class %i' % i)
# plt.suptitle('Classification vector for...')


AVG = np.mean(clf)
print("avg : ", AVG)

run_time = time.time() - t0
print('Example run in %.3f s' % run_time)
plt.show()
print("Sparsity with L1 penalty: %.2f%%" % sparsity)

def plot_confusion_matrix(cm, classes, 
                          normalize=False, 
                          title='Confusion matrix', 
                          cmap=pyplot.cm.Blues):
#     ***
#     This function prints and plots the confusion matrix.
#     Normalizaton can be applied by setting 'normalize=True'.
#     ***
    
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:,np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    
    pyplot.imshow(cm, interpolation = 'nearest', cmap = cmap)
    pyplot.title(title)
    pyplot.colorbar()
    tick_marks = np.arange(len(classes))
    pyplot.xticks(tick_marks, classes, rotation=45)
    pyplot.yticks(tick_marks, classes)
    
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i,j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        pyplot.text(j, i, format(cm[i,j], fmt),
                 horizontalalignment = "center",
                 color="white" if cm[i,j] > thresh else "black")
        
    pyplot.tight_layout()
    pyplot.ylabel('True label')
    pyplot.xlabel('Predicted label')
    
from sklearn.metrics import classification_report
from sklearn import metrics
import itertools

# svc = svc.fit(X,Y)
Y_pred = clf.predict(X_test)
cm = metrics.confusion_matrix(y_test,Y_pred)
plot_confusion_matrix(cm, ["0","1", "2", "3", "4", "5", "6", "7", "8", "9"], 
                      normalize=False)
y_pred = clf.predict(X_test)
f1 = f1_score(y_test, y_pred, average =None)
print(format(f1))

print(classification_report(y_test, Y_pred))

metrics.f1_score(y_test,Y_pred,average='micro')

print(__doc__)

# Author: Arthur Mensch <arthur.mensch@m4x.org>
# License: BSD 3 clause

# Turn down for faster convergence
t0 = time.time()
train_samples = 60000

# Load data from https://www.openml.org/d/554
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
X = X.reshape((X.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=10000)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence
clf = LogisticRegression(C=10, penalty='l1', solver='saga', multi_class='multinomial')
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
# score = clf.score(X_test, y_test)


###k fold###
kfold = KFold(n_splits=10)
clf = cross_val_score(clf, X, y, cv=kfold)
print("k fold:",clf)


# print("Sparsity with L1 penalty: %.2f%%" % sparsity)
# print("Test score with L1 penalty: %.4f" % score)

# coef = clf.coef_.copy()
# plt.figure(figsize=(10, 5))
# scale = np.abs(coef).max()
# for i in range(10):
#     l1_plot = plt.subplot(2, 5, i + 1)
#     l1_plot.imshow(coef[i].reshape(28, 28), interpolation='nearest',
#                    cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
#     l1_plot.set_xticks(())
#     l1_plot.set_yticks(())
#     l1_plot.set_xlabel('Class %i' % i)
# plt.suptitle('Classification vector for...')


AVG = np.mean(cvs)
print("avg : ", AVG)

run_time = time.time() - t0
print('Example run in %.3f s' % run_time)
plt.show()
print("Sparsity with L1 penalty: %.2f%%" % sparsity)

def plot_confusion_matrix(cm, classes, 
                          normalize=False, 
                          title='Confusion matrix', 
                          cmap=pyplot.cm.Blues):
#     ***
#     This function prints and plots the confusion matrix.
#     Normalizaton can be applied by setting 'normalize=True'.
#     ***
    
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:,np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    
    pyplot.imshow(cm, interpolation = 'nearest', cmap = cmap)
    pyplot.title(title)
    pyplot.colorbar()
    tick_marks = np.arange(len(classes))
    pyplot.xticks(tick_marks, classes, rotation=45)
    pyplot.yticks(tick_marks, classes)
    
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i,j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        pyplot.text(j, i, format(cm[i,j], fmt),
                 horizontalalignment = "center",
                 color="white" if cm[i,j] > thresh else "black")
        
    pyplot.tight_layout()
    pyplot.ylabel('True label')
    pyplot.xlabel('Predicted label')
    
from sklearn.metrics import classification_report
from sklearn import metrics
import itertools

# svc = svc.fit(X,Y)
Y_pred = clf.predict(X_test)
cm = metrics.confusion_matrix(y_test,Y_pred)
plot_confusion_matrix(cm, ["0","1", "2", "3", "4", "5", "6", "7", "8", "9"], 
                      normalize=False)
y_pred = clf.predict(X_test)
f1 = f1_score(y_test, y_pred, average =None)
print(format(f1))

print(classification_report(y_test, Y_pred))

metrics.f1_score(y_test,Y_pred,average='micro')

Automatically created module for IPython interactive environment




k fold: [0.92242857 0.91057143 0.92128571 0.92       0.91914286 0.92385714
 0.925      0.91971429 0.91785714 0.92371429]


TypeError: unsupported operand type(s) for /: 'LogisticRegression' and 'int'