In [17]:
import os
import struct
import itertools
import numpy as np
import matplotlib.pyplot as pyplot
from sklearn.metrics import classification_report
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression

def read(dataset = "training", path = "."):
    """
    Python function for importing the MNIST data set.  It returns an iterator
    of 2-tuples with the first element being the label and the second element
    being a numpy.uint8 2D array of pixel data for the given image.

    Parameters
    ----------
    dataset : str
        Either "training" or "testing"; selects which pair of IDX files
        (images + labels) is read.
    path : str
        Directory that contains the uncompressed MNIST IDX files.

    Yields
    ------
    (label, image)
        ``label`` is a numpy.int8 scalar; ``image`` is a numpy.uint8 array of
        shape (rows, cols), with rows/cols taken from the image file header.

    Raises
    ------
    ValueError
        If ``dataset`` is neither "training" nor "testing".  Because this is
        a generator, the error is raised on first iteration, not at call time.
    """

    # Compare by value.  `is` checks object identity and only matched string
    # literals by accident of interning; an equal string built at runtime
    # (e.g. read from a config file) was rejected.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        # ValueError is a subclass of Exception, so existing handlers still work.
        raise ValueError("dataset must be 'testing' or 'training'")

    # Label file: 8-byte big-endian header (magic, count), then one byte per label.
    with open(fname_lbl, 'rb') as flbl:
        _magic, _count = struct.unpack(">II", flbl.read(8))
        lbl = np.fromfile(flbl, dtype=np.int8)

    # Image file: 16-byte big-endian header (magic, count, rows, cols), then
    # one unsigned byte per pixel.  The image count is taken from the number
    # of labels actually read.
    with open(fname_img, 'rb') as fimg:
        _magic, _count, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = np.fromfile(fimg, dtype=np.uint8).reshape(len(lbl), rows, cols)

    # Yield each (label, image) pair in file order.
    for i in range(len(lbl)):
        yield lbl[i], img[i]

def show(image):
    """
    Render a given numpy.uint8 2D array of pixel data.

    The image is drawn on a new figure with the 'Greys' colormap and
    nearest-neighbour interpolation, x ticks on top and y ticks on the left,
    and the figure is then displayed with pyplot.show().
    """
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    imgplot = ax.imshow(image, cmap=pyplot.cm.Greys)
    imgplot.set_interpolation('nearest')
    ax.xaxis.set_ticks_position('top')
    ax.yaxis.set_ticks_position('left')
    # Must be inside the function: at module level it ran once when the cell
    # was executed (with no figure to show) and never when show() was called.
    pyplot.show()

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=pyplot.cm.Blues):
    """
    Print the confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : 2D numpy array
        Confusion matrix; rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels, one per class, used on both axes.
    normalize : bool
        When true, every row is divided by its row sum before printing and
        plotting, and cells are annotated with two decimals; otherwise the
        cells are annotated with the integer 'd' format.
    title : str
        Plot title.
    cmap : matplotlib colormap
        Colormap passed to pyplot.imshow.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    print(cm)

    pyplot.imshow(cm, interpolation='nearest', cmap=cmap)
    pyplot.title(title)
    pyplot.colorbar()
    positions = np.arange(len(classes))
    pyplot.xticks(positions, classes, rotation=45)
    pyplot.yticks(positions, classes)

    if normalize:
        cell_format = '.2f'
    else:
        cell_format = 'd'

    # Cells above half of the maximum get white text so the annotation stays
    # readable on the dark end of the colormap.
    midpoint = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_color = "white" if value > midpoint else "black"
            # pyplot.text takes (x, y), i.e. (column, row).
            pyplot.text(col, row, format(value, cell_format),
                        horizontalalignment="center",
                        color=text_color)

    pyplot.tight_layout()
    pyplot.ylabel('True label')
    pyplot.xlabel('Predicted label')

In [2]:
train = list(read("training", "./data"))

# Images: flatten every 2D pixel array into a 1D feature vector.
# Iterating over `train` itself (instead of a hard-coded range(60000)) keeps
# the cell working for any number of loaded samples.
X = [np.ravel(image) for _label, image in train]

# Labels, in the same order as X.
Y = [label for label, _image in train]

In [16]:
# Spot-check: label (element 0 of the tuple) of the third training sample.
train[2][0]

4

In [3]:
test = list(read("testing", "./data"))

# Images: flatten every 2D pixel array into a 1D feature vector.
# Iterating over `test` itself (instead of a hard-coded range(10000)) keeps
# the cell working for any number of loaded samples.
X_test = [np.ravel(image) for _label, image in test]

# Labels: each entry is kept as a 1-element array (np.ravel of the scalar
# label), because later cells index it as Y_true[i][0].
Y_true = [np.ravel(label) for label, _image in test]

In [4]:
# Spot-check: first test label. Each Y_true entry is a 1-element array, hence the second [0].
print(Y_true[0][0])

7


In [None]:
# Logistic regression with the stochastic 'saga' solver.  A fixed
# random_state makes the data shuffling inside the solver, and therefore the
# scores below, reproducible across kernel restarts.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases,
# where 'auto' is already the default; drop the argument once the minimum
# supported version allows it - confirm against the installed version.
logisticRegr = LogisticRegression(solver = 'saga',
                                  C=0.01, 
                                  multi_class='auto',
                                  random_state=0)

# 10-fold stratified cross-validation on the training set (no shuffling, so
# the fold assignment itself is deterministic).
kfold = StratifiedKFold(n_splits=10)
cvs = cross_val_score(logisticRegr, X, Y, cv=kfold)
print(cvs)



In [None]:
# Average of the per-fold cross-validation scores.
AVG = cvs.mean()
print(AVG)

In [None]:
# Fit on the full training set, predict the test images, and plot the
# resulting confusion matrix with the digits 0-9 as class names.
lr = logisticRegr.fit(X, Y)
Y_pred = lr.predict(X_test)
cm = metrics.confusion_matrix(Y_true, Y_pred)
class_names = [str(digit) for digit in range(10)]
plot_confusion_matrix(cm, class_names, normalize=False)

In [None]:
# Print scikit-learn's classification report for the test-set predictions.
print(classification_report(Y_true, Y_pred))

In [None]:
# Micro-averaged F1 score of the test-set predictions (displayed as the cell output).
metrics.f1_score(Y_true,Y_pred,average='micro')