In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
def get_data(fname):
    """Read the ``images`` and ``labels`` arrays stored in an ``.npz`` archive.

    fname: path of the archive; it is opened in binary mode.
    return: tuple ``(images, labels)`` holding the two arrays as stored.
    """
    with open(fname, 'rb') as handle:
        archive = np.load(handle)
        # np.load reads .npz members on access, so index both of them
        # before the underlying file is closed.
        pair = tuple(archive[key] for key in ('images', 'labels'))
    return pair

In [3]:
def normalize(X):
    """Standardize every row of ``X`` to zero mean and unit standard deviation.

    X: m*n array-like, one sample per row; the input is not modified.
    return: float m*n array. A row whose entries are all equal has zero
        standard deviation; it is returned as all zeros instead of NaN.
    """
    X = np.asarray(X).astype(float)
    X -= X.mean(axis=1, keepdims=True)
    std = X.std(axis=1, keepdims=True)
    # Guard against 0/0 for constant rows: dividing by 1 leaves them at zero.
    std[std == 0] = 1.0
    X /= std
    return X

In [11]:
def gradDecend(X, y, k, alpha=0.01, iterCnt=100):
    '''
    Fit softmax-regression weights with full-batch gradient descent.

    X: m*n feature matrix, one sample per row; a bias column of ones is
       appended internally.
    y: length-m vector of integer class labels, used as column indices
       into the m*k probability matrix (not a one-hot m*k matrix).
    k: number of classes.
    alpha: learning rate.
    iterCnt: number of full-batch updates.
    return: (n + 1) * k weight matrix; the last row multiplies the bias
            column.

    Prints the training error rate on every 10th iteration and the cost
    on every iteration.
    '''
    X = np.c_[X, np.ones((X.shape[0], ))]
    m, n = X.shape
    y = np.asarray(y).reshape(m)
    rows = np.arange(m)
    w = np.ones((n, k)) # (n + 1) * k
    for i in range(iterCnt):
        # One forward pass per iteration feeds the error rate, the cost
        # and the gradient; w does not change in between.
        p = h(X, w)
        if i % 10 == 0:
            cnt = np.sum(p.argmax(axis=1) != y)
            print(f"Train Error Rate: {cnt / m}")
        print(f"Iter {i + 1}")
        print(f"Cost: {cost(p, y)}")
        # Turn p into (p - onehot(y)) in place; p is not needed afterwards.
        p[rows, y] -= 1
        # softmax() in this file exponentiates -Z, which flips the sign of
        # the cross-entropy gradient, so the descent step is an addition.
        w = w + (alpha / m) * X.T.dot(p)
    return w


In [5]:
# Load the training images and labels via get_data(); the path is relative
# to the notebook's working directory.
images, labels = get_data('data/train.npz')

In [6]:
# Flatten to one 784-value row per sample, then standardize each row.
# NOTE(review): the sample count 60000 is hard-coded, and `images` is rebound,
# so the raw array is no longer reachable under this name after this cell.
images = normalize(images.reshape(60000, 784))

In [7]:
def softmax(Z):
    '''
    Row-wise softmax of the negated scores.

    Z:m*k
    res: m*k, k is number of classes, m is number of samples; every row
         is exp(-Z) normalised so that it sums to 1.
    '''
    m, k = Z.shape
    shifted = -Z
    if k:
        # Subtracting the row maximum leaves the normalised ratios
        # unchanged but keeps np.exp from overflowing to inf (and the
        # division from producing NaN) for large-magnitude scores.
        shifted = shifted - shifted.max(axis=1, keepdims=True)
    res = np.exp(shifted)
    res /= res.sum(axis=1, keepdims=True)
    return res

def h(X, W):
    '''
    Model hypothesis: one row of class probabilities per sample.

    X: m*n
    W: n*k
    return: m*k, softmax() applied to the score matrix X.dot(W)
    '''
    scores = X.dot(W)
    return softmax(scores)
def cost(p, y):
    '''
    Mean negative log-probability assigned to the true labels.

    p: m*k predicted class probabilities, one row per sample
    y: length-m vector of integer class labels, used as column indices
       into p (not a one-hot m*k matrix)
    return: -sum(log(p[i, y[i]])) / m
    '''
    m, k = p.shape
    labels = np.asarray(y).reshape(m)
    # Integer-array indexing picks p[i, y[i]] for every row at once
    # instead of looping over the samples in Python.
    picked = p[np.arange(m), labels]
    return -np.log(picked).sum() / m

In [8]:
# First 100 samples and their labels, used for the first training run below.
train_imgs = images[:100]
train_labels = labels[:100]

In [14]:
# Train on the 100-sample subset with 10 classes and the default
# alpha / iterCnt of gradDecend.
w = gradDecend(train_imgs, train_labels, 10)

Train Error Rate: 0.87
Iter 1
Cost: 2.3025850929940423
Iter 2
Cost: 2.1030469874767808
Iter 3
Cost: 1.927134219111921
Iter 4
Cost: 1.7717717280090612
Iter 5
Cost: 1.6346126353346613
Iter 6
Cost: 1.5135125247823413
Iter 7
Cost: 1.4064450433235203
Iter 8
Cost: 1.311542265749196
Iter 9
Cost: 1.2271365504899616
Iter 10
Cost: 1.1517751463140082
Train Error Rate: 0.12
Iter 11
Cost: 1.0842123881375516
Iter 12
Cost: 1.0233898129416832
Iter 13
Cost: 0.9684119243860531
Iter 14
Cost: 0.9185220130932211
Iter 15
Cost: 0.8730800936079397
Iter 16
Cost: 0.8315436728926043
Iter 17
Cost: 0.7934514018758864
Iter 18
Cost: 0.75840938256412
Iter 19
Cost: 0.7260798103079759
Iter 20
Cost: 0.6961716173324919
Train Error Rate: 0.04
Iter 21
Cost: 0.6684328012885836
Iter 22
Cost: 0.6426441523041514
Iter 23
Cost: 0.6186141265419867
Iter 24
Cost: 0.596174650215276
Iter 25
Cost: 0.575177673099107
Iter 26
Cost: 0.5554923230990043
Iter 27
Cost: 0.5370025422427284
Iter 28
Cost: 0.5196051089978461
Iter 29
Cost: 0.503207

In [15]:
# Load the test images and labels via get_data().
test_imgs, test_labels = get_data('data/test.npz')

In [16]:
def softmaxClassify(data, w):
    '''
    Evaluate the model on features that do not yet carry a bias column.

    data: m*n
    w: (n+1) * k
    return: m*k array returned by h() for data with a column of ones appended
    '''
    m, _ = data.shape
    bias = np.ones((m, 1))
    augmented = np.c_[data, bias]
    return h(augmented, w)

In [24]:
# Same flatten + per-row standardization as applied to the training images.
# NOTE(review): the sample count 10000 is hard-coded and `test_imgs` is
# rebound, so the raw test array is no longer reachable under this name.
test_imgs = normalize(test_imgs.reshape(10000, 784))

In [25]:
# Class probabilities for the test set using the current `w`.
# NOTE(review): the previous cell already reshaped test_imgs to (10000, 784),
# so this reshape does not change the shape.
ans = softmaxClassify(test_imgs.reshape(10000, 784), w)

In [29]:
# Predicted class = index of the largest probability in each row.
ans_label = ans.argmax(axis=1)

In [32]:
# Test error rate: fraction of samples whose predicted label differs from
# the true label. np.mean over the boolean mismatch mask divides by the
# actual number of samples instead of a hard-coded 10000.
np.mean(ans_label != test_labels)

0.313

In [33]:
# Retrain on the full training arrays with alpha=0.1 for 1000 iterations.
# This rebinds `w`, replacing the weights from the 100-sample run.
w = gradDecend(images, labels, 10, alpha = 0.1, iterCnt=1000)

Train Error Rate: 0.9012833333333333
Iter 1
Cost: 2.3025850929954172
Iter 2
Cost: 1.402673231985745
Iter 3
Cost: 1.065144298413493
Iter 4
Cost: 0.8986086085175427
Iter 5
Cost: 0.8060285290856652
Iter 6
Cost: 0.7571121468736404
Iter 7
Cost: 0.7532457944646882
Iter 8
Cost: 0.7131246256199713
Iter 9
Cost: 0.7264219000403352
Iter 10
Cost: 0.6424780518025446
Train Error Rate: 0.19288333333333332
Iter 11
Cost: 0.6328432691006366
Iter 12
Cost: 0.5887273961657412
Iter 13
Cost: 0.5772833943561538
Iter 14
Cost: 0.5462560285186782
Iter 15
Cost: 0.5336841528301025
Iter 16
Cost: 0.5131653359261328
Iter 17
Cost: 0.5017905958342407
Iter 18
Cost: 0.4884677250692591
Iter 19
Cost: 0.47926242983674294
Iter 20
Cost: 0.4703368473367836
Train Error Rate: 0.12045
Iter 21
Cost: 0.4632416904001713
Iter 22
Cost: 0.45682420219347436
Iter 23
Cost: 0.4512835913931557
Iter 24
Cost: 0.4462731509850768
Iter 25
Cost: 0.4417373385719204
Iter 26
Cost: 0.43755505080617535
Iter 27
Cost: 0.43366970595080756
Iter 28
Cost: 0

Train Error Rate: 0.08606666666666667
Iter 221
Cost: 0.30705979389315763
Iter 222
Cost: 0.3069054283482233
Iter 223
Cost: 0.30675207063224014
Iter 224
Cost: 0.3065997094299708
Iter 225
Cost: 0.30644833360253393
Iter 226
Cost: 0.3062979321838858
Iter 227
Cost: 0.3061484943774033
Iter 228
Cost: 0.30600000955254597
Iter 229
Cost: 0.3058524672415886
Iter 230
Cost: 0.3057058571364346
Train Error Rate: 0.0858
Iter 231
Cost: 0.30556016908551
Iter 232
Cost: 0.30541539309074583
Iter 233
Cost: 0.30527151930458823
Iter 234
Cost: 0.3051285380271182
Iter 235
Cost: 0.3049864397032369
Iter 236
Cost: 0.3048452149199003
Iter 237
Cost: 0.3047048544034174
Iter 238
Cost: 0.3045653490168299
Iter 239
Cost: 0.3044266897573101
Iter 240
Cost: 0.3042888677537099
Train Error Rate: 0.08546666666666666
Iter 241
Cost: 0.30415187426402734
Iter 242
Cost: 0.3040157006730728
Iter 243
Cost: 0.30388033849009033
Iter 244
Cost: 0.303745779346464
Iter 245
Cost: 0.30361201499349694
Iter 246
Cost: 0.30347903730018816
Iter 247

Iter 439
Cost: 0.2867176966907432
Iter 440
Cost: 0.2866591038559085
Train Error Rate: 0.08063333333333333
Iter 441
Cost: 0.2866006922923605
Iter 442
Cost: 0.286542460981459
Iter 443
Cost: 0.2864844089127397
Iter 444
Cost: 0.286426535083779
Iter 445
Cost: 0.2863688385001524
Iter 446
Cost: 0.2863113181753188
Iter 447
Cost: 0.28625397313056666
Iter 448
Cost: 0.2861968023949343
Iter 449
Cost: 0.28613980500511016
Iter 450
Cost: 0.2860829800053706
Train Error Rate: 0.08048333333333334
Iter 451
Cost: 0.28602632644749265
Iter 452
Cost: 0.2859698433907172
Iter 453
Cost: 0.285913529901597
Iter 454
Cost: 0.28585738505399694
Iter 455
Cost: 0.2858014079289984
Iter 456
Cost: 0.28574559761480217
Iter 457
Cost: 0.2856899532066927
Iter 458
Cost: 0.28563447380694446
Iter 459
Cost: 0.28557915852476073
Iter 460
Cost: 0.28552400647620924
Train Error Rate: 0.0804
Iter 461
Cost: 0.2854690167841483
Iter 462
Cost: 0.2854141885781538
Iter 463
Cost: 0.2853595209944741
Iter 464
Cost: 0.28530501317593765
Iter 465


Cost: 0.27711929766835885
Iter 655
Cost: 0.2770848268620463
Iter 656
Cost: 0.2770504245252735
Iter 657
Cost: 0.2770160904069981
Iter 658
Cost: 0.276981824257509
Iter 659
Cost: 0.2769476258284211
Iter 660
Cost: 0.2769134948726506
Train Error Rate: 0.07758333333333334
Iter 661
Cost: 0.2768794311444287
Iter 662
Cost: 0.2768454343992912
Iter 663
Cost: 0.27681150439404895
Iter 664
Cost: 0.276777640886794
Iter 665
Cost: 0.27674384363688853
Iter 666
Cost: 0.2767101124049613
Iter 667
Cost: 0.27667644695289056
Iter 668
Cost: 0.2766428470437986
Iter 669
Cost: 0.27660931244203446
Iter 670
Cost: 0.27657584291319076
Train Error Rate: 0.07745
Iter 671
Cost: 0.27654243822407776
Iter 672
Cost: 0.2765090981426939
Iter 673
Cost: 0.2764758224382667
Iter 674
Cost: 0.2764426108812071
Iter 675
Cost: 0.27640946324311766
Iter 676
Cost: 0.27637637929676523
Iter 677
Cost: 0.27634335881609584
Iter 678
Cost: 0.27631040157623254
Iter 679
Cost: 0.27627750735342727
Iter 680
Cost: 0.27624467592509416
Train Error Rate

Cost: 0.27091474776364466
Iter 872
Cost: 0.2708908826091704
Iter 873
Cost: 0.27086705210634
Iter 874
Cost: 0.2708432561618016
Iter 875
Cost: 0.27081949468258043
Iter 876
Cost: 0.27079576757606366
Iter 877
Cost: 0.27077207474999154
Iter 878
Cost: 0.2707484161124736
Iter 879
Cost: 0.27072479157197604
Iter 880
Cost: 0.2707012010373093
Train Error Rate: 0.07536666666666667
Iter 881
Cost: 0.27067764441766007
Iter 882
Cost: 0.2706541216225527
Iter 883
Cost: 0.27063063256186
Iter 884
Cost: 0.2706071771458162
Iter 885
Cost: 0.2705837552849978
Iter 886
Cost: 0.27056036689031465
Iter 887
Cost: 0.2705370118730422
Iter 888
Cost: 0.2705136901447835
Iter 889
Cost: 0.27049040161748344
Iter 890
Cost: 0.27046714620343393
Train Error Rate: 0.07528333333333333
Iter 891
Cost: 0.27044392381524746
Iter 892
Cost: 0.27042073436588665
Iter 893
Cost: 0.2703975777686476
Iter 894
Cost: 0.27037445393714565
Iter 895
Cost: 0.2703513627853354
Iter 896
Cost: 0.2703283042275074
Iter 897
Cost: 0.27030527817826117
Iter 8

In [34]:
# Display the learned weight matrix (NumPy abbreviates the printout).
w

array([[ 0.99773381,  0.98534563,  1.01261149, ...,  0.99759642,
         1.00248421,  0.99351367],
       [ 0.99773381,  0.98534563,  1.01261149, ...,  0.99759642,
         1.00248421,  0.99351367],
       [ 0.99773381,  0.98534563,  1.01261149, ...,  0.99759642,
         1.00248421,  0.99351367],
       ..., 
       [ 0.99773381,  0.98534563,  1.01261149, ...,  0.99759642,
         1.00248421,  0.99351367],
       [ 0.99773381,  0.98534563,  1.01261149, ...,  0.99759642,
         1.00248421,  0.99351367],
       [ 1.0495462 ,  0.98924284,  0.94149656, ...,  0.92252026,
         1.2300706 ,  1.05510926]])

In [35]:
# Persist the weights under the key 'w' in an .npz archive.
# open() does not create directories, so result/ must already exist.
with open("result/softmaxNormalized.npz", 'wb') as f:
    np.savez(f, w=w)

In [36]:
# Re-score the test set with the weights from the full-data training run.
ans = softmaxClassify(test_imgs, w)

In [37]:
# Predicted class = index of the largest probability in each row.
ans_labels = ans.argmax(axis=1)

In [39]:
# Test error rate: fraction of samples whose predicted label differs from
# the true label. np.mean over the boolean mismatch mask divides by the
# actual number of samples instead of a hard-coded 10000.
np.mean(ans_labels != test_labels)

0.076300000000000007