In [1]:
import gzip
import struct
import time

import numpy as np
import tensorflow as tf
import numpy.linalg as lng

from tensorflow import keras
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression

from tensorflow.keras.utils import to_categorical

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

from sklearn.model_selection import cross_val_score

print(tf.__version__)

  from ._conv import register_converters as _register_converters


1.11.0


Using TensorFlow backend.


In [2]:
dataset_mnist = np.load('mnist_dataset.npz')
Xtr = dataset_mnist['Xtr']    #training data
Str = dataset_mnist['Str']    #training label
Xts = dataset_mnist['Xts']    #test data
Yts = dataset_mnist['Yts']    #test label
print(Xtr.shape)
print(Str.shape)

(10000, 784)
(10000, 1)


In [3]:
#Form convolutional neural network
model = keras.Sequential([
    
    keras.layers.Conv2D(32, (5, 5), use_bias=True, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(32, (5, 5), use_bias=True, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(0.15),
    
    keras.layers.Conv2D(64, (5, 5), use_bias=True, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(64, (5, 5), use_bias=True, padding='same', activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(0.15),
    
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu', use_bias=True),
    keras.layers.Dropout(0.15),

    keras.layers.Dense(2, activation=tf.nn.softmax)
    
])
#Configures the model for training
model.compile(optimizer = 'adam',
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])

In [5]:
arr = np.arange(10000)
np.random.shuffle(arr)
for n in[0,2000,4000,6000,8000]: 
    m = n + 2000
    tmp2 = arr[n:m]
    tmp1 = np.append(arr[m:10000],arr[0:n])
    Xtr_80=Xtr[tmp1] #80% training data
    Str_80=Str[tmp1] #80% training label
    Xtr_20=Xtr[tmp2]
    Str_20=Str[tmp2]
    Xtr_cnn_20 = Xtr_20.reshape(len(Xtr_20), 28, 28, 1).astype(np.float32) / 255.0 
    Xtr_cnn_80 = Xtr_80.reshape(len(Xtr_80), 28, 28, 1).astype(np.float32) / 255.0 
    Str_cnn_20 = keras.utils.to_categorical(Str_20, 2) 
    Str_cnn_80 = keras.utils.to_categorical(Str_80, 2)
    model.fit(Xtr_cnn_80,Str_cnn_80,epochs = 2)
    test_loss, test_acc = model.evaluate(Xtr_cnn_20, Str_cnn_20)
    print(test_loss, test_acc)

Epoch 1/2
Epoch 2/2
0.6141155781745911 0.6705
Epoch 1/2
Epoch 2/2
0.5811755175590515 0.691
Epoch 1/2
Epoch 2/2
0.5915079760551453 0.691
Epoch 1/2
Epoch 2/2
0.6013748474121093 0.6855
Epoch 1/2
Epoch 2/2
0.5675721838474274 0.7125


In [6]:
#Xtr_cnn is used as training data for CNN model
Xtr_cnn = Xtr.reshape(len(Xtr), 28, 28, 1).astype(np.float32) / 255.0 
#Str_cnn is used as training label for CNN model
Str_cnn = keras.utils.to_categorical(Str, 2) 

In [7]:
Xtr_cnn.shape

(10000, 28, 28, 1)

In [8]:
#Trains the model for a given number of epochs (iterations on a dataset)
model.fit(Xtr_cnn_80,Str_cnn_80,epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1a1db2c978>

In [9]:
test_loss, test_acc = model.evaluate(Xtr_cnn_20, Str_cnn_20)
print(test_loss, test_acc)

0.8455315637588501 0.654


In [10]:
#Generates output predictions for the input samples.
prediction = model.predict(Xtr_cnn)
print(prediction[:5])
print(prediction.shape)

[[6.9243923e-02 9.3075609e-01]
 [8.0211955e-01 1.9788043e-01]
 [6.0605147e-04 9.9939394e-01]
 [9.5495754e-01 4.5042388e-02]
 [9.5864701e-01 4.1352946e-02]]
(10000, 2)


In [11]:
#Calculate p0 and p1 according to the function in lecture slides
p0 = np.min(prediction[1]) 
p1 = np.min(prediction[0]) 
print(p0)
print(p1)

0.19788043
0.06924392


In [12]:
#Generate beta according to the function in lecture slides
def generatebeta (Str,prediction,p0,p1): 
    beta = np.zeros((len(prediction ),1)) 
    for i in range(len(prediction)): 
        if Str[i] == 0 : 
            beta[i] = ((prediction[i][0] - p1) / ((1 - p0 - p1) * prediction[i][0]))
        else:
            beta[i] = ((prediction[i][1] - p0) / ((1 - p0 - p1) * prediction[i][1]))
    return beta 

In [13]:
beta = generatebeta (Str,prediction,p0,p1) #Generate beta

for i in range(len(beta)):
    if beta[i] < 0:
        beta[i] = 0.0

print(beta[:5]) 

[[0.        ]
 [1.24669695]
 [1.09431874]
 [1.26554904]
 [1.26592981]]


In [14]:
def computeAccuracy(Y,pred_Y): 
    acc = 0.0
    for i in range(len(Y)):
        if Y[i] == pred_Y[i]:
            acc += 1.0
    return acc/len(Y)

In [15]:
Xtr_lr = Xtr_cnn.reshape(10000,784).astype(np.float32)

In [26]:
t1 = time.time()
clf_b = LogisticRegression().fit(Xtr_lr, Str.flatten(), beta.flatten())
t2 = time.time()


In [24]:
#corss validation 
scores = cross_val_score(clf_b, Xtr_lr, Str.flatten(), cv=5)
scores

array([0.63818091, 0.642     , 0.6595    , 0.665     , 0.67383692])

In [25]:
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.66 (+/- 0.03)


In [29]:
pred_Y_b = clf_b.predict(Xts)

acc = computeAccuracy(Yts,pred_Y_b)
print(acc,"  time:",t2 - t1)

0.8765   time: 2.8897018432617188


In [20]:
def generatealpha(p0,p1):
    alpha = ((1-p1+p0) / 2)
    return alpha

In [21]:
alpha = generatealpha(p0,p1)
print(alpha)

0.5643182545900345


In [22]:
alpha_weight = []

for i in range(len(Str)):
    if Str[i] == 1:
        alpha_weight.append(1-alpha)
    else:
        alpha_weight.append(alpha)

print(len(alpha_weight))
print(alpha_weight[:5])

10000
[0.5643182545900345, 0.5643182545900345, 0.4356817454099655, 0.5643182545900345, 0.5643182545900345]


In [23]:
t3 = time
clf_a = LogisticRegression().fit(Xtr_lr, Str.flatten(), alpha_weight)

pred_Y_a = clf_a.predict(Xts)

acc = computeAccuracy(Yts,pred_Y_a)
print(acc)

0.8845
