load dataset

LOADING DATASET

In [10]:
import gzip
import os
import urllib.request
from MNIST_NN import *
def load_dataset():
    # Download the MNIST dataset files
    url = 'http://yann.lecun.com/exdb/mnist/'
    filenames = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz',
                't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

    for filename in filenames:
        if not os.path.exists("MNIST_DATASET/"+filename):
            urllib.request.urlretrieve(url + filename, "MNIST_DATASET/"+filename)

    # Function to read the MNIST data files
    def read_mnist_images(filename):
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        return data.reshape(-1, 28*28)

    def read_mnist_labels(filename):
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        return data

    # Load the training and test data
    train_images = read_mnist_images('MNIST_DATASET/train-images-idx3-ubyte.gz')
    train_labels = read_mnist_labels('MNIST_DATASET/train-labels-idx1-ubyte.gz')
    test_images = read_mnist_images('MNIST_DATASET/t10k-images-idx3-ubyte.gz')
    test_labels = read_mnist_labels('MNIST_DATASET/t10k-labels-idx1-ubyte.gz')
    val_mask = np.arange(0,60000)
    np.random.shuffle(val_mask)
    val_images = train_images[val_mask[:2000]]
    val_labels = train_labels[val_mask[:2000]]
    train_images = train_images[val_mask[2000:]]
    train_labels = train_labels[val_mask[2000:]]
    return train_images, train_labels, val_images, val_labels, test_images, test_labels

In [11]:
def load_model(p= 0.7315121964113247, reg= 0):
    model = NeuralNetwork()
    model.layers['l1'] = affineReLULayer(784, 2048,p_keep = p)
    model.reg = reg
    return model

In [21]:
model = load_model()
train_images, train_labels, val_images, val_labels, test_images, test_labels= load_dataset()
num_epochs = 20
batch_size = 512
best_acc = (0,0)
best_model = None

In [22]:
total_iters = math.ceil(len(train_labels)/batch_size)
train_images_loader = np.array_split(train_images,total_iters)
train_labels_loader = np.array_split(train_labels, total_iters)
for epoch in range(num_epochs):
    for iter,(x_train,y_train) in enumerate(zip(train_images_loader, train_labels_loader)):
        model.train()
        y_pred,loss = model(x_train,y_train)
        model.adamStep()

        if (iter+1)%100 == 0:

            predicted = np.argmax(y_pred, axis = 1)
            train_acc = (predicted==y_train).sum()/y_train.shape[0]

            model.eval()
            y_pred = model(val_images)
            predicted = np.argmax(y_pred, axis = 1)
            val_acc = (predicted==val_labels).sum()/val_labels.shape[0]
            print(f"epoch: {epoch+1}  iter:{iter+1}  loss:{loss}  acc: {train_acc}  val_acc: {val_acc}")

            if val_acc>best_acc[0]:
                best_model = model
                print(f'BEST VAL: {val_acc}  TRAIN: {train_acc}')

model = best_model

epoch: 1  iter:100  loss:1.8619595294637097  acc: 0.905511811023622  val_acc: 0.9495
BEST VAL: 0.9495  TRAIN: 0.905511811023622
epoch: 2  iter:100  loss:0.885945029531009  acc: 0.9566929133858267  val_acc: 0.9685
BEST VAL: 0.9685  TRAIN: 0.9566929133858267
epoch: 3  iter:100  loss:0.9310624217767237  acc: 0.9488188976377953  val_acc: 0.972
BEST VAL: 0.972  TRAIN: 0.9488188976377953
epoch: 4  iter:100  loss:0.25810678065670434  acc: 0.9822834645669292  val_acc: 0.9715
BEST VAL: 0.9715  TRAIN: 0.9822834645669292
epoch: 5  iter:100  loss:0.3540608326488518  acc: 0.9665354330708661  val_acc: 0.9755
BEST VAL: 0.9755  TRAIN: 0.9665354330708661
epoch: 6  iter:100  loss:0.18772505578771007  acc: 0.9881889763779528  val_acc: 0.976
BEST VAL: 0.976  TRAIN: 0.9881889763779528
epoch: 7  iter:100  loss:0.13220574679268438  acc: 0.9862204724409449  val_acc: 0.979
BEST VAL: 0.979  TRAIN: 0.9862204724409449
epoch: 8  iter:100  loss:0.19338101693037604  acc: 0.9822834645669292  val_acc: 0.975
BEST VAL: 

In [25]:
def test():
    model.eval()
    y_pred = model(test_images)
    pred_classes = np.argmax(y_pred,axis = 1)
    acc = (pred_classes==test_labels).sum()/test_labels.shape[0]
    return acc
print(test())

0.9767
