In [3]:


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt
import json


import os
# Walk the /kaggle/input directory tree and print the full path of every file
# found there (the recorded output of this cell lists the available datasets).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the containing directory with the bare file name.
        print(os.path.join(dirname, filename))



# --- Configuration -----------------------------------------------------------
MODEL_PATH = "/kaggle/working/nn_weights.txt"
DEV_SIZE = 1000    # number of shuffled rows held out as the development set
SPLIT_SEED = 42    # fixed so the train/dev split is identical on every run

# --- Load --------------------------------------------------------------------
# The code below treats column 0 of each CSV row as the label and the
# remaining n-1 columns as pixel values (scaled to [0, 1] by dividing by 255).
data = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
data = np.array(data)
m, n = data.shape   # m = total rows in the CSV, n = columns (label + pixels)

# --- Shuffle and split -------------------------------------------------------
# Shuffle rows in place with a dedicated, seeded generator. With a fixed seed
# the same rows end up in the dev set after a kernel restart, so a model that
# is trained, saved and later re-loaded is still evaluated on rows it did not
# train on. A local generator also leaves NumPy's global RNG untouched.
np.random.default_rng(SPLIT_SEED).shuffle(data)

# Transpose so that every column is one sample: row 0 holds the labels,
# rows 1..n-1 hold the pixels.
data_dev = data[:DEV_SIZE].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n] / 255
np.savez_compressed("/kaggle/working/dev.npz", X_dev=X_dev, Y_dev=Y_dev)

data_train = data[DEV_SIZE:m].T
Y_train = data_train[0]
X_train = data_train[1:n] / 255

    

def initParams():
    """Draw standard-normal starting values for both layers.

    Returns:
        tuple: (w1, b1, w2, b2) with shapes (10, 784), (10, 1), (10, 10)
        and (10, 1) respectively.
    """
    # Shapes are listed in return order; the arrays are drawn in this same
    # order from NumPy's global RNG.
    shapes = ((10, 784), (10, 1), (10, 10), (10, 1))
    w1, b1, w2, b2 = (np.random.randn(*shape) for shape in shapes)
    return w1, b1, w2, b2

def forwardProp(w1, w2, b1, b2, X):
    """Run the two-layer network forward on X.

    Note the argument order: both weight matrices come before both biases.

    Args:
        w1, b1: weights and bias of the first (ReLU) layer.
        w2, b2: weights and bias of the second (softmax) layer.
        X: input matrix that w1 is multiplied with.

    Returns:
        tuple: (Z1, A1, Z2, A2) - the pre-activation and activation of the
        first layer followed by those of the second layer.
    """
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act

def forwardPropTest(w1, w2, b1, b2, Xi):
    """Forward-propagate Xi and print the predicted class of its first column.

    Performs the same two-layer computation as forwardProp (ReLU hidden
    layer, softmax output) and prints the row index of the largest output
    in column 0. Nothing is returned.
    """
    hidden_act = ReLU(np.dot(w1, Xi) + b1)
    output_act = softmax(np.dot(w2, hidden_act) + b2)
    first_column = output_act[:, 0]
    print(first_column.argmax())
    

def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and Z."""
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z, axis=0):
    """Numerically stable softmax along `axis`.

    With the default axis=0 every column is normalised independently, which
    matches the one-sample-per-column layout used in this notebook,
    e.g. (10, 1000).

    Args:
        Z: array of scores.
        axis: axis along which the outputs sum to 1.

    Returns:
        Array of the same shape as Z.
    """
    # Subtracting the per-slice maximum keeps exp() from overflowing and
    # does not change the result.
    peak = Z.max(axis=axis, keepdims=True)
    exps = np.exp(Z - peak)
    totals = exps.sum(axis=axis, keepdims=True)
    return exps / totals


    
def updateParams(a, w1,dw1, b1, db1, w2, dw2, b2, db2):
    """Apply one gradient-descent step to every parameter.

    Each parameter p with gradient dp becomes p - a * dp.

    Returns:
        tuple: (w1, w2, b1, b2) - note that both weights come before both
        biases, unlike the (w1, b1, w2, b2) order of the arguments.
    """
    def step(param, grad):
        # Move against the gradient, scaled by the step size `a`.
        return param - a * grad

    return step(w1, dw1), step(w2, dw2), step(b1, db1), step(b2, db2)


def oneHot(Y, num_classes=None):
    """One-hot encode integer labels, one column per sample.

    Args:
        Y: array-like of non-negative integer class labels (any shape; it is
            flattened).
        num_classes: number of rows in the result. When None (the default)
            it is inferred as ``Y.max() + 1``. Pass it explicitly whenever a
            batch may not contain the highest class, otherwise the result
            has too few rows to line up with the network output.

    Returns:
        float32 array of shape (num_classes, Y.size) with a single 1 in each
        column, at the row given by that sample's label.

    Raises:
        IndexError: if a label is >= num_classes.
    """
    labels = np.asarray(Y).ravel()
    if num_classes is None:
        # An empty label array has no maximum; encode it as zero classes.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    encoded = np.zeros((num_classes, labels.size), dtype=np.float32)
    # Row = class, column = sample index.
    encoded[labels, np.arange(labels.size)] = 1
    return encoded

def backProp(w2, A2, X, Y, A1, Z1, Z2):
    """Compute the parameter gradients for one batch.

    The output-layer error is taken as ``A2 - one_hot(Y)`` and the hidden
    layer uses the ReLU derivative ``Z1 > 0``. All gradients are averaged
    over the number of samples in the batch (the number of labels in Y).

    Args:
        w2: second-layer weight matrix.
        A2: network output, one column per sample and one row per class.
        X: input batch, one column per sample.
        Y: integer class label of each sample.
        A1: first-layer activation.
        Z1: first-layer pre-activation.
        Z2: second-layer pre-activation (unused; kept so existing callers
            keep working).

    Returns:
        tuple: (dB1, dB2, dW1, dW2). The bias gradients are column vectors
        with one entry per unit of their layer.
    """
    labels = np.asarray(Y).ravel()
    batch = labels.size  # average over this batch, not a global row count

    # dZ2 = A2 - one_hot(Y), built by subtracting 1 at each sample's true
    # class. Using A2's own row count keeps the shapes aligned even when the
    # batch does not contain the highest class.
    dZ2 = np.array(A2, dtype=float)
    dZ2[labels, np.arange(batch)] -= 1

    dW2 = dZ2.dot(A1.T) / batch
    # Sum per unit (axis=1), not over the whole matrix: when the columns of
    # A2 are softmax outputs each column of dZ2 sums to zero, so a global sum
    # would yield a zero bias gradient.
    dB2 = dZ2.sum(axis=1, keepdims=True) / batch

    relu_grad = Z1 > 0
    dZ1 = w2.T.dot(dZ2) * relu_grad
    dW1 = dZ1.dot(X.T) / batch
    # The first-layer bias gradient comes from the first-layer error dZ1.
    dB1 = dZ1.sum(axis=1, keepdims=True) / batch
    return dB1, dB2, dW1, dW2

def get_Predictions(A2):
    """Return, for every column of A2, the row index of its largest value."""
    best_rows = np.argmax(A2, axis=0)
    return best_rows
def checkAccuracy(predictions, Y):
    """Return the fraction of positions at which predictions equals Y."""
    hits = predictions == Y
    return np.sum(hits) / Y.size

def gradientDescent(X, Y, iterations, a):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: training inputs passed to forwardProp and backProp.
        Y: labels matching X.
        iterations: number of update steps to run.
        a: learning rate handed to updateParams.

    Returns:
        tuple: the trained parameters (w1, b1, w2, b2).
    """
    w1, b1, w2, b2 = initParams()
    for i in range(iterations):
        # Train on the data passed in, so the function works for any
        # dataset and the accuracy printed below refers to the same samples
        # the predictions were made on.
        Z1, A1, Z2, A2 = forwardProp(w1, w2, b1, b2, X)
        db1, db2, dw1, dw2 = backProp(w2, A2, X, Y, A1, Z1, Z2)
        w1, w2, b1, b2 = updateParams(a, w1, dw1, b1, db1, w2, dw2, b2, db2)
        if i % 10 == 0:
            # A2 was computed before this step's update, so the accuracy
            # shown is that of the parameters going into iteration i.
            print("iteration: ", i)
            print("Accuracy: ", checkAccuracy(get_Predictions(A2), Y))
    return w1, b1, w2, b2
    

def save_quadruple_txt(path, w1, b1, w2, b2):
    """Write the four parameter arrays to `path` as a JSON text file.

    Every array is stored under its own key ("w1", "b1", "w2", "b2") as a
    record holding its shape, its dtype name and its values as nested lists,
    so that the arrays can be rebuilt with the same shape and dtype.
    """
    payload = {}
    for key, value in (("w1", w1), ("b1", b1), ("w2", w2), ("b2", b2)):
        arr = np.asarray(value)
        payload[key] = {
            "shape": arr.shape,
            "dtype": str(arr.dtype),
            "data": arr.tolist(),
        }

    with open(path, "w") as f:
        # Pass indent=2 here for a human-readable file.
        json.dump(payload, f)


def load_quadruple_txt(path):
    """Read back the parameter arrays written by save_quadruple_txt.

    Each of the records "w1", "b1", "w2" and "b2" is turned into an array of
    the recorded dtype and reshaped to the recorded shape.

    Returns:
        tuple: (w1, b1, w2, b2)
    """
    with open(path, "r") as f:
        payload = json.load(f)

    arrays = []
    for key in ("w1", "b1", "w2", "b2"):
        record = payload[key]
        values = np.array(record["data"], dtype=np.dtype(record["dtype"]))
        arrays.append(values.reshape(record["shape"]))

    w1, b1, w2, b2 = arrays
    return w1, b1, w2, b2

/kaggle/input/mnist-dataset/train-images.idx3-ubyte
/kaggle/input/mnist-dataset/t10k-labels.idx1-ubyte
/kaggle/input/mnist-dataset/t10k-images.idx3-ubyte
/kaggle/input/mnist-dataset/train-labels.idx1-ubyte
/kaggle/input/mnist-dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte
/kaggle/input/mnist-dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte
/kaggle/input/mnist-dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte
/kaggle/input/mnist-dataset/train-images-idx3-ubyte/train-images-idx3-ubyte
/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [None]:

# Train the network: 1000 gradient-descent iterations with learning rate 0.1,
# passing the training split. The trained parameters are bound to
# w1, b1, w2, b2 for the cells below.
w1, b1, w2, b2 = gradientDescent(X_train, Y_train, 1000, 0.1)


In [None]:
# Save the most recently trained model (w1, b1, w2, b2) to MODEL_PATH as JSON
# text, overwriting any file already there.
save_quadruple_txt(MODEL_PATH, w1, b1, w2, b2)


In [2]:
# Load the saved model from the text file at MODEL_PATH. The trailing
# underscore keeps the loaded parameters separate from the in-memory
# w1, b1, w2, b2 produced by training.
# NOTE(review): this cell needs the cell defining load_quadruple_txt and
# MODEL_PATH to have run first in the current kernel; the NameError recorded
# below it shows it was last executed before that cell.
w1_, b1_, w2_, b2_ = load_quadruple_txt(MODEL_PATH)

NameError: name 'load_quadruple_txt' is not defined

In [9]:
#test *loaded model* on developmental data
Z1, A1, Z2, A2 = forwardProp(w1_, w2_, b1_, b2_, X_dev)
print(checkAccuracy(get_Predictions(A2), Y_dev))

0.628


In [None]:
# Display a random image from X_dev, then forward propagate it using the
# *loaded model*.

# Pick any dev sample: the bounds come from the data itself (columns of X_dev
# are samples), and randint's upper bound is exclusive, so every column from
# 0 to the last one can be drawn.
i = np.random.randint(0, X_dev.shape[1])
X_dev_transpose = X_dev.T              # one sample per row
x = X_dev_transpose[i]                 # shape (784,)
y = Y_dev[i]

img = x.reshape(28, 28)        # -> (28,28)
# Pixels are normally already scaled to [0, 1]; fall back to a 0-255 colour
# range if the image holds raw values.
plt.imshow(img, cmap="gray", vmin=0, vmax=1 if img.max()<=1.0 else 255)
plt.title(f"Label: {y}")
plt.axis("off")
plt.show()


# Test the loaded model on this image. Slicing with i:i+1 keeps the input
# two-dimensional (a single column), as the forward pass expects.
forwardPropTest(w1_,w2_, b1_, b2_, X_dev[:, i:i+1])
