In [148]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [129]:
training_data = pd.read_csv('mnist/train.csv')

In [130]:
#perform binary classification on the dataset instead of multi-class classification as a test
#as such, only records with a label of 0 or 1 are used

#filtering
binary_df = training_data[training_data['label'].isin([0,1])]

In [131]:
X = binary_df.drop(columns=['label']).values
y = binary_df['label'].values

In [140]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [141]:
print(X_train.shape)
print(y_train.shape)

(7052, 784)
(7052,)


In [178]:
class twoLayerNet():
    def __init__(self) -> None:
        self.params = {}
        
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    
    def forward(self, X):
    
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        z1 = np.einsum("ij,jk->ik",W1.T,X) + b1
        a1 = self.sigmoid(z1)
        z2 = np.einsum("ij,jk->k",W2.T,a1) + b2
        a2 = self.sigmoid(z2)
        
        return z1, a1, z2, a2
    
    
    def backward(self, X, y, learning_rate, z1, a1, z2, a2):
        
        n0 = X.shape[0]
        m = X.shape[1]
        
        dz2 = a2 - y
        dW2 = (1/m) * np.dot(dz2, a1.T)
        db2 = (1/m) * np.sum(dz2, axis=1, keepdims=True)
        dz1 = np.dot(self.params['W2'], dz2) * (a1 * (1 - a1))
        dW1 = np.dot(dz1, X.T)
        db1 = (1/m) * np.sum(dz1, axis=1, keepdims=True)
        
        
        self.params['W1'] -= learning_rate * dW1.T
        self.params['W2'] -= learning_rate * dW2.T
        self.params['b1'] -= learning_rate * db1
        self.params['b2'] -= learning_rate * db2 
        
        
    def loss(self,y_pred, y, m):
        y_pred = y_pred.ravel()
        return (1/m) * sum(-y * np.log(y_pred) - (1 - y) * np.log(1 - y_pred))
        
    def fit(self, X, y):
        
        n0 = X.shape[0]
        m = X.shape[1]
        
        #normalization
        X = 1/X.max() * X
        
        self.params['W1'] = np.random.randn(n0, 4)
        self.params['b1'] = np.random.randn(4,1)
        self.params['W2'] = np.random.randn(4,1)
        self.params['b2'] = np.random.randn(1,1)
        
        z1, a1, z2, a2 = self.forward(X)
        
        while self.loss(a2, y, m) > 0.01:
            print("loss: ", self.loss(a2, y, m))
            self.backward(X, y, 0.01, z1, a1, z2, a2)
            z1, a1, z2, a2 = self.forward(X)
            
        return self.params
    
    def predict(self, X):
        X = 1/X.max() * X
        z1, a1, z2, a2 = self.forward(X)
        return np.where(a2 > 0.5, 1, 0).ravel()
    

In [179]:
model = twoLayerNet()
model.fit(X_train.T,y_train)

loss:  0.8089477471029587
loss:  0.3660634891446625
loss:  0.3349869905378522
loss:  0.3263086988593461
loss:  0.3219721337319788
loss:  0.3192265718275978
loss:  0.3171328885013937
loss:  0.31542298779636174
loss:  0.3139536964815555
loss:  0.31262984865110804
loss:  0.3113989242477312
loss:  0.3102389787271168
loss:  0.3091418380753673
loss:  0.3081010146360993
loss:  0.30710931924399165
loss:  0.30615892308807774
loss:  0.30524125498438864
loss:  0.30434826722938796
loss:  0.30347334886781613
loss:  0.30261199493750646
loss:  0.30176314889357686
loss:  0.30092981785836886
loss:  0.300115987815994
loss:  0.2993221374193032
loss:  0.29854533540918915
loss:  0.2977821192010881
loss:  0.29702989491683324
loss:  0.2962869578872414
loss:  0.2955522094391109
loss:  0.29482492685511197
loss:  0.294104613916612
loss:  0.2933909056447848
loss:  0.2926835074774205
loss:  0.2919821584363376
loss:  0.2912866118642283
loss:  0.2905966281434352
loss:  0.28991197425650683
loss:  0.28923242619560424

KeyboardInterrupt: 

In [167]:
pred_y = model.predict(X_test.T)

In [177]:
print("Score: ",np.where(pred_y == y_test,1,0).sum()/len(y_test))

Score:  0.9988662131519275


In [170]:
confusion_matrix(y_test, pred_y)

array([[821,   0],
       [  2, 941]], dtype=int64)

In [174]:
#open new file save the model and weighs using pickle
import pickle
pickle.dump(model, open('model.pkl','wb'))


In [176]:
#open pickle model file
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)

In [52]:
test1 = np.random.rand(3)
test2 = np.random.randint(0, 2, 1)
model.loss(test1,test2,3)

0.4888401440823957

In [36]:
np.random.rand(3)

array([0.24790999, 0.98393852, 0.66169522])

In [25]:
test1 = np.random.rand(2, 4)
test2 = np.random.randint(0, 1, (2, 4))
model.sigmoid(np.array([[0.5, 0.5, 0.5, 0.5, 0.5],[0.5, 0.5, 0.5, 0.5, 0.5]]))

array([[0.62245933, 0.62245933, 0.62245933, 0.62245933, 0.62245933],
       [0.62245933, 0.62245933, 0.62245933, 0.62245933, 0.62245933]])

In [17]:
test1 = np.random.rand(2, 4)
test2 = np.random.randint(0, 1, (2, 4))

#test2 = np.random.randn(4)

In [49]:
(np.conctest1 + test2).shape

AttributeError: module 'numpy' has no attribute 'conctest1'

In [96]:
X.max()

255