In [16]:
import numpy as np
import pandas as pd
import struct
from array import array
from os.path import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
    
    def read_images_labels(self, images_filepath, labels_filepath):        
        # Read labels
        with open(labels_filepath, 'rb') as file:
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError(f'Magic number mismatch, expected 2049, got {magic}')
            labels = np.array(array("B", file.read()), dtype=np.uint8)
        
        # Read images
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError(f'Magic number mismatch, expected 2051, got {magic}')
            image_data = np.array(array("B", file.read()), dtype=np.uint8)
            images = image_data.reshape(size, rows * cols)  # Flatten each image to 1D
            
        return images, labels
            
    def load_data(self):
        # Load training and test data
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        
        # Convert to DataFrames
        train_df = pd.DataFrame(x_train)
        train_df['label'] = y_train
        test_df = pd.DataFrame(x_test)
        test_df['label'] = y_test
        
        return train_df, test_df

# Example usage:
# Initialize the data loader with paths to the MNIST dataset files

input_path = r'MNIST'
train_images_path = join(input_path, 'train-images.idx3-ubyte')
train_labels_path = join(input_path, 'train-labels.idx1-ubyte')
test_images_path = join(input_path, 't10k-images.idx3-ubyte')
test_labels_path = join(input_path, 't10k-labels.idx1-ubyte')

mnist_loader = MnistDataloader(train_images_path, train_labels_path, test_images_path, test_labels_path)
train_df, test_df = mnist_loader.load_data()

# Display the first few rows of the training DataFrame
print(train_df.head())


   0  1  2  3  4  5  6  7  8  9  ...  775  776  777  778  779  780  781  782  \
0  0  0  0  0  0  0  0  0  0  0  ...    0    0    0    0    0    0    0    0   
1  0  0  0  0  0  0  0  0  0  0  ...    0    0    0    0    0    0    0    0   
2  0  0  0  0  0  0  0  0  0  0  ...    0    0    0    0    0    0    0    0   
3  0  0  0  0  0  0  0  0  0  0  ...    0    0    0    0    0    0    0    0   
4  0  0  0  0  0  0  0  0  0  0  ...    0    0    0    0    0    0    0    0   

   783  label  
0    0      5  
1    0      0  
2    0      4  
3    0      1  
4    0      9  

[5 rows x 785 columns]


In [109]:
import pandas as pd
from NN import *

def f1_score_manual(y_true, y_pred, num_classes):
    f1_scores = []
    for c in range(1, num_classes + 1): 
        tp = np.sum((y_pred == c) & (y_true == c))
        fp = np.sum((y_pred == c) & (y_true != c))
        fn = np.sum((y_pred != c) & (y_true == c))
        
        
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        
        
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        f1_scores.append(f1)
    
    
    return np.mean(f1_scores)

def classification_metrics_multimodal(y_true, y_pred, num_classes):
    f1_scores = []
    precision_scores = []
    recall_scores = []
    
    total_tp = 0
    total_fp = 0
    total_fn = 0
    total_tn = 0
    
    for c in range(num_classes + 1):
        tp = np.sum((y_pred == c) & (y_true == c))
        fp = np.sum((y_pred == c) & (y_true != c))
        fn = np.sum((y_pred != c) & (y_true == c))
        tn = np.sum((y_pred != c) & (y_true != c))
        
        total_tp += tp
        total_fp += fp
        total_fn += fn
        total_tn += tn

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)
    
    accuracy = (total_tp + total_tn) / (total_tp + total_fp + total_fn + total_tn)

    return {
        'f1_score': np.mean(f1_scores),
        'precision': np.mean(precision_scores),
        'recall': np.mean(recall_scores),
        'accuracy': accuracy
    }


In [4]:
# Separate features (x) and labels (y) for the training set
x_train = np.array(train_df.iloc[:, :-1]) / 255.0 - 0.5  # All columns except the last one (pixels)
y_train = np.array(train_df.iloc[:, -1])   # Only the last column (label)
num_classes = 10

one_hot_encoded = np.zeros((y_train.size, num_classes))
one_hot_encoded[np.arange(y_train.size), y_train] = 1

one_hot_encoded = one_hot_encoded.reshape((-1, num_classes))


# Separate features (x) and labels (y) for the test set
x_test = np.array(test_df.iloc[:, :-1]) / 255.0 - 0.5    # All columns except the last one (pixels)
y_test = np.array(test_df.iloc[:, -1])     # Only the last column (label)

# Print shapes to confirm
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)



x_train shape: (60000, 784)
y_train shape: (60000,)
x_test shape: (10000, 784)
y_test shape: (10000,)


In [22]:
print("Min value after normalization:", np.min(x_train))
print("Max value after normalization:", np.max(x_train))

Min value after normalization: -0.5
Max value after normalization: 0.5


In [38]:
nn2 = NN(input_shape=(0,784))
nn2.add_new_random_layer(512, ELU())
nn2.add_new_random_layer(256, ELU())
nn2.add_new_random_layer(128, ELU())
nn2.add_new_random_layer(64, ELU())
nn2.add_new_random_layer(32, ELU())
nn2.add_new_random_layer(10, activation=Linear())

LAYER(
W:
 array([[ 0.11359341,  0.04995465, -0.25086087,  0.2293794 ,  0.27183058,
         0.15873696,  0.26188476,  0.09004442,  0.45500041, -0.4557296 ],
       [ 0.37867135,  0.07926232, -0.19383162,  0.11764629, -0.0864343 ,
         0.41920626,  0.37354822,  0.30111446, -0.09351347, -0.48402478],
       [ 0.10235332,  0.04292351,  0.44519048, -0.27428107,  0.32818588,
         0.499734  ,  0.31685498,  0.37724845,  0.17369897, -0.19601139],
       [-0.4561818 ,  0.14640404, -0.17930498, -0.06029895,  0.22406984,
         0.38261436, -0.03216549,  0.04779243,  0.01165746, -0.1128867 ],
       [ 0.25887141, -0.49415542,  0.08589023, -0.07140001,  0.36760729,
        -0.19252615, -0.41508678, -0.14275244, -0.13388345, -0.16097577],
       [-0.17274275, -0.23964045,  0.30577109, -0.37573866, -0.3264826 ,
        -0.28386785,  0.39014075, -0.34436924, -0.41161337, -0.24638689],
       [ 0.34845095,  0.11024399, -0.48031743, -0.32215286,  0.07698506,
         0.33013916, -0.24917335, 

In [None]:
nn3 = NN(input_shape=(0,784))
nn3.add_new_random_layer(256, ReLU())
nn3.add_new_random_layer(128, ReLU())
nn3.add_new_random_layer(32, ReLU())
nn3.add_new_random_layer(10, activation=Linear())

LAYER(
W:
 array([[-1.49811360e-01, -2.64761935e-01,  1.00833881e-01,
         2.35877576e-01,  1.16405303e-01,  2.72543265e-01,
        -4.80450321e-01, -2.61789049e-01, -2.14862710e-01,
        -4.85603363e-01],
       [ 8.68068820e-02, -1.84359915e-01, -3.27911639e-02,
        -4.80897373e-01, -1.40861646e-01, -4.01675625e-01,
         4.05956068e-01,  5.88592468e-02,  4.73374879e-02,
         4.53161198e-01],
       [ 1.15521091e-01,  2.44863030e-01, -3.58706986e-01,
         1.52070963e-01, -3.37472123e-01, -3.63732484e-02,
        -2.69750120e-01,  9.17402704e-02,  9.47420652e-02,
        -1.60615064e-01],
       [ 7.44137343e-03, -4.63330004e-01,  3.63409166e-01,
        -1.50358437e-01, -1.54406795e-01, -2.26925901e-01,
         1.24736473e-01,  1.09100205e-01,  3.22931788e-01,
        -1.32558777e-01],
       [ 3.79750951e-01,  2.37996979e-02,  4.89964871e-01,
         2.40996154e-01, -2.60795619e-01,  4.47281340e-01,
        -1.24232781e-01, -7.92735442e-02,  4.14149580e-01,


In [116]:
def train_network(nn, EPOCHS, LR=0.001):
    LR_DECRESE = 1.3
    ERR_STEP = 1000
    ERR_SAMPLES = 100

    loss = LogisticCrossEntropy(from_logits=True)
    errors = []
    f1_scores = []
    print("STARTING TRAINING PROCESS\n")
    for i in range(EPOCHS):
        nn.batch_descent(x_train, one_hot_encoded, loss=loss,batch_size=len(x_train)//1000, rate=LR)
        if (i % ERR_STEP) == 0 and i > 0:
            LR /= LR_DECRESE
            err = 0.0
            for _ in range(ERR_SAMPLES):
                index = np.random.randint(x_train.shape[0])
                yhat_i = nn.apply(x_train[index].reshape(1, -1))
                err += loss(yhat_i, one_hot_encoded[index])
            err /= ERR_SAMPLES
            errors.append(err)
            print(f"{i=} {err=:.3f} {LR=}")
    print("TRAINING PROCESS CONCLUDED")
    y_pred_train = nn.apply(x_train)
    train_pred_classes = np.argmax(y_pred_train, axis=1)
    scores_train = classification_metrics_multimodal(y_train, train_pred_classes, num_classes)
    acc_train = scores_train["accuracy"]
    f1_train = scores_train["f1_score"]
    print(f"F1 Score (train): {f1_train:.5}")
    print(f"Accuracy (train): {acc_train:.5}")
    
    y_pred_test = nn.apply(x_test)
    test_pred_classes = np.argmax(y_pred_test, axis=1)
    scores = classification_metrics_multimodal(y_test, test_pred_classes, num_classes)
    acc_test = scores["accuracy"]
    f1_test = scores["f1_score"]
    print(f"F1 Score (test): {f1_test:.5}")
    print(f"Accuracy (test): {acc_test:.5}")
    

    return nn, errors

In [None]:
LR=0.00001
LR_DECRESE = 0
ERR_STEP = 1000
ERR_SAMPLES = 100

loss = LogisticCrossEntropy(from_logits=True)
errors = []
f1_scores = []
for i in range(300_000):
    nn3.stochastic_descent(x_train, one_hot_encoded, rate=LR, loss=loss)
    if (i % ERR_STEP) == 0 and i > 0:
        #LR /= LR_DECRESE
        err = 0.0
        for _ in range(ERR_SAMPLES):
            index = np.random.randint(x_train.shape[0])
            yhat_i = nn3.apply(x_train[index].reshape(1, -1))
            err += loss(yhat_i, one_hot_encoded[index])
        err /= ERR_SAMPLES
        errors.append(err)
        print(f"{i=} {err=:.3f} {LR=}")

#plt.plot(range(len(errors)), errors, label='Cross-Entropy Loss')
#plt.title("Training Loss")
#plt.xlabel("Epochs")
#plt.ylabel("Loss")
#plt.legend()
#plt.show()

i=1000 err=0.064 LR=1e-05
i=2000 err=0.071 LR=1e-05
i=3000 err=0.034 LR=1e-05
i=4000 err=0.049 LR=1e-05
i=5000 err=0.075 LR=1e-05
i=6000 err=0.065 LR=1e-05
i=7000 err=0.065 LR=1e-05
i=8000 err=0.052 LR=1e-05
i=9000 err=0.062 LR=1e-05
i=10000 err=0.057 LR=1e-05
i=11000 err=0.036 LR=1e-05
i=12000 err=0.048 LR=1e-05
i=13000 err=0.073 LR=1e-05
i=14000 err=0.056 LR=1e-05
i=15000 err=0.043 LR=1e-05
i=16000 err=0.052 LR=1e-05
i=17000 err=0.043 LR=1e-05
i=18000 err=0.031 LR=1e-05
i=19000 err=0.044 LR=1e-05
i=20000 err=0.036 LR=1e-05
i=21000 err=0.075 LR=1e-05
i=22000 err=0.055 LR=1e-05
i=23000 err=0.059 LR=1e-05
i=24000 err=0.051 LR=1e-05
i=25000 err=0.045 LR=1e-05
i=26000 err=0.075 LR=1e-05
i=27000 err=0.061 LR=1e-05
i=28000 err=0.037 LR=1e-05
i=29000 err=0.053 LR=1e-05
i=30000 err=0.046 LR=1e-05
i=31000 err=0.077 LR=1e-05
i=32000 err=0.050 LR=1e-05
i=33000 err=0.080 LR=1e-05
i=34000 err=0.055 LR=1e-05
i=35000 err=0.047 LR=1e-05
i=36000 err=0.074 LR=1e-05
i=37000 err=0.044 LR=1e-05
i=38000 er

In [58]:
y_pred_train = nn3.apply(x_train)
train_pred_classes = np.argmax(y_pred_train, axis=1)
train_f1_score = f1_score_manual(y_train, train_pred_classes, num_classes)
print(f"F1 Score (train): {train_f1_score:.5}")

y_pred_test = nn3.apply(x_test)
test_pred_classes = np.argmax(y_pred_test, axis=1)
test_f1_score = f1_score_manual(y_test, test_pred_classes, num_classes)
print(f"F1 Score (test): {test_f1_score:.5}")

F1 Score (train): 0.80423
F1 Score (test): 0.80343


In [60]:
nn4 = NN(input_shape=(0,784))
nn4.add_new_random_layer(256, ReLU())
nn4.add_new_random_layer(128, ReLU())
nn4.add_new_random_layer(32, ReLU())
nn4.add_new_random_layer(10, activation=Linear())
train_network(nn4)

i=1000 err=0.292 LR=0.001
i=2000 err=0.238 LR=0.001
i=3000 err=0.181 LR=0.001
i=4000 err=0.220 LR=0.001
i=5000 err=0.200 LR=0.001
i=6000 err=0.217 LR=0.001
i=7000 err=0.141 LR=0.001
i=8000 err=0.226 LR=0.001
i=9000 err=0.135 LR=0.001
i=10000 err=0.144 LR=0.001
i=11000 err=0.159 LR=0.001
i=12000 err=0.148 LR=0.001
i=13000 err=0.127 LR=0.001
i=14000 err=0.122 LR=0.001
i=15000 err=0.100 LR=0.001
i=16000 err=0.120 LR=0.001
i=17000 err=0.138 LR=0.001
i=18000 err=0.143 LR=0.001
i=19000 err=0.102 LR=0.001
i=20000 err=0.158 LR=0.001
i=21000 err=0.116 LR=0.001
i=22000 err=0.109 LR=0.001
i=23000 err=0.115 LR=0.001
i=24000 err=0.102 LR=0.001
i=25000 err=0.114 LR=0.001
i=26000 err=0.113 LR=0.001
i=27000 err=0.092 LR=0.001
i=28000 err=0.108 LR=0.001
i=29000 err=0.092 LR=0.001
i=30000 err=0.096 LR=0.001
i=31000 err=0.068 LR=0.001
i=32000 err=0.103 LR=0.001
i=33000 err=0.083 LR=0.001
i=34000 err=0.119 LR=0.001
i=35000 err=0.105 LR=0.001
i=36000 err=0.067 LR=0.001
i=37000 err=0.065 LR=0.001
i=38000 er

(<NN.NN at 0x289004bb760>,
 [np.float64(0.29233519035346134),
  np.float64(0.2379613080730601),
  np.float64(0.1810000633304611),
  np.float64(0.21992411200766004),
  np.float64(0.2004341955419619),
  np.float64(0.21655357977849157),
  np.float64(0.14101181116825834),
  np.float64(0.2263570276850054),
  np.float64(0.135056213490291),
  np.float64(0.14353684386346044),
  np.float64(0.15854561844236578),
  np.float64(0.14754747037013047),
  np.float64(0.12715539361209838),
  np.float64(0.12163513021375079),
  np.float64(0.10032172387327502),
  np.float64(0.12020122978361919),
  np.float64(0.13782262916825844),
  np.float64(0.1433909089150319),
  np.float64(0.10210440882062494),
  np.float64(0.1576925795814944),
  np.float64(0.11574934957005736),
  np.float64(0.1089788568930203),
  np.float64(0.1154600066099068),
  np.float64(0.10184481372514961),
  np.float64(0.11406718793999775),
  np.float64(0.11337689961317876),
  np.float64(0.09186468419740139),
  np.float64(0.10790491784262433),
  n

In [68]:
train_network(nn4,EPOCHS=200_000)

i=1000 err=0.039 LR=0.001
i=2000 err=0.022 LR=0.001
i=3000 err=0.019 LR=0.001
i=4000 err=0.026 LR=0.001
i=5000 err=0.010 LR=0.001
i=6000 err=0.027 LR=0.001
i=7000 err=0.019 LR=0.001
i=8000 err=0.023 LR=0.001
i=9000 err=0.021 LR=0.001
i=10000 err=0.038 LR=0.001
i=11000 err=0.020 LR=0.001
i=12000 err=0.022 LR=0.001
i=13000 err=0.014 LR=0.001
i=14000 err=0.028 LR=0.001
i=15000 err=0.007 LR=0.001
i=16000 err=0.068 LR=0.001
i=17000 err=0.021 LR=0.001
i=18000 err=0.009 LR=0.001
i=19000 err=0.050 LR=0.001
i=20000 err=0.011 LR=0.001
i=21000 err=0.021 LR=0.001
i=22000 err=0.014 LR=0.001
i=23000 err=0.046 LR=0.001
i=24000 err=0.023 LR=0.001
i=25000 err=0.034 LR=0.001
i=26000 err=0.023 LR=0.001
i=27000 err=0.013 LR=0.001
i=28000 err=0.015 LR=0.001
i=29000 err=0.023 LR=0.001
i=30000 err=0.039 LR=0.001
i=31000 err=0.030 LR=0.001
i=32000 err=0.011 LR=0.001
i=33000 err=0.009 LR=0.001
i=34000 err=0.019 LR=0.001
i=35000 err=0.031 LR=0.001
i=36000 err=0.027 LR=0.001
i=37000 err=0.037 LR=0.001
i=38000 er

(<NN.NN at 0x289004bb760>,
 [np.float64(0.039020536514439645),
  np.float64(0.021975863969269992),
  np.float64(0.01923965994267673),
  np.float64(0.026368648326785964),
  np.float64(0.00976748713972881),
  np.float64(0.026931384107023865),
  np.float64(0.01887395540183448),
  np.float64(0.02271827218901384),
  np.float64(0.021107768364206848),
  np.float64(0.037818510885448096),
  np.float64(0.020025493764258998),
  np.float64(0.02159950855310463),
  np.float64(0.014256709413636865),
  np.float64(0.02818882906767693),
  np.float64(0.007377849219512072),
  np.float64(0.06792566719079683),
  np.float64(0.020501547554096652),
  np.float64(0.008959993905603873),
  np.float64(0.04955939670618906),
  np.float64(0.010516217500842383),
  np.float64(0.02143634129189969),
  np.float64(0.014292238752443731),
  np.float64(0.04613019083458857),
  np.float64(0.023178730892626592),
  np.float64(0.033796111700699336),
  np.float64(0.022887042150193805),
  np.float64(0.013352389903001258),
  np.float6

In [77]:
nn6 = NN(input_shape=(0,784))
nn6.add_new_random_layer(64, ELU())
nn6.add_new_random_layer(256, ELU())
nn6.add_new_random_layer(128, ELU())
nn6.add_new_random_layer(10, activation=Linear())
train_network(nn6, EPOCHS=700_000, LR=0.005)

i=1000 err=0.219 LR=0.005
i=2000 err=0.142 LR=0.005
i=3000 err=0.162 LR=0.005
i=4000 err=0.168 LR=0.005
i=5000 err=0.116 LR=0.005
i=6000 err=0.192 LR=0.005
i=7000 err=0.145 LR=0.005
i=8000 err=0.094 LR=0.005
i=9000 err=0.125 LR=0.005
i=10000 err=0.090 LR=0.005
i=11000 err=0.109 LR=0.005
i=12000 err=0.190 LR=0.005
i=13000 err=0.059 LR=0.005
i=14000 err=0.114 LR=0.005
i=15000 err=0.079 LR=0.005
i=16000 err=0.127 LR=0.005
i=17000 err=0.033 LR=0.005
i=18000 err=0.076 LR=0.005
i=19000 err=0.063 LR=0.005
i=20000 err=0.087 LR=0.005
i=21000 err=0.161 LR=0.005
i=22000 err=0.103 LR=0.005
i=23000 err=0.093 LR=0.005
i=24000 err=0.092 LR=0.005
i=25000 err=0.081 LR=0.005
i=26000 err=0.046 LR=0.005
i=27000 err=0.079 LR=0.005
i=28000 err=0.056 LR=0.005
i=29000 err=0.069 LR=0.005
i=30000 err=0.082 LR=0.005
i=31000 err=0.042 LR=0.005
i=32000 err=0.061 LR=0.005
i=33000 err=0.049 LR=0.005
i=34000 err=0.079 LR=0.005
i=35000 err=0.056 LR=0.005
i=36000 err=0.083 LR=0.005
i=37000 err=0.068 LR=0.005
i=38000 er

(<NN.NN at 0x289006ff820>,
 [np.float64(0.21918410050656012),
  np.float64(0.14227187261151786),
  np.float64(0.16182383393595384),
  np.float64(0.16750368022749437),
  np.float64(0.1163439850094995),
  np.float64(0.19179747114235163),
  np.float64(0.1447841042004103),
  np.float64(0.09371546709241492),
  np.float64(0.12538574820452408),
  np.float64(0.08963907984732108),
  np.float64(0.10852561426107198),
  np.float64(0.19013568166965622),
  np.float64(0.05925425682200376),
  np.float64(0.1139605216655229),
  np.float64(0.07888212894440674),
  np.float64(0.12655977607713387),
  np.float64(0.032657653976959924),
  np.float64(0.07638199415793459),
  np.float64(0.06335308060217097),
  np.float64(0.08727249796216277),
  np.float64(0.16123822998429285),
  np.float64(0.1028861143240595),
  np.float64(0.09287973962278617),
  np.float64(0.09224827758934814),
  np.float64(0.08125324145690144),
  np.float64(0.04582494481918029),
  np.float64(0.0792573433287101),
  np.float64(0.05639813826762111

In [111]:
train_network(nn6, EPOCHS=700_000, LR=0.05)

STARTING TRAINING PROCESS

i=1000 err=0.971 LR=0.038461538461538464
i=2000 err=0.710 LR=0.02958579881656805
i=3000 err=0.523 LR=0.02275830678197542
i=4000 err=0.407 LR=0.017506389832288784
i=5000 err=0.415 LR=0.013466453717145218
i=6000 err=0.397 LR=0.010358810551650168
i=7000 err=0.371 LR=0.007968315808961667
i=8000 err=0.383 LR=0.006129473699201282
i=9000 err=0.342 LR=0.00471497976861637
i=10000 err=0.344 LR=0.0036269075143202847
i=11000 err=0.330 LR=0.0027899288571694497
i=12000 err=0.330 LR=0.0021460991208995766
i=13000 err=0.329 LR=0.001650845477615059
i=14000 err=0.334 LR=0.0012698811366269685
i=15000 err=0.330 LR=0.0009768316435592064
i=16000 err=0.327 LR=0.0007514089565840049
i=17000 err=0.327 LR=0.0005780068896800038
i=18000 err=0.326 LR=0.00044462068436923366
i=19000 err=0.328 LR=0.00034201591105325664
i=20000 err=0.327 LR=0.0002630891623486589
i=21000 err=0.326 LR=0.00020237627872973763
i=22000 err=0.328 LR=0.00015567406056133664
i=23000 err=0.326 LR=0.00011974927735487433
i

KeyboardInterrupt: 

In [76]:
nn7 = NN(input_shape=(0,784))
nn7.add_new_random_layer(64, GELU())
nn7.add_new_random_layer(256, GELU())
nn7.add_new_random_layer(128, GELU())
nn7.add_new_random_layer(10, activation=Linear())
train_network(nn7, EPOCHS=700_000, LR=0.005)

i=1000 err=0.264 LR=0.005
i=2000 err=0.176 LR=0.005
i=3000 err=0.166 LR=0.005
i=4000 err=0.174 LR=0.005
i=5000 err=0.116 LR=0.005
i=6000 err=0.126 LR=0.005
i=7000 err=0.188 LR=0.005
i=8000 err=0.110 LR=0.005
i=9000 err=0.085 LR=0.005
i=10000 err=0.073 LR=0.005
i=11000 err=0.104 LR=0.005
i=12000 err=0.153 LR=0.005
i=13000 err=0.107 LR=0.005
i=14000 err=0.101 LR=0.005
i=15000 err=0.099 LR=0.005
i=16000 err=0.087 LR=0.005
i=17000 err=0.089 LR=0.005
i=18000 err=0.081 LR=0.005
i=19000 err=0.067 LR=0.005
i=20000 err=0.095 LR=0.005
i=21000 err=0.049 LR=0.005
i=22000 err=0.099 LR=0.005
i=23000 err=0.053 LR=0.005
i=24000 err=0.121 LR=0.005
i=25000 err=0.058 LR=0.005
i=26000 err=0.098 LR=0.005
i=27000 err=0.068 LR=0.005
i=28000 err=0.075 LR=0.005
i=29000 err=0.068 LR=0.005
i=30000 err=0.058 LR=0.005
i=31000 err=0.082 LR=0.005
i=32000 err=0.063 LR=0.005
i=33000 err=0.057 LR=0.005
i=34000 err=0.065 LR=0.005
i=35000 err=0.054 LR=0.005
i=36000 err=0.083 LR=0.005
i=37000 err=0.065 LR=0.005
i=38000 er

(<NN.NN at 0x28900518280>,
 [np.float64(0.2641913617237302),
  np.float64(0.17566492196052544),
  np.float64(0.1655419003198893),
  np.float64(0.1735516629415105),
  np.float64(0.115791396446645),
  np.float64(0.1258272009070558),
  np.float64(0.18771795881939282),
  np.float64(0.10957496758542506),
  np.float64(0.08545462613853226),
  np.float64(0.07253422965224442),
  np.float64(0.1037125575129621),
  np.float64(0.15334130868752932),
  np.float64(0.10710850933725537),
  np.float64(0.10065526701642556),
  np.float64(0.09904792224330487),
  np.float64(0.08660000564189402),
  np.float64(0.08890978486490589),
  np.float64(0.08069572525916258),
  np.float64(0.06739264636433362),
  np.float64(0.09548181335144541),
  np.float64(0.04859571247787588),
  np.float64(0.09872980061324943),
  np.float64(0.05272199232084194),
  np.float64(0.12091446686840808),
  np.float64(0.057502904662170666),
  np.float64(0.09836274662791007),
  np.float64(0.06811025563004215),
  np.float64(0.07455721785841714),

In [110]:
train_network(nn7, EPOCHS=700_000, LR=0.005)

STARTING TRAINING PROCESS

i=1000 err=0.031 LR=0.003846153846153846
i=2000 err=0.012 LR=0.0029585798816568047
i=3000 err=0.004 LR=0.002275830678197542
i=4000 err=0.004 LR=0.0017506389832288784
i=5000 err=0.009 LR=0.0013466453717145218
i=6000 err=0.000 LR=0.0010358810551650169
i=7000 err=0.001 LR=0.0007968315808961668
i=8000 err=0.000 LR=0.0006129473699201282
i=9000 err=0.014 LR=0.0004714979768616371
i=10000 err=0.001 LR=0.0003626907514320285
i=11000 err=0.001 LR=0.000278992885716945
i=12000 err=0.007 LR=0.00021460991208995767
i=13000 err=0.001 LR=0.0001650845477615059
i=14000 err=0.002 LR=0.00012698811366269685
i=15000 err=0.001 LR=9.768316435592064e-05
i=16000 err=0.000 LR=7.514089565840049e-05
i=17000 err=0.000 LR=5.7800688968000376e-05
i=18000 err=0.001 LR=4.446206843692337e-05
i=19000 err=0.001 LR=3.4201591105325664e-05
i=20000 err=0.001 LR=2.6308916234865894e-05
i=21000 err=0.010 LR=2.0237627872973763e-05
i=22000 err=0.000 LR=1.5567406056133662e-05
i=23000 err=0.003 LR=1.197492773

(<NN.NN at 0x28900518280>,
 [np.float64(0.03148471083589672),
  np.float64(0.012258715791327934),
  np.float64(0.003565051168073241),
  np.float64(0.004244569930776858),
  np.float64(0.009320118140547586),
  np.float64(0.00019489095720836305),
  np.float64(0.0010076001529754012),
  np.float64(0.00033413750140813667),
  np.float64(0.013586074868394155),
  np.float64(0.001389844924384413),
  np.float64(0.0011177353095795884),
  np.float64(0.006633504460001473),
  np.float64(0.0010988788936524224),
  np.float64(0.0017814391127539753),
  np.float64(0.0007413596368321014),
  np.float64(0.0004935657434104938),
  np.float64(0.00045171195882470554),
  np.float64(0.0011593255606960069),
  np.float64(0.0011111752991863723),
  np.float64(0.0006937381350380118),
  np.float64(0.009896425142114924),
  np.float64(0.0004424130364355244),
  np.float64(0.0026284157872342386),
  np.float64(0.00044233942457788764),
  np.float64(0.0001546768931542251),
  np.float64(0.0011322806413017147),
  np.float64(0.00

In [117]:
nn8 = NN(input_shape=(0,784))
nn8.add_new_random_layer(256, GELU())
nn8.add_new_random_layer(256, GELU())
nn8.add_new_random_layer(128, GELU())
nn8.add_new_random_layer(64, GELU())
nn8.add_new_random_layer(10, activation=Linear())
train_network(nn8, EPOCHS=300_000, LR=0.005)

STARTING TRAINING PROCESS

i=1000 err=0.140 LR=0.003846153846153846
i=2000 err=0.105 LR=0.0029585798816568047
i=3000 err=0.078 LR=0.002275830678197542
i=4000 err=0.088 LR=0.0017506389832288784
i=5000 err=0.066 LR=0.0013466453717145218
i=6000 err=0.064 LR=0.0010358810551650169
i=7000 err=0.087 LR=0.0007968315808961668
i=8000 err=0.051 LR=0.0006129473699201282
i=9000 err=0.068 LR=0.0004714979768616371
i=10000 err=0.074 LR=0.0003626907514320285
i=11000 err=0.086 LR=0.000278992885716945
i=12000 err=0.107 LR=0.00021460991208995767
i=13000 err=0.077 LR=0.0001650845477615059
i=14000 err=0.061 LR=0.00012698811366269685
i=15000 err=0.048 LR=9.768316435592064e-05
i=16000 err=0.105 LR=7.514089565840049e-05
i=17000 err=0.061 LR=5.7800688968000376e-05
i=18000 err=0.075 LR=4.446206843692337e-05
i=19000 err=0.060 LR=3.4201591105325664e-05
i=20000 err=0.077 LR=2.6308916234865894e-05
i=21000 err=0.068 LR=2.0237627872973763e-05
i=22000 err=0.094 LR=1.5567406056133662e-05
i=23000 err=0.078 LR=1.197492773

KeyboardInterrupt: 

In [None]:
train_network(nn8, EPOCHS=300_000, LR=0.005)

In [None]:
#save into pickle
import pickle
nns_pickle = {'nn2': nn2, 'nn3':nn3, 'nn4': nn4, 'nn6':nn6, 'nn7':nn7, 'nn8':nn8}


file_path = r'pickled_files/saved_nn2.pkl'

with open(file_path, 'wb') as file:
    pickle.dump(file_path, file)

In [2]:
import pickle
#save into pickle

file_path = r'pickled_files/saved_nn.pkl'

with open(file_path, 'wb') as file:
    pickle.dump([nn2, nn3, nn4, nn6, nn7, nn8], file)


NameError: name 'nn2' is not defined