In [86]:
import numpy as np
from tqdm import trange

def fetch(url):
    """Return the decompressed bytes of a gzip file at ``url``, cached on disk.

    The compressed download is cached in the system temp directory under the
    MD5 hex digest of ``url``. Later calls for the same URL read that file and
    never touch the network.

    Args:
        url: Address of a gzip-compressed resource.

    Returns:
        A writable 1-D ``numpy.uint8`` array with the decompressed bytes.

    Raises:
        requests.HTTPError: The server answered with an error status.
        gzip.BadGzipFile: The cached or downloaded payload is not valid gzip.
    """
    import os, hashlib, numpy, gzip, tempfile

    fp = os.path.join(tempfile.gettempdir(), hashlib.md5(url.encode("utf-8")).hexdigest())
    if os.path.isfile(fp):
        with open(fp, "rb") as f:
            dat = f.read()
        payload = gzip.decompress(dat)
    else:
        # Imported lazily so that cache hits work without the HTTP client.
        import requests

        response = requests.get(url, timeout=60)
        # Fail loudly instead of caching an HTTP error page as if it were data.
        response.raise_for_status()
        dat = response.content
        # Decompress before caching so a corrupt download is never stored.
        payload = gzip.decompress(dat)
        # Write to a sibling file and rename it into place: an interrupted
        # write can then never leave a truncated cache entry behind.
        tmp_fp = "%s.%d.tmp" % (fp, os.getpid())
        with open(tmp_fp, "wb") as f:
            f.write(dat)
        os.replace(tmp_fp, fp)
    # frombuffer yields a read-only view of the bytes; copy() makes it writable.
    return numpy.frombuffer(payload, dtype=numpy.uint8).copy()
            



# Download (or read from the local cache) the four MNIST archives. Each name
# is bound to the raw decompressed byte array that fetch() returns.
X_train, Y_train, X_test, Y_test = (
    fetch(archive_url)
    for archive_url in (
        "https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz",
        "https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz",
        "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz",
        "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz",
    )
)

#normalize pixel
def preprocess_mnist(data):
    """Turn a raw image byte array into float32 images scaled by 1/255.

    The first 16 entries are discarded (presumably the IDX image-file
    header; confirm against the file format). The remainder is viewed as a
    stack of 28x28 images.

    Args:
        data: 1-D array whose length minus 16 is a multiple of 28 * 28.

    Returns:
        A float32 array of shape (n_images, 28, 28). For uint8 input the
        values lie in [0, 1].
    """
    header_len = 16
    pixels = data[header_len:]
    as_float = pixels.reshape(-1, 28, 28).astype('float32')
    return as_float / 255.0

# Flatten every 28x28 image into a 784-element row so that both splits have
# the (n_samples, 784) layout a 784-input network expects. The test split was
# previously left as (n_samples, 28, 28) and could not be passed to the model.
X_train_normalized = preprocess_mnist(X_train).reshape(-1, 784)
X_test_normalized = preprocess_mnist(X_test).reshape(-1, 784)

#one-hot encoding
def one_hot_encoding(data):
    """One-hot encode a raw label byte array into ten classes.

    The first 8 entries are discarded (presumably the IDX label-file header;
    confirm against the file format). Every remaining entry is used as a
    class index in the range 0..9.

    Args:
        data: 1-D integer array; entries from position 8 onward are labels.

    Returns:
        An int8 array of shape (n_labels, 10) with a single 1 per row.
    """
    labels = data[8:]
    # Row k of the identity matrix is the one-hot vector for class k, so
    # indexing the identity with the labels produces the whole encoding.
    identity = np.eye(10, dtype=np.int8)
    return identity[labels]
    
# One-hot encode both label sets. The first line used to assign the encoded
# *training labels* to X_train, which overwrote the raw training images and
# left the test labels unencoded.
Y_train = one_hot_encoding(Y_train)
Y_test = one_hot_encoding(Y_test)

class Model:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Training is plain full-batch gradient descent on the mean cross-entropy
    loss. ``forward`` stores its intermediate results on the instance
    (``z1``, ``a1``, ``z2``) and ``backward`` reads ``a1``, so ``backward``
    must be called after ``forward`` on the same batch.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the weight matrices and zero biases.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            seed: Optional seed for the random generator. ``None`` (the
                default) draws fresh entropy, as before; pass an int for
                reproducible weights.
        """
        self.rng = np.random.default_rng(seed)

        # Initialise the weights with the layer_init helper.
        self.w1 = self.layer_init(input_size, hidden_size)
        self.w2 = self.layer_init(hidden_size, output_size)

        self.b1 = np.zeros((1, hidden_size), dtype=np.float32)
        self.b2 = np.zeros((1, output_size), dtype=np.float32)

    def layer_init(self, m, h):
        """Return an (m, h) float32 matrix drawn uniformly from [-1, 1) / sqrt(m * h)."""
        # Uniform initialisation, scaled down by the square root of the layer size.
        ret = self.rng.uniform(-1., 1., size=(m, h)) / np.sqrt(m * h)
        return ret.astype(np.float32)

    @staticmethod
    def relu(x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def forward(self, x):
        """Compute class probabilities for a batch and cache the activations.

        Args:
            x: Array of shape (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size) whose rows sum to 1.
        """
        self.z1 = np.dot(x, self.w1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.w2) + self.b2
        output = self.softmax(self.z2)
        return output

    @staticmethod
    def softmax(x):
        """Row-wise softmax of a 2-D array."""
        # Subtracting the row maximum keeps exp() from overflowing and does
        # not change the result.
        exp = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp / np.sum(exp, axis=1, keepdims=True)

    def backward(self, x, y, output, learning_rate=0.01):
        """Take one gradient-descent step using the activations cached by forward.

        Args:
            x: The batch that was passed to ``forward``.
            y: One-hot targets of shape (n_samples, output_size).
            output: The probabilities ``forward`` returned for ``x``.
            learning_rate: Step size; defaults to the previously hard-coded 0.01.
        """
        m = y.shape[0]
        # Gradient of softmax + cross-entropy with respect to the logits.
        dz2 = output - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m
        # ReLU passes gradient only where its output was positive.
        dz1 = np.dot(dz2, self.w2.T) * (self.a1 > 0)
        dW1 = np.dot(x.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update the parameters in place.
        self.w1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.w2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, x_train, y_train, epochs, learning_rate=0.01):
        """Run ``epochs`` full-batch updates, showing the loss on a tqdm bar.

        Args:
            x_train: Array of shape (n_samples, input_size).
            y_train: One-hot targets of shape (n_samples, output_size).
            epochs: Number of passes over the whole training set.
            learning_rate: Step size forwarded to ``backward``.
        """
        for epoch in (t := trange(epochs)):
            output = self.forward(x_train)
            loss = self.cross_entropy(output, y_train)
            self.backward(x_train, y_train, output, learning_rate)

            # The format string needs explicit conversion types; the earlier
            # "Epoch: % Loss: %" raised on the first epoch.
            t.set_description("Epoch: %d Loss: %.4f" % (epoch, loss))

    def cross_entropy(self, predictions, targets):
        """Mean cross-entropy between predicted probabilities and one-hot targets."""
        # Clip away exact 0 and 1 so that log() stays finite.
        predictions = np.clip(predictions, 1e-12, 1. - 1e-12)
        N = predictions.shape[0]
        loss = -np.sum(targets * np.log(predictions)) / N
        return loss
        

ModuleNotFoundError: No module named 'tqdm'

In [84]:
# 784 inputs (one per pixel of a flattened 28x28 image), 128 hidden units and
# 10 output classes, trained for 1000 passes over the training set.
nn = Model(input_size=784, hidden_size=128, output_size=10)
nn.train(x_train=X_train_normalized, y_train=Y_train, epochs=1000)

Epoch: 0, loss: 2.3024680614471436
Epoch: 100, loss: 2.281738042831421
Epoch: 200, loss: 2.2420923709869385
Epoch: 300, loss: 2.1663458347320557
Epoch: 400, loss: 2.0303144454956055
Epoch: 500, loss: 1.8203396797180176
Epoch: 600, loss: 1.555733323097229
Epoch: 700, loss: 1.2994908094406128
Epoch: 800, loss: 1.0973271131515503
Epoch: 900, loss: 0.9495136141777039
