In [1]:
import numpy as np
import random


class NN:
    
    def __init__(self, nlayers, actv_func, last_actv_func):
        self.weights = []
        self.outputs = []
        self.activations = [self.sigmoid][actv_func]
        self.gradient = [self.sigmoid_grad, self.sigmoid_grad]
        self.final = 0
        self.nb_layers = int(len(nlayers))
        for i in range(len(nlayers)-1):   #niepotrzebny jest ten loop
            self.weights.append((np.random.rand(nlayers[i],nlayers[i+1])-0.5)*2)
            self.outputs.append(np.zeros(nlayers[i]))
        self.outputs.append(np.zeros(nlayers[self.nb_layers-1]))
                 
    def softmax(self, x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)
    
    def softmax_grad(self, softmax):
    # function and explanation from https://stackoverflow.com/questions/54976533/derivative-of-softmax-function-in-python
        s = softmax.reshape(-1,1)
        return np.diagflat(s) - np.dot(s, s.T)
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_grad(self, x):
        return (self.sigmoid(x) * (1 - self.sigmoid(x)))

    def relu(self, a):
        return np.array(list(map(lambda d: max(0,d),a)))
    
    def relu_grad(self,a):
        return (a > 0) * 1
            
    def calc_output(self, iput):
        y_prev = iput
        self.outputs[0] = np.array(iput)
        if len(iput) != self.weights[0].shape[0]:
            print('Not enough or too much inputs!')
        else:
            for i in range(1,self.nb_layers):
                self.outputs[i] = np.matmul(np.transpose(self.weights[i-1]),y_prev.reshape(-1, 1))
                y_prev = self.activations(self.outputs[i])
        #self.final = self.softmax(self.outputs[self.nb_layers-1])
        self.final = self.sigmoid(self.outputs[self.nb_layers-1])
        
    def learn(self, factor, iterations, inputs, expected_outputs):
        for _ in range(iterations):
            c = list(zip(inputs.copy(), expected_outputs.copy()))
            random.shuffle(c)
            for i,j in c:
                self.calc_output(i)
                errors = j.reshape(-1, 1) - self.final
                for k in range(1,self.nb_layers):           #backpropagation https://stats.stackexchange.com/questions/267576/matrix-representation-of-softmax-derivatives-in-backpropagation
                    if (k==1):
                        #d = np.matmul((self.gradient[(k>1)*1](self.outputs[-k])),errors)
                        d = errors * (self.gradient[(k>1)*1](self.outputs[-k]).reshape(-1, 1))
                    else:
                        d = errors * (self.gradient[(k>1)*1](self.outputs[-k]).reshape(-1, 1))
                    dW = np.matmul((self.sigmoid(self.outputs[-(k+1)])).reshape(-1, 1),d.T)
                    #print('dW')
                    #print(dW)
                    if k+1 != self.nb_layers:
                        errors = np.matmul(self.weights[-k],d)
                    self.weights[-k] = self.weights[-k] + (factor*dW)

    

In [6]:
x = NN([4,5,6,3], 0,0)
x.learn(0.01,1000,(data.data).copy(),(target).copy())

In [7]:
for i in range(150):
    x.calc_output(data.data[i])
    print(x.final)

[[9.30162137e-01]
 [1.32325531e-01]
 [3.64294280e-04]]
[[9.33143751e-01]
 [1.38080989e-01]
 [3.68416407e-04]]
[[9.32130664e-01]
 [1.31561156e-01]
 [3.62266487e-04]]
[[9.25700641e-01]
 [1.39878080e-01]
 [3.85799970e-04]]
[[9.28239448e-01]
 [1.31037297e-01]
 [3.66134678e-04]]
[[9.20921870e-01]
 [1.41022584e-01]
 [3.90331315e-04]]
[[9.25911855e-01]
 [1.34649432e-01]
 [3.77964187e-04]]
[[9.27454032e-01]
 [1.36327201e-01]
 [3.74825056e-04]]
[[9.28229921e-01]
 [1.38933894e-01]
 [3.82307282e-04]]
[[9.29452083e-01]
 [1.37814287e-01]
 [3.73954071e-04]]
[[9.28199458e-01]
 [1.33880856e-01]
 [3.68370156e-04]]
[[9.2239549e-01]
 [1.3921431e-01]
 [3.8842747e-04]]
[[9.32348612e-01]
 [1.35402740e-01]
 [3.66399958e-04]]
[[9.36036853e-01]
 [1.24233071e-01]
 [3.47665983e-04]]
[[9.35242789e-01]
 [1.24534069e-01]
 [3.43311560e-04]]
[[9.24050243e-01]
 [1.30784009e-01]
 [3.70310314e-04]]
[[9.31567590e-01]
 [1.29326018e-01]
 [3.57061646e-04]]
[[9.30198356e-01]
 [1.34173463e-01]
 [3.67018122e-04]]
[[9.24796481e

In [18]:
x.calc_output([5,6,1,8])
print(x.final)

[0.2287993  0.33321917 0.4343506 ]


In [129]:
x.gradient[0](np.array([0.5,0.3,0.2]))

array([[ 0.25, -0.15, -0.1 ],
       [-0.15,  0.21, -0.06],
       [-0.1 , -0.06,  0.16]])

In [7]:
x = np.array([[1],[2],[1]])
y = np.array([[6],[3],[7]])

In [3]:
from sklearn.datasets import load_iris
data = load_iris()
target = np.zeros((data.target.shape[0],3))
for i in range(data.target.shape[0]):
    target[i][data.target[i]] = 1

In [4]:
x.sigmoid_grad(2.17571348)

-2.558015667053711

In [10]:
x*y

array([[6],
       [6],
       [7]])

In [55]:
import random
random.shuffle(list(zip(x,y)))

None


In [52]:
c

[(1, 5), (3, 7), (4, 8), (2, 6)]

In [12]:
softmaxx(np.array([ 88.79364469, 111.95757656, 107.40550825]))

AssertionError: 

In [11]:
def softmaxx(z):
    assert len(z.shape) == 2
    s = np.max(z, axis=1)
    s = s[:, np.newaxis] # necessary step to do broadcasting
    e_x = np.exp(z - s)
    div = np.sum(e_x, axis=1)
    div = div[:, np.newaxis] # dito
    return e_x / div

In [21]:
np.matmul(B,A)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 2)

In [152]:
A = np.array([[1,2],[3,4]])

In [153]:
B=np.array([2,3])

In [154]:
A*B

array([[ 2,  6],
       [ 6, 12]])