In [92]:
import pandas as pd
import matplotlib.pyplot as plt

# Загрузка данных
# url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
url = "./iris.csv"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv(url, names=names)
print(dataset.head(2))
transform = {'Setosa':0, 'Versicolor':1, 'Virginica':2}
dataset["class"] = dataset["class"].apply(lambda x: transform[x])

url = "./mushrooms.csv"
dataset = pd.read_csv(url)
dataset.head(10)

for column in dataset.columns[1:]:
    unics = dataset[column].unique()
    transform = {unic:i+1 for i, unic in enumerate(dataset[column].unique())}
    dataset[column] = dataset[column].apply(lambda x: transform[x])

transform = {"e":0, "p":1}
dataset["class"] = dataset["class"].apply(lambda x: transform[x])
dataset.head(10)

   sepal-length  sepal-width  petal-length  petal-width   class
0           5.1          3.5           1.4          0.2  Setosa
1           4.9          3.0           1.4          0.2  Setosa


Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,0,1,1,2,1,2,1,1,2,1,...,1,1,1,1,1,1,1,2,2,2
2,0,2,1,3,1,3,1,1,2,2,...,1,1,1,1,1,1,1,2,2,3
3,1,1,2,3,1,1,1,1,1,2,...,1,1,1,1,1,1,1,1,1,1
4,0,1,1,4,2,4,1,2,2,1,...,1,1,1,1,1,1,2,2,3,2
5,0,1,2,2,1,2,1,1,2,2,...,1,1,1,1,1,1,1,1,2,2
6,0,2,1,3,1,2,1,1,2,3,...,1,1,1,1,1,1,1,1,2,3
7,0,2,2,3,1,3,1,1,2,2,...,1,1,1,1,1,1,1,2,1,3
8,1,1,2,3,1,1,1,1,1,4,...,1,1,1,1,1,1,1,1,4,2
9,0,2,1,2,1,2,1,1,2,3,...,1,1,1,1,1,1,1,1,1,3


In [175]:
import numpy as np


class Linear:
    def __init__(self, in_neurons, out_neurons, activ_func = "sigmoid", lr=0.03):
        self.in_neurons = in_neurons
        self.out_neurons = out_neurons
        self.activ_func = { "None": lambda x : x,
                           "sigmoid" : lambda x: 1 / (1 + np.exp(-x)),
                            "softmax": self.softmax
                            }.get(activ_func, "sigmoid")
        
        self.activ_func_name = activ_func
        self.lr = lr
        self.W = np.random.random_sample((self.out_neurons, self.in_neurons))-0.5
        self.b = np.random.random_sample(out_neurons)+0.001
        self.X = None
        self.output = None

    def softmax(self, y_pred):
        #Решаем проблему огромных экспонент
        exp_pred = np.exp(y_pred - np.max(y_pred, axis=1, keepdims=True))
        # print("EXPPRED:", exp_pred)
        # exp_pred = np.exp(y_pred)
        return exp_pred / np.sum(exp_pred, axis=1, keepdims=True)

    def activ_grad(self, outgrad):
        if self.activ_func_name == "sigmoid":
            # print("OUTPUT: ", self.output)
            return (1-self.output) * self.output
        # return (1-self.activ_func(self.X)) * self.activ_func(self.X)
        elif self.activ_func_name =="softmax":
            # print("yeh")
            n = self.output.shape[1]
            # print("OUTPUT SOFTMAX IS:",self.output)
            trans_axes = (0, 2, 1)
            y_new = np.tile(self.output[:, :, np.newaxis], (1, 1, n))
            # np.matmul((np.identity(n)[np.newaxis,:]  - np.transpose(y_new, axes = trans_axes)) * y_new, 
            #           np.transpose(outgrad[:, np.newaxis], axes = trans_axes)).squeeze()
            # НЕПРАВИЛЬНЫЙ ГРАДИЕНТ ФУНКЦИИ АКТИВАЦИИ ДЛЯ SOFTMAX!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            return np.matmul(
                     (np.identity(n)[np.newaxis,:]  - np.transpose(y_new, axes = trans_axes)) * y_new, 
                      np.transpose(outgrad[:, np.newaxis], axes = trans_axes)
                      ).squeeze()
        return 1

    def forward(self, X):
        self.X = X
        self.output = self.activ_func(np.dot(X, self.W.T) + self.b)
        return self.output.copy()
    
    def backward(self, out_grad):
        # print("out_grad",out_grad)
        if self.activ_func_name != "softmax":
            out_grad = out_grad * self.activ_grad(out_grad)
        else:
            out_grad = self.activ_grad(out_grad)
            
        # print("OUTgrad after activ", out_grad)
        # print("W", self.W)
        # print("b", self.b)
        # print("X", self.X)
        # print("Out_trans", out_grad.T)
        back_grad = np.dot(out_grad, self.W)
        self.b -= np.sum(out_grad, axis=0) * self.lr
        self.W -= np.dot(out_grad.T, self.X) * self.lr
        # print("NEW W", self.W)
        return back_grad


class MLP:
    def __init__(self, n_per_layer:list, num_classes:int, active_funcs_per_layer:dict = {0: "None"}, lr:float=0.03, diff_funcs = False, mixing = True ):
        self.layers = [n_layer for n_layer in n_per_layer] + [num_classes]
        self.mixing = mixing
        self.num_classes = num_classes

        if diff_funcs == True:
            self.active_funcs = []
            flag = active_funcs_per_layer.get(0)

            for i in range(len(self.layers) - 1):
                flag = active_funcs_per_layer.get(i, flag)
                self.active_funcs.append(flag)
            
        else:
            self.active_funcs = [active_funcs_per_layer.get(0) for i in range(len(self.layers)-2)] + ["softmax"]
        
        self.layers = [Linear(self.layers[i], self.layers[i+1], activ_func=self.active_funcs[i], lr=lr) for i in range(len(self.layers) - 1)]


    def preload_y(self, y_true):
        otv = np.zeros((y_true.shape[0], self.num_classes))
        otv[np.arange(otv.shape[0]), y_true] += 1
        return otv
    

    def categirical_crossentropy(self, y_pred, y_true):
        # y_pred = self.softmax(y_pred)
        y_true = self.preload_y(y_true)

        # print(y_true)
        # print(y_pred)
        cross_ent = - y_true * np.log(y_pred) #np.sum(y_true * np.log(y_pred), axis=-1)# / y_true.shape[0]
        grad = y_pred - y_true
        # print(f"{grad=}")
        # print(f"{cross_ent=}")
        return cross_ent, grad
    
    
    
    def predict_proba(self, X):
        now_x = X.copy()
        for layer in self.layers:
            now_x = layer.forward(now_x)
        
        return now_x
    
    

    def train(self, X, y, batch_size):
        if batch_size > y.shape[0]:
            return -1
        
        losses = 0 
        if self.mixing:
            indices = np.random.permutation(X.shape[0])
            X = X[indices]
            y = y[indices]

        #Данный блок отвечает за прямое распространение и подсчёт итоговой ошибки на батче
        #================================================================
        for batch_x in range(batch_size, X.shape[0], batch_size):
            # print("now",batch_x)
            now_x = X[batch_x - batch_size: batch_x]
            # print("NOW_X IS", now_x)
            y_softmax = self.predict_proba(now_x)

            batch_loss, otv_grad = self.categirical_crossentropy(y_softmax, y[batch_x - batch_size: batch_x])
            #================================================================


            #Данынй блок - изменение параметров весов нейронки
            #================================================================
            for layer in reversed(self.layers):
                # print("ACTIVATE", layer.activ_func)
                otv_grad = layer.backward(otv_grad)
            #================================================================

            losses+=np.sum(batch_loss)

            # if batch_x+1%1 == 0:
            # print(f"Average_loss [{batch_x:>5d}/{y.shape[0]:>5d}] is: {losses/batch_x:>7f}")
        
        print(f"Average_loss for EPOCH is: {losses/y.shape[0]:>7f}")




    def test(self, X, y):
        y_softmax = self.predict_proba(X)

        loss, _ = self.categirical_crossentropy(y_softmax, y)
        self.pre_rec_f1(y, np.argmax(y_softmax, axis=1))
        print(f"TEST Average LOSS IS: {np.sum(loss)/y.shape[0]}")
        


    def pre_rec_f1(self, y_true, y_pred):
        precision_list = []
        recall_list = []
        f1_list = []
        for i in np.unique(y_true):

            tp = np.sum((y_pred == i) * (y_true == i))
            fp = np.sum((y_pred==i) * (y_true != i))
            fn = np.sum((y_pred!=i) * (y_true == i))
            precision = tp/max(tp+fp, 1)
            recall = tp/max(tp+fn, 1)
            f1 = 2*precision*recall/max((precision+recall, 0.000001))

            precision_list.append(precision)
            recall_list.append(recall)
            f1_list.append(f1)
        
        print(f"Macro precision = {sum(precision_list)/len(precision_list)}")
        print(f"Macro recall = {sum(recall_list)/len(recall_list)}")
        print(f"Macro f1 = {sum(f1_list)/len(f1_list)}")
        print(f"accuracy = {np.sum(y_pred==y_true)/y_true.shape[0]}")

In [178]:
from sklearn.model_selection import train_test_split
X = dataset[dataset.columns[1:]].to_numpy()
y = dataset["class"].to_numpy()

x_train, x_test, y_train1, y_test1 = train_test_split(X, y, test_size=0.2)

cls = MLP(n_per_layer=(22,10,5), num_classes=2, lr=0.00003)

for i in range(100):
    if i%100 == 0:
        print(f"EPOCH {i} start.")

    cls.train(x_train, y_train1, batch_size=100)
    
    if (i + 1) %1 == 0:
        cls.test(x_test, y_test1)
        print(f"EPOCH {i} end.\n ======================\n\n\n\n======================")

EPOCH 0 start.
Average_loss for EPOCH is: 0.997461
Macro precision = 0.5307109151388567
Macro recall = 0.5300689237294555
Macro f1 = 0.5274796376646114
accuracy = 0.5298461538461539
TEST Average LOSS IS: 0.7295866690282514
EPOCH 0 end.



Average_loss for EPOCH is: 0.719905
Macro precision = 0.6214116659031913
Macro recall = 0.6204461107324093
Macro f1 = 0.6196018173367859
accuracy = 0.6203076923076923
TEST Average LOSS IS: 0.6838465060972654
EPOCH 1 end.



Average_loss for EPOCH is: 0.677420
Macro precision = 0.6880010179412139
Macro recall = 0.6880027266530334
Macro f1 = 0.6879995273838995
accuracy = 0.688
TEST Average LOSS IS: 0.6472401092409359
EPOCH 2 end.



Average_loss for EPOCH is: 0.644227
Macro precision = 0.7337974334249101
Macro recall = 0.7335870635461637
Macro f1 = 0.7334896128653734
accuracy = 0.7335384615384616
TEST Average LOSS IS: 0.6161238713256971
EPOCH 3 end.



Average_loss for EPOCH is: 0.614234
Macro precision = 0.7651549615287798
Macro recall = 0.764875407104

In [155]:
import numpy as np
# def preload_y(y_true):
#     otv = np.zeros((y_true.shape[0], 3))
#     otv[np.arange(otv.shape[0]), y_true] += 1
#     return otv

# def categirical_crossentropy( y_pred, y_true):
#     y_pred = softmax( y_pred) 
#     print(y_pred)
#     y_true = preload_y(y_true)
#     cross_ent = np.sum(y_true * np.log(y_pred), axis=-1) / y_true.shape[0]
#     grad = y_pred - y_true
#     return cross_ent, grad

# # preload_y(np.array([0,0,1,2,0,1,2,1]))

# categirical_crossentropy(y_pred=np.array([[0.8,0.1,0.1],
#                                           [0.1,0.8,0.1],
#                                           [0.1,0.1,0.8],
#                                           [0.8,0.1,0.1],
#                                           [0.8,0.1,0.1],
#                                           [0.8,0.1,0.1]]),
                        # y_true=np.array([0,1,2,0,0,1]))
# y  = np.array([0,1,2,0,0,1])
y_otv = np.array([[1,0,0,0], [0,1,0,0], [0,1,0,0]])
y = np.array([[0.9, 0.05, 0.03, 0.02], [0.05, 0.9, 0.03, 0.02], [0.05, 0.03, 0.9, 0.02]])
y_otv = np.array([[1,0], [0,1], [0,1]])
y = np.array([[0.9, 0.1], [0.1, 0.9], [0.9, 0.1]])
# y = np.array([[0.9,0.5,0.5]])
# yal = np.diagflat(y)
# # np.einsum('ij,ik->ijk', y, y)
# # yal-np.einsum('ij,ik->ijk', y, y)
# grad_logits = y.copy()
# grad_logits[range(X.shape[0]), y_otv] -= 1
# grad_logits

# class Softmax:
#     def backward(out_grad):

#         if i == k:
#             dx_dy = y[i](1 - y[i])
#         else:
#             dx_dy = -y[i]*y[k]

fard = y - y_otv

n = y.shape[1]
y_y_matr = np.tile(y, (n, 1))
y_new = np.tile(y[:, :, np.newaxis], (1, 1, n))
# print(y)
# print(np.identity(n)[np.newaxis,:])
# print(y_new)
trans_axes = (0,2, 1)
np.transpose(y_new, axes = trans_axes)
print(np.transpose(y_new, axes = trans_axes))
print(np.identity(n)[np.newaxis,:])
# print(np.transpose(y_new, axes = (0,2, 1)))

# print(y_new)
# print(np.transpose(fard[:, np.newaxis], axes = trans_axes))
np.matmul((np.identity(n)[np.newaxis,:]  - np.transpose(y_new, axes = trans_axes)) * y_new, np.transpose(fard[:, np.newaxis], axes = trans_axes)).squeeze()



# print(np.transpose(fard[:, np.newaxis], axes = trans_axes))

# np.dot((np.identity(n)  - y_y_matr) * y_y_matr.T,  fard.T)

[[[0.9 0.1]
  [0.9 0.1]]

 [[0.1 0.9]
  [0.1 0.9]]

 [[0.9 0.1]
  [0.9 0.1]]]
[[[1. 0.]
  [0. 1.]]]


array([[-0.018,  0.018],
       [ 0.018, -0.018],
       [ 0.162, -0.162]])