Імплементація функцій

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from collections import defaultdict

class Parameter:
    """Scalar node of a computation graph with reverse-mode gradients.

    ``+`` and ``*`` create a new ``Parameter`` holding the result together
    with a closure that adds the result's gradient contribution to both
    operands. ``backward()`` runs those closures from an output node down to
    the values it was computed from.

    Callers in this file read the underscore attributes directly:
    ``_value`` (the number), ``_name`` (label shown by ``repr``) and
    ``_grad`` (accumulated dL/d[node]).
    """

    # Both containers are class attributes and therefore shared by every
    # instance: ``_parameters`` maps id(node) -> node and ``_graph`` maps
    # id(result) -> ids of the operands the result was computed from.
    # NOTE: the registry holds a strong reference to every Parameter ever
    # created, so nodes are never released while the class exists.
    _parameters = {}
    _graph = defaultdict(list)

    def __init__(self, value: float, name: str, children=()) -> None:
        """Create a node and register it in the shared registry.

        Args:
            value: Numeric value of the node.
            name: Label used in ``repr`` and in the names of derived nodes.
            children: Iterable stored as the set ``_prev``; the operators of
                this class do not pass it, so it stays empty for them.
        """
        self._value = value
        self._name = name
        self._grad = 0.0
        # Leaves have nothing to propagate; operators replace this closure.
        self._backward = lambda: None
        self._prev = set(children)

        self._parameters[id(self)] = self

    def __repr__(self) -> str:
        return f"Parameter {self._name} = {self._value}; dL/d[{self._name}] = {self._grad}"

    def _connect(self, *operands: 'Parameter') -> None:
        """Record graph edges from this node to the operands that produced it."""
        self._graph[id(self)].extend(id(operand) for operand in operands)

    def _reachable_ids(self) -> set:
        """Return the ids of this node and of everything it was computed from."""
        graph = self._graph
        seen = {id(self)}
        stack = [id(self)]
        while stack:
            # .get() avoids inserting empty entries into the defaultdict.
            for child in graph.get(stack.pop(), ()):
                if child not in seen:
                    seen.add(child)
                    stack.append(child)
        return seen

    def topological_sort(self):
        """Order the nodes this parameter depends on, outputs before inputs.

        Only the subgraph reachable from ``self`` is considered, so nodes that
        belong to other, unrelated expressions are never returned.

        Returns:
            List of ``id()`` values (keys of ``_parameters``) starting with
            ``id(self)``; every node appears after all nodes that consume it.
        """
        graph = self._graph
        reachable = self._reachable_ids()

        # Count, inside the subgraph only, how many results consume each node.
        indegree = dict.fromkeys(reachable, 0)
        for node in reachable:
            for child in graph.get(node, ()):
                indegree[child] += 1

        # Kahn's algorithm; the output list doubles as the FIFO queue, which
        # avoids the O(n) cost of list.pop(0).
        ordered = [id(self)]
        cursor = 0
        while cursor < len(ordered):
            node = ordered[cursor]
            cursor += 1
            for child in graph.get(node, ()):
                indegree[child] -= 1
                if indegree[child] == 0:
                    ordered.append(child)

        return ordered

    def __mul__(self, other: 'Parameter') -> 'Parameter':
        """Return the product node ``self * other``."""
        result = Parameter(
            self._value * other._value,
            f'{self._name} * {other._name}'
        )

        def _backward():
            self._grad += other._value * result._grad  # dL / dself
            other._grad += self._value * result._grad  # dL / dother

        result._connect(self, other)
        result._backward = _backward

        return result

    def __add__(self, other: 'Parameter') -> 'Parameter':
        """Return the sum node ``self + other``."""
        result = Parameter(
            self._value + other._value,
            f'[{self._name} + {other._name}]'
        )

        def _backward():
            self._grad += 1.0 * result._grad  # dL / dself
            other._grad += 1.0 * result._grad  # dL / dother

        result._connect(self, other)
        result._backward = _backward

        return result

    def backward(self):
        """Back-propagate from this node, treating it as the final output.

        Sets ``self._grad`` to 1 and then runs the backward closure of every
        node reachable from ``self``, consumers first. Contributions are added
        with ``+=``, so gradients left over from an earlier pass are not
        cleared here.
        """
        order = self.topological_sort()
        self._grad = 1.0
        for node_id in order:
            self._parameters[node_id]._backward()


Тестування функцій

In [2]:
def test_addition():
    """Check the value of ``a + b`` and the gradients reaching both operands."""
    a, b = Parameter(2.0, 'a'), Parameter(3.0, 'b')
    c = a + b
    assert c._value == 5.0, f"Expected c._value to be 5.0 but got {c._value}"

    # Seed the gradient of the final value, then back-propagate from it.
    c._grad = 1.0
    c.backward()

    assert a._grad == 1.0, f"Expected a._grad to be 1.0 but got {a._grad}"
    assert b._grad == 1.0, f"Expected b._grad to be 1.0 but got {b._grad}"
    print("Addition test passed.")

def test_multiplication():
    """Check the value of ``a * b`` and the gradients reaching both operands."""
    a, b = Parameter(2.0, 'a'), Parameter(3.0, 'b')
    c = a * b
    assert c._value == 6.0, f"Expected c._value to be 6.0 but got {c._value}"

    # Seed the gradient of the final value, then back-propagate from it.
    c._grad = 1.0
    c.backward()

    assert a._grad == 3.0, f"Expected a._grad to be 3.0 but got {a._grad}"
    assert b._grad == 2.0, f"Expected b._grad to be 2.0 but got {b._grad}"
    print("Multiplication test passed.")

def test_combined_operations():
    """Check value and gradients of the combined expression ``a * b + c``."""
    a, b, c = Parameter(2.0, 'a'), Parameter(3.0, 'b'), Parameter(4.0, 'c')
    product = a * b
    d = product + c
    assert d._value == 10.0, f"Expected d._value to be 10.0 but got {d._value}"

    # Seed the gradient of the final value, then back-propagate from it.
    d._grad = 1.0
    d.backward()

    assert a._grad == 3.0, f"Expected a._grad to be 3.0 but got {a._grad}"
    assert b._grad == 2.0, f"Expected b._grad to be 2.0 but got {b._grad}"
    assert c._grad == 1.0, f"Expected c._grad to be 1.0 but got {c._grad}"
    print("Combined operations test passed.")

def test_backward_propagation():
    """Check back-propagation through ``a + b * c``."""
    a, b, c = Parameter(1.0, 'a'), Parameter(2.0, 'b'), Parameter(3.0, 'c')
    scaled = b * c
    d = a + scaled
    assert d._value == 7.0, f"Expected d._value to be 7.0 but got {d._value}"

    # Seed the gradient of the final value, then back-propagate from it.
    d._grad = 1.0
    d.backward()

    assert a._grad == 1.0, f"Expected a._grad to be 1.0 but got {a._grad}"
    assert b._grad == 3.0, f"Expected b._grad to be 3.0 but got {b._grad}"
    assert c._grad == 2.0, f"Expected c._grad to be 2.0 but got {c._grad}"
    print("Backward propagation test passed.")

if __name__ == "__main__":
    # Run the checks one after another, in the order they are defined.
    for run_check in (
        test_addition,
        test_multiplication,
        test_combined_operations,
        test_backward_propagation,
    ):
        run_check()


Addition test passed.
Multiplication test passed.
Combined operations test passed.
Backward propagation test passed.


In [3]:
import numpy as np
import math
import matplotlib.pyplot as plt
from collections import defaultdict

class Parameter:
    """Scalar node of a computation graph with reverse-mode gradients.

    ``+``, ``*``, ``relu()`` and ``prelu()`` create a new ``Parameter``
    holding the result together with a closure that adds the result's
    gradient contribution to the operand(s). ``backward()`` runs those
    closures from an output node down to the values it was computed from.

    Callers in this file read the underscore attributes directly:
    ``_value`` (the number), ``_name`` (label shown by ``repr``) and
    ``_grad`` (accumulated dL/d[node]).
    """

    # Both containers are class attributes and therefore shared by every
    # instance: ``_parameters`` maps id(node) -> node and ``_graph`` maps
    # id(result) -> ids of the operands the result was computed from.
    # NOTE: the registry holds a strong reference to every Parameter ever
    # created, so nodes are never released while the class exists.
    _parameters = {}
    _graph = defaultdict(list)

    def __init__(self, value: float, name: str, children=()) -> None:
        """Create a node and register it in the shared registry.

        Args:
            value: Numeric value of the node.
            name: Label used in ``repr`` and in the names of derived nodes.
            children: Iterable stored as the set ``_prev``; the operators of
                this class do not pass it, so it stays empty for them.
        """
        self._value = value
        self._name = name
        self._grad = 0.0
        # Leaves have nothing to propagate; operators replace this closure.
        self._backward = lambda: None
        self._prev = set(children)

        self._parameters[id(self)] = self

    def __repr__(self) -> str:
        return f"Parameter {self._name} = {self._value}; dL/d[{self._name}] = {self._grad}"

    def _connect(self, *operands: 'Parameter') -> None:
        """Record graph edges from this node to the operands that produced it."""
        self._graph[id(self)].extend(id(operand) for operand in operands)

    def _reachable_ids(self) -> set:
        """Return the ids of this node and of everything it was computed from."""
        graph = self._graph
        seen = {id(self)}
        stack = [id(self)]
        while stack:
            # .get() avoids inserting empty entries into the defaultdict.
            for child in graph.get(stack.pop(), ()):
                if child not in seen:
                    seen.add(child)
                    stack.append(child)
        return seen

    def topological_sort(self):
        """Order the nodes this parameter depends on, outputs before inputs.

        Only the subgraph reachable from ``self`` is considered, so nodes that
        belong to other, unrelated expressions are never returned.

        Returns:
            List of ``id()`` values (keys of ``_parameters``) starting with
            ``id(self)``; every node appears after all nodes that consume it.
        """
        graph = self._graph
        reachable = self._reachable_ids()

        # Count, inside the subgraph only, how many results consume each node.
        indegree = dict.fromkeys(reachable, 0)
        for node in reachable:
            for child in graph.get(node, ()):
                indegree[child] += 1

        # Kahn's algorithm; the output list doubles as the FIFO queue, which
        # avoids the O(n) cost of list.pop(0).
        ordered = [id(self)]
        cursor = 0
        while cursor < len(ordered):
            node = ordered[cursor]
            cursor += 1
            for child in graph.get(node, ()):
                indegree[child] -= 1
                if indegree[child] == 0:
                    ordered.append(child)

        return ordered

    def __mul__(self, other: 'Parameter') -> 'Parameter':
        """Return the product node ``self * other``."""
        result = Parameter(
            self._value * other._value,
            f'{self._name} * {other._name}'
        )

        def _backward():
            self._grad += other._value * result._grad  # dL / dself
            other._grad += self._value * result._grad  # dL / dother

        result._connect(self, other)
        result._backward = _backward

        return result

    def __add__(self, other: 'Parameter') -> 'Parameter':
        """Return the sum node ``self + other``."""
        result = Parameter(
            self._value + other._value,
            f'[{self._name} + {other._name}]'
        )

        def _backward():
            self._grad += 1.0 * result._grad  # dL / dself
            other._grad += 1.0 * result._grad  # dL / dother

        result._connect(self, other)
        result._backward = _backward

        return result

    def relu(self) -> 'Parameter':
        """Return the node ``max(0, self)``.

        The local derivative is 1 for a strictly positive input and 0
        otherwise (including at exactly 0).
        """
        result = Parameter(
            np.maximum(0, self._value),
            f'ReLU({self._name})'
        )

        def _backward():
            self._grad += (1.0 if self._value > 0 else 0.0) * result._grad

        result._connect(self)
        result._backward = _backward

        return result

    def prelu(self, alpha=0.1) -> 'Parameter':
        """Return the node that is ``self`` when positive, else ``alpha * self``.

        Args:
            alpha: Slope applied to non-positive inputs; it is also the local
                derivative used for them in the backward pass.
        """
        result = Parameter(
            self._value if self._value > 0 else alpha * self._value,
            f'PReLU({self._name})'
        )

        def _backward():
            self._grad += (1.0 if self._value > 0 else alpha) * result._grad

        result._connect(self)
        result._backward = _backward

        return result

    def backward(self):
        """Back-propagate from this node, treating it as the final output.

        Sets ``self._grad`` to 1.0 and then runs the backward closure of every
        node reachable from ``self``, consumers first. Contributions are added
        with ``+=``, so gradients left over from an earlier pass are not
        cleared here.
        """
        order = self.topological_sort()
        self._grad = 1.0
        for node_id in order:
            self._parameters[node_id]._backward()

def gradient_descent(parameters, lr=0.01):
    """Apply one gradient-descent step to each parameter and clear its gradient.

    Args:
        parameters: Iterable of objects exposing ``_value`` and ``_grad``.
        lr: Step size multiplied with each gradient.
    """
    for p in parameters:
        step = lr * p._grad
        p._value -= step
        # Reset so the next backward pass starts accumulating from zero.
        p._grad = 0.0

# Tests for the activation functions and gradient descent
def test_activations_and_gradient_descent():
    """Exercise ReLU, PReLU and a single gradient-descent step."""
    # ReLU
    a = Parameter(2.0, 'a')
    b = Parameter(-3.0, 'b')
    c = a.relu()
    d = b.relu()

    assert round(c._value, 2) == 2.0, f"Expected c._value to be 2.0 but got {round(c._value, 2)}"
    assert round(d._value, 2) == 0.0, f"Expected d._value to be 0.0 but got {round(d._value, 2)}"

    c._grad = 1.0
    c.backward()
    assert round(a._grad, 2) == 1.0, f"Expected a._grad to be 1.0 but got {round(a._grad, 2)}"
    assert round(b._grad, 2) == 0.0, f"Expected b._grad to be 0.0 but got {round(b._grad, 2)}"
    print("ReLU test passed.")

    # PReLU
    a = Parameter(2.0, 'a')
    b = Parameter(-3.0, 'b')
    alpha = 0.1
    c = a.prelu(alpha)
    d = b.prelu(alpha)

    assert round(c._value, 2) == 2.0, f"Expected c._value to be 2.0 but got {round(c._value, 2)}"
    assert round(d._value, 2) == -0.3, f"Expected d._value to be -0.3 but got {round(d._value, 2)}"

    # Assert each gradient right after back-propagating from its own output,
    # so the check reflects what backward() computed; the gradients are never
    # assigned by hand before being asserted.
    c._grad = 1.0
    c.backward()
    assert round(a._grad, 2) == 1.0, f"Expected a._grad to be 1.0 but got {round(a._grad, 2)}"

    d._grad = 1.0
    d.backward()
    assert round(b._grad, 2) == alpha, f"Expected b._grad to be {alpha} but got {round(b._grad, 2)}"
    print("PReLU test passed.")

    # Gradient descent
    a = Parameter(2.0, 'a')
    b = Parameter(3.0, 'b')
    c = a * b
    c._grad = 1.0
    c.backward()

    gradient_descent([a, b], lr=0.1)
    assert round(a._value, 2) == 1.7, f"Expected a._value to be 1.7 but got {round(a._value, 2)}"
    assert round(b._value, 2) == 2.8, f"Expected b._value to be 2.8 but got {round(b._value, 2)}"
    print("Gradient descent test passed.")
if __name__ == "__main__":
    # Run the checks one after another, in the order they are defined.
    for run_check in (
        test_addition,
        test_multiplication,
        test_combined_operations,
        test_backward_propagation,
        test_activations_and_gradient_descent,
    ):
        run_check()

Addition test passed.
Multiplication test passed.
Combined operations test passed.
Backward propagation test passed.
ReLU test passed.
PReLU test passed.
Gradient descent test passed.
