In [1]:
class Tensor:
    def __init__(self, data, requires_grad=False):
        self.data = data
        self.grad = None
        self.requires_grad = requires_grad
        self._backward = lambda: None
        self._prev = []
    
    def mul(self, other):
        # Forward (기존 코드 그대로)
        result_data = []
        for i in range(len(self.data)):
            row = []
            for j in range(len(self.data[0])):
                row.append(self.data[i][j] * other.data[i][j])
            result_data.append(row)
        
        out = Tensor(result_data)
        out._prev = [self, other]  # 부모 기록
        
        # Backward 정의
        def _backward():
            # grad 초기화 (None이면)
            if self.grad is None:
                self.grad = [[0.0] * len(self.data[0]) for _ in range(len(self.data))]
            if other.grad is None:
                other.grad = [[0.0] * len(other.data[0]) for _ in range(len(other.data))]
            
            # Chain rule 적용
            for i in range(len(self.data)):
                for j in range(len(self.data[0])):
                    self.grad[i][j] += out.grad[i][j] * other.data[i][j]
                    other.grad[i][j] += out.grad[i][j] * self.data[i][j]
        
        out._backward = _backward
        return out
    
    def backward(self):
        # 시작점: grad = 1로 초기화
        if self.grad is None:
            self.grad = [[1.0] * len(self.data[0]) for _ in range(len(self.data))]
        
        # 내 _backward 실행
        self._backward()
        
        # 부모들에게 전파 (도미노!)
        for parent in self._prev:
            parent.backward()

In [3]:
class ReLU:
    def __init__(self):  # 여기만 수정!
        self.mask = None
    
    def forward(self, x):
        output = []
        mask = []

        for i in range(len(x.data)):  # 여기도 : 빠졌네요!
            output_row = []
            mask_row = []
            for j in range(len(x.data[0])):
                val = x.data[i][j]
                output_row.append(max(0, val))
                if val > 0:
                    mask_row.append(True)
                else:
                    mask_row.append(False)
            mask.append(mask_row)
            output.append(output_row)
        
        self.mask = mask
        return Tensor(output)
    
    def backward(self, grad_output):
        grad_input_data = []

        for i in range(len(grad_output.data)):
            row = []
            for j in range(len(grad_output.data[0])):
                # mask[i][j]가 True면 grad 통과
                if self.mask[i][j]:
                    row.append(grad_output.data[i][j]) # 통과
                else: # x ≤ 0이었다면
                    row.append(0.0) #차단
            grad_input_data.append(row)

        return Tensor(grad_input_data)
        


    


In [4]:
relu = ReLU()

x = Tensor([[-1.0, 2.0], [-3.0, 4.0]])
y = relu.forward(x)

print("Forward:")
print(y.data)  
# 예상: [[0.0, 2.0], [0.0, 4.0]]

# Backward
grad_out = Tensor([[1.0, 1.0], [1.0, 1.0]])
grad_in = relu.backward(grad_out)

print("\nBackward:")
print(grad_in.data)  
# 예상: [[0.0, 1.0], [0.0, 1.0]]
print("\nMask:")
print(relu.mask)
# 예상: [[False, True], [False, True]]

Forward:
[[0, 2.0], [0, 4.0]]

Backward:
[[0.0, 1.0], [0.0, 1.0]]

Mask:
[[False, True], [False, True]]
