In [None]:
#| default_exp core.tensor
#| export

import numpy as np

# Size constants used when reasoning about tensor memory usage
BYTES_PER_FLOAT32 = 4  # One float32 element occupies 4 bytes
KB_TO_BYTES = 1 << 10  # 1024 bytes per kilobyte
MB_TO_BYTES = KB_TO_BYTES * KB_TO_BYTES  # 1024 * 1024 bytes per megabyte

# Tensor class

In [None]:
#| export
class Tensor:
    """Educational tensor - the foundation of machine learning computation.

    A thin wrapper around a float32 NumPy array that exposes:
    - data: The actual numerical values (NumPy array)
    - shape: Dimensions of the tensor
    - size: Total number of elements
    - dtype: Data type (float32)

    All arithmetic, matrix, and shape operations are built on this foundation.
    Every arithmetic operation returns a new Tensor; operands are never mutated.
    """

    def __init__(self, data):
        """Create a new tensor from data.

        Args:
            data: Anything np.array can convert to float32 (scalar, nested
                lists, NumPy array) or another Tensor, whose values are copied.

        EXAMPLE:
        >>> t = Tensor([1, 2, 3])
        >>> print(t.shape)
        (3,)
        >>> print(t.size)
        3
        """
        # Unwrap Tensors explicitly: np.array cannot convert a Tensor object
        # to float32 on its own.
        if isinstance(data, Tensor):
            data = data.data
        self.data = np.array(data, dtype=np.float32)
        self.shape = self.data.shape
        self.size = self.data.size
        self.dtype = self.data.dtype

    def __repr__(self):
        "String representation for debugging"
        return f"Tensor(data={self.data}, shape={self.shape})"

    def __str__(self):
        "String representation"
        return f"Tensor({self.data})"

    def numpy(self):
        "Return the underlying NumPy array (not a copy)"
        return self.data

    def memory_footprint(self):
        """Calculate exact memory usage in bytes.

        Systems Concept: Understanding memory footprint is fundamental to ML systems.
        Before running any operation, engineers should know how much memory it requires.

        Returns:
            int: Memory usage in bytes (e.g., 1000x1000 float32 = 4MB)
        """
        return self.data.nbytes

    @staticmethod
    def _operand(other):
        "Return the raw array of a Tensor operand, or a non-Tensor operand unchanged"
        return other.data if isinstance(other, Tensor) else other

    def __add__(self, other):
        """Add a tensor or scalar element-wise with broadcasting support.

        Args:
            other: A Tensor, or any value NumPy can broadcast against self.data.

        Returns:
            Tensor: A new tensor holding self + other.

        EXAMPLE:
        >>> a = Tensor([1, 2, 3])
        >>> b = Tensor([4, 5, 6])
        >>> c = a + b
        >>> print(c.data)
        [5. 7. 9.]
        """
        # Always wrap the result so scalar operands also yield a Tensor.
        return Tensor(self.data + self._operand(other))

    def __sub__(self, other):
        """Subtract a tensor or scalar element-wise with broadcasting support.

        Args:
            other: A Tensor, or any value NumPy can broadcast against self.data.

        Returns:
            Tensor: A new tensor holding self - other.

        EXAMPLE:
        >>> a = Tensor([5, 7, 9])
        >>> b = Tensor([1, 2, 3])
        >>> c = a - b
        >>> print(c.data)
        [4. 5. 6.]
        """
        return Tensor(self.data - self._operand(other))

    def __mul__(self, other):
        """Multiply by a tensor or scalar element-wise (NOT matrix multiplication).

        Args:
            other: A Tensor, or any value NumPy can broadcast against self.data.

        Returns:
            Tensor: A new tensor holding self * other.

        EXAMPLE:
        >>> a = Tensor([1, 2, 3])
        >>> b = Tensor([4, 5, 6])
        >>> c = a * b
        >>> print(c.data)
        [ 4. 10. 18.]
        """
        return Tensor(self.data * self._operand(other))

    def __truediv__(self, other):
        """Divide by a tensor or scalar element-wise with broadcasting support.

        Args:
            other: A Tensor, or any value NumPy can broadcast against self.data.

        Returns:
            Tensor: A new tensor holding self / other.

        EXAMPLE:
        >>> a = Tensor([4, 6, 8])
        >>> b = Tensor([2, 2, 2])
        >>> c = a / b
        >>> print(c.data)
        [2. 3. 4.]
        """
        return Tensor(self.data / self._operand(other))

    def matmul(self, other):
        """Matrix multiplication of two tensors.

        Behaviour by operand rank:
        - either operand 0-D: falls back to element-wise multiplication
        - both operands 2-D: explicit nested loops over output cells (educational)
        - anything else (1-D vectors, batched 3-D+): delegated to np.matmul

        Args:
            other: The right-hand Tensor.

        Returns:
            Tensor: The matrix product, e.g. (M, K) @ (K, N) = (M, N).

        Raises:
            TypeError: If other is not a Tensor.
            ValueError: If the inner dimensions do not match.

        EXAMPLE:
        >>> a = Tensor([[1, 2], [3, 4]])  # 2x2
        >>> b = Tensor([[5, 6], [7, 8]])  # 2x2
        >>> c = a.matmul(b)
        >>> print(c.data)
        [[19. 22.]
         [43. 50.]]
        """
        if not isinstance(other, Tensor):
            raise TypeError("Both elements must be tensors")

        left, right = self.data, other.data

        # A 0-D tensor has no axes to contract, so treat it as a scalar factor.
        if left.ndim == 0 or right.ndim == 0:
            return Tensor(left * right)

        # The contracted axis is the last axis of the left operand and the
        # second-to-last axis of the right operand (or its only axis if 1-D).
        inner_left = left.shape[-1]
        inner_right = right.shape[-2] if right.ndim >= 2 else right.shape[0]
        if inner_left != inner_right:
            raise ValueError(
                f"Inner dimensions must match: {inner_left} \u2260 {inner_right} "
                f"(shapes {left.shape} and {right.shape})"
            )

        if left.ndim == 2 and right.ndim == 2:
            # Each output cell is the dot product of one row of the left
            # matrix with one column of the right matrix.
            rows, cols = left.shape[0], right.shape[1]
            product = np.zeros((rows, cols), dtype=np.float32)
            for i in range(rows):
                for j in range(cols):
                    product[i, j] = np.dot(left[i, :], right[:, j])
            return Tensor(product)

        # Vector and batched cases: rely on NumPy's matmul semantics.
        return Tensor(np.matmul(left, right))



# Unit tests

In [None]:
def test_unit_matrix_multiplication():
    """🧪 Test matrix multiplication operations.

    Covers square and rectangular 2-D products, a matrix-vector product,
    and the ValueError raised for mismatched inner dimensions.
    """
    print("🧪 Unit Test: Matrix Multiplication...")

    # Test 2x2 matrix multiplication (basic case)
    a = Tensor([[1, 2], [3, 4]])  # 2x2
    b = Tensor([[5, 6], [7, 8]])  # 2x2
    result = a.matmul(b)
    # Expected: [[1*5+2*7, 1*6+2*8], [3*5+4*7, 3*6+4*8]] = [[19, 22], [43, 50]]
    expected = np.array([[19, 22], [43, 50]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test rectangular matrices (common in neural networks)
    c = Tensor([[1, 2, 3], [4, 5, 6]])  # 2x3 (like batch_size=2, features=3)
    d = Tensor([[7, 8], [9, 10], [11, 12]])  # 3x2 (like features=3, outputs=2)
    result = c.matmul(d)
    # Expected: [[1*7+2*9+3*11, 1*8+2*10+3*12], [4*7+5*9+6*11, 4*8+5*10+6*12]]
    expected = np.array([[58, 64], [139, 154]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test matrix-vector multiplication (common in forward pass)
    matrix = Tensor([[1, 2, 3], [4, 5, 6]])  # 2x3
    vector = Tensor([1, 2, 3])  # 3x1 (conceptually)
    result = matrix.matmul(vector)
    # Expected: [1*1+2*2+3*3, 4*1+5*2+6*3] = [14, 32]
    expected = np.array([14, 32], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test shape validation - should raise a clear error
    incompatible_a = Tensor([[1, 2]])  # 1x2
    incompatible_b = Tensor([[1], [2], [3]])  # 3x1
    try:
        incompatible_a.matmul(incompatible_b)  # 1x2 @ 3x1 should fail (2 != 3)
    except ValueError as e:
        assert "Inner dimensions must match" in str(e)
        assert "2 \u2260 3" in str(e)  # Should show the specific dimensions
    else:
        # Raised outside the try body so it cannot be confused with the
        # exception under test.
        raise AssertionError("Should have raised ValueError for incompatible shapes")

    print("✅ Matrix multiplication works correctly!")

# Run the test right away when the cell/module is executed directly.
if __name__ == "__main__":
    test_unit_matrix_multiplication()

🧪 Unit Test: Matrix Multiplication...


TypeError: 'numpy._ArrayFunctionDispatcher' object is not subscriptable

In [None]:
def test_unit_arithmetic_operations():
    """🧪 Test arithmetic operations with broadcasting.

    Exercises +, -, * and / between tensors and scalars, matrix + vector
    broadcasting, and a chained expression.
    """
    print("🧪 Unit Test: Arithmetic Operations...")

    # Test tensor + tensor
    a = Tensor([1, 2, 3])
    b = Tensor([4, 5, 6])
    result = a + b
    assert np.array_equal(result.data, np.array([5, 7, 9], dtype=np.float32))

    # Test tensor + scalar (very common in ML)
    result = a + 10
    assert np.array_equal(result.data, np.array([11, 12, 13], dtype=np.float32))

    # Test broadcasting with different shapes (matrix + vector)
    matrix = Tensor([[1, 2], [3, 4]])
    vector = Tensor([10, 20])
    result = matrix + vector
    expected = np.array([[11, 22], [13, 24]], dtype=np.float32)
    assert np.array_equal(result.data, expected)

    # Test subtraction (data centering)
    result = b - a
    assert np.array_equal(result.data, np.array([3, 3, 3], dtype=np.float32))

    # Test multiplication (scaling)
    result = a * 2
    assert np.array_equal(result.data, np.array([2, 4, 6], dtype=np.float32))

    # Test division (normalization)
    result = b / 2
    assert np.array_equal(result.data, np.array([2.0, 2.5, 3.0], dtype=np.float32))

    # Test chaining operations (common in ML pipelines)
    normalized = (a - 2) / 2  # Center and scale
    expected = np.array([-0.5, 0.0, 0.5], dtype=np.float32)
    # allclose rather than array_equal: the comparison tolerates float rounding
    assert np.allclose(normalized.data, expected)

    print("✅ Arithmetic operations work correctly!")

# Run the test right away when the cell/module is executed directly.
if __name__ == "__main__":
    test_unit_arithmetic_operations()

🧪 Unit Test: Arithmetic Operations...
✅ Arithmetic operations work correctly!


# Experiments

In [None]:
# Iterating over a 2-D array steps through its rows, so each print shows one row.
A = np.array([[2, 2]] * 2)
for i in A:
    print(i)

[2 2]
[2 2]
