In [1]:
from vibegrad import Tensor

In [2]:
a = Tensor(2)
b = Tensor(4)

In [3]:
import numpy as np
c = Tensor(1)
c.data

array(1.)

In [4]:
c = a*b
c.backward()
c.grad

array(1.)

In [5]:
b.grad, a.grad

(array(2.), array(4.))

In [6]:
c.zero_grad()
a.zero_grad()
b.zero_grad()

In [7]:
from vibegrad.nn import Linear, Sequential

In [8]:
a = Linear(1, 1)

In [9]:
import numpy as np
np.random.uniform((1,1))

array([1., 1.])

In [10]:
a(Tensor([1]))

Tensor(data=[0.41695603])

In [11]:
a.weight, a.bias

(Tensor(data=[[0.41695603]]), Tensor(data=[0.]))

In [12]:
import torch
from torch.nn import Linear as tLinear

In [13]:
a = tLinear(1,1)

In [14]:
a(torch.tensor([1.0]))

tensor([-0.2204], grad_fn=<ViewBackward0>)

In [15]:
a.weight, a.bias

(Parameter containing:
 tensor([[-0.1379]], requires_grad=True),
 Parameter containing:
 tensor([-0.0824], requires_grad=True))

In [16]:
from vibegrad.nn import *
from vibegrad.optims import *

In [17]:
model = Sequential([
    Linear(256, 300),
    ReLU(),
    Linear(300, 300),
    ReLU(),
    Linear(300, 1),
    Sigmoid()
])
for param in model.parameters:
    print(param.data.shape)
    print(param.grad)
for layer in model.layers:
    if hasattr(layer, 'weight'):
        print("hi")
        print(layer.weight.grad)
        print(layer.bias.grad)
type(model.parameters)

(256, 300)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(300,)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0

list

In [18]:
input_data = Tensor(np.random.randn(10, 256).astype(np.float32))

output_data = Tensor(np.random.randint(0, 2, size=(10, 1)).astype(np.float32))

print("Input data shape:", input_data.shape)
print("Output data shape:", output_data.shape)

Input data shape: (10, 256)
Output data shape: (10, 1)


In [19]:
loss_fn = BCELoss()
optim = SGD(model.parameters, 1)

In [20]:
for _ in range(10):
    logits = model(input_data)
    loss = loss_fn(logits, output_data)
    loss.backward()
    optim.step()
    optim.zero_grad()
    print(loss)

Tensor(data=4.6523017150177965)
Tensor(data=4.598480935177635)
Tensor(data=4.545592412072887)
Tensor(data=4.493650568878865)
Tensor(data=4.442666689621495)
Tensor(data=4.39264894987245)
Tensor(data=4.343602492636895)
Tensor(data=4.295529559809173)
Tensor(data=4.248429684434053)
Tensor(data=4.202299909179132)


In [21]:
a = np.array(1)
b = np.array(np.arange(10)).reshape(10,1)
print(a,b)
a + b

1 [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]
 [9]]


array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10]])

In [22]:
model_torch = torch.nn.Sequential(
    torch.nn.Linear(256, 300),
    torch.nn.ReLU(),
    torch.nn.Linear(300, 300),
    torch.nn.ReLU(),
    torch.nn.Linear(300, 1),
    torch.nn.Sigmoid()
)
model_torch.parameters

<bound method Module.parameters of Sequential(
  (0): Linear(in_features=256, out_features=300, bias=True)
  (1): ReLU()
  (2): Linear(in_features=300, out_features=300, bias=True)
  (3): ReLU()
  (4): Linear(in_features=300, out_features=1, bias=True)
  (5): Sigmoid()
)>

In [23]:
input_data = torch.Tensor(np.random.randn(10, 256).astype(np.float32))
output_data = torch.Tensor(np.random.randint(0, 2, size=(10, 1)).astype(np.float32))

loss_fn = torch.nn.BCELoss()
optim = torch.optim.SGD(model_torch.parameters(), 0.1, momentum=0.0)

for _ in range(10):
    logits = model_torch(input_data)
    loss = loss_fn(logits, output_data)
    loss.backward()
    optim.step()
    optim.zero_grad()
    print(loss)

tensor(0.7023, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6571, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6168, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.5396, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4995, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4581, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.4150, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3710, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.3276, grad_fn=<BinaryCrossEntropyBackward0>)


In [None]:
model.total_params()

167701

In [25]:
X = np.random.uniform(-1, 1, size=(100, 256))
y = np.random.choice([0, 1], size=(100, 10), p=[0.5, 0.5])
X.shape
X = Tensor(X)
y = Tensor(y) 
y.data[:5]

array([[1, 0, 1, 0, 0, 1, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1, 0, 0, 0, 1],
       [1, 1, 1, 1, 0, 0, 0, 1, 0, 0],
       [1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 0, 1, 0, 0, 0, 1]])

In [26]:
out = model(X)
out.data.shape, y.data.shape

ValueError: operands could not be broadcast together with shapes (100,300) (10,300) 

In [26]:
prediction = Tensor(np.array([0.9, 0.4, 0.7, 0.2]))
target = Tensor(np.array([1, 0, 1, 0]))

loss_fn = BCELoss(reduction="sum")
loss = loss_fn(prediction, target)
print(loss)
loss.backward()
prediction.grad

Tensor(data=1.1960046346767592)


array([-1.11111111,  1.66666667, -1.42857143,  1.25      ])

In [27]:
prediction = torch.tensor(np.array([0.9, 0.4, 0.7, 0.2]), requires_grad=True)
target = torch.tensor(np.array([1,0,1,0]), dtype=torch.double, requires_grad=True)
loss_fn_torch = torch.nn.BCELoss(reduction="sum")
loss_torch = loss_fn_torch(prediction, target)
print(loss_torch)
loss_torch.backward()
prediction.grad

tensor(1.1960, dtype=torch.float64, grad_fn=<BinaryCrossEntropyBackward0>)


tensor([-1.1111,  1.6667, -1.4286,  1.2500], dtype=torch.float64)

In [28]:
model = torch.nn.Sequential()
model.append(torch.nn.Linear(256, 300))
model.append(torch.nn.Linear(300,300))
model.append(torch.nn.Linear(300,10))

Sequential(
  (0): Linear(in_features=256, out_features=300, bias=True)
  (1): Linear(in_features=300, out_features=300, bias=True)
  (2): Linear(in_features=300, out_features=10, bias=True)
)

In [29]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total number of parameters: {total_params}")

Total number of parameters: 170410


In [30]:
model.parameters

<bound method Module.parameters of Sequential(
  (0): Linear(in_features=256, out_features=300, bias=True)
  (1): Linear(in_features=300, out_features=300, bias=True)
  (2): Linear(in_features=300, out_features=10, bias=True)
)>

In [31]:
import vibegrad.nn as nn

In [32]:
nn.relu(Tensor(2.0))

Tensor(data=2.0)

In [32]:
import numpy as np

# Example: Broadcasting shapes
shape1 = (1,)
shape2 = (10, 1)
arr1 = np.random.randn(1)
arr2 = np.random.randn(10,1)

# np.broadcast_shapes(shape1, shape2), np.broadcast_shapes(shape2, shape1)
arr1 += arr2

ValueError: non-broadcastable output operand with shape (1,) doesn't match the broadcast shape (10,1)

In [34]:
arr3 = np.broadcast_to(arr1, arr2.shape)

In [36]:
arr2 += arr3