In [1]:
import re
import json
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import traceback
import numpy_utils.layers

from importlib import reload
from scipy.io import loadmat
from torch.autograd import Variable
from torchvision.models import efficientnet_b0
from torch.nn import (
    AdaptiveAvgPool2d,
    Conv2d,
    Linear,
    SiLU,
    Sigmoid,
    Flatten,
    Dropout,
    BatchNorm2d,
    Sequential
)

from numpy_utils.layers import (
    Layer,
    LinearLayer,
    ConvLayer,
    SiLULayer,
    SigmoidLayer,
    CrossEntropyLoss,
    CrossEntropyCost,
    SoftmaxLayer,
    FlattenLayer,
    OneLayer,
    DropoutLayer,
    BatchNorm2dLayer,
    DepthwiseConvLayer
)
from data_utils.dataset import CarsDataset
from numpy_utils.utils import check_comb, ttn

In [2]:
# plt.rcParams['figure.figsize'] = (15, 5)
%load_ext autoreload
%autoreload 1

In [5]:
def annot_mat_to_csv(mat_path, csv_path):
    meta = loadmat(mat_path)
    annots = meta['annotations']
    c_name = annots[:, :][['class', 'fname']].squeeze(0)
    c = list(map(lambda x: x.item(), c_name['class']))
    name = list(map(lambda x: x.item(), c_name['fname']))
    df = pd.DataFrame({
        'class': c,
        'fname': name
    })
    df.to_csv(csv_path, index=False)

In [6]:
annot_mat_to_csv('../data/cars_train_annos.mat', '../data/train.csv')

In [112]:
dts = CarsDataset('../data/train', '../data/train.csv')

In [132]:
it = iter(dts)
hws = np.array((list(map(lambda x: x[0].size, it))))
h, w = map(lambda x: x.squeeze(1), np.split(hws, 2, axis=1))
print(h.shape, w.shape)

(8144,) (8144,)


In [135]:
print(len(np.unique(h)))
print(len(np.unique(w)))

654
829


In [31]:
def compare(X, ft, fn, ac=True):
    out = ft(X).detach().numpy()
    outn = fn(X.detach().numpy())
    print('MSE', np.power(out - outn, 2).sum())
    if ac:
        print('eq', np.allclose(out, outn))

In [19]:
model = efficientnet_b0(weights=None)

In [7]:
lt = Linear(512, 128)
ln = LinearLayer(512, 128)

In [12]:
ln.W = lt.weight.data.numpy()
ln.b = lt.bias.data.numpy()

In [13]:
X = torch.rand((1, 512))
Xn = X.numpy()

In [14]:
out = lt(X).detach().numpy()
outn = ln(Xn)
print(np.power(out - outn, 2).sum())
print(np.allclose(out, outn))

6.5755734e-13
False


In [15]:
ct = Conv2d(3, 4, 3)
cn = ConvLayer(3, 4, 3)
wt, bt = ct._parameters.values()
# cn.W = np.transpose(wt.data.numpy(), (0, 1, 2, 3))
cn.W = wt.data.numpy()
cn.b = bt.data.numpy()

In [16]:
X = torch.rand(4, 3, 5, 5)
Xn = X.numpy()
# Xn = np.arange(48).reshape(1, 3, 4, 4)

In [17]:
out = ct(X).detach().numpy()
outn = cn(Xn)
print(*(x.shape for x in (out, outn)))

(4, 4, 3, 3) (4, 4, 3, 3)


In [18]:
print(np.power(out - outn, 2).sum())
# print(np.power(out - outn2, 2).sum())
# print(np.power(out - outn3, 2).sum())
# print(np.power(out - outn4, 2).sum())
print(np.allclose(out, outn))
# print(np.allclose(out, outn2))
# print(np.allclose(out, outn3))
# print(np.allclose(out, outn4))

2.0724481e-13
True


In [41]:
X = torch.rand(5, 3, 4, 4)
compare(X, SiLU(), SiLULayer())
compare(X, Sigmoid(), SigmoidLayer())

MSE 1.339315e-13
eq True
MSE 1.314504e-13
eq True


In [44]:
X = torch.rand(5, 3, 16, 16)
avgpool = AdaptiveAvgPool2d(output_size=1)
out = avgpool(X)
print(out.shape)

torch.Size([5, 3, 1, 1])


In [20]:
class Md(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.l1 = nn.Linear(8, 4)
        self.act1 = nn.Sigmoid()

    def forward(self, X):
        x = self.l1(X)
        x = self.act1(x)
        return x

In [79]:
# md = Md()
md = nn.Linear(8, 4)
mdn = LinearLayer(8, 4)
mdn.W = md.weight.data.numpy()
mdn.b = md.bias.data.numpy()
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()

In [80]:
X = torch.rand(5, 8)
lab = torch.randint(0, 4, (5,))
Xn = X.numpy()
labn = lab.numpy()

In [82]:
out = md(X)
outn = mdn(Xn)
loss = crit(out, lab)
lossn = critn(outn, labn)

tensor(1.3982, grad_fn=<NllLossBackward0>)
1.3982131
True


In [5]:
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()
sm = SoftmaxLayer()
cre = CrossEntropyCost()

In [8]:
X0 = torch.rand(10, 4, requires_grad=True)
lab0 = torch.randint(0, 4, (10,))

In [27]:
bs = 3
# X = torch.rand(bs, 4, requires_grad=True)
# lab = torch.randint(0, 4, (bs,))
X = X0[:bs, :].clone().detach().requires_grad_(True)
lab = lab0[:bs].clone().detach()
Xn = X.detach().numpy()
labn = lab.numpy()

In [28]:
out1 = crit(X, lab)
out2 = critn(Xn, labn)
_out3 = sm(Xn)
out3 = cre(_out3, labn)
print(np.allclose(out1.detach(), out3))
print(np.allclose(out1.detach(), out2))

True
True


In [52]:
rest, resn, xts, xns, gt, gn = check_comb([crit], [sm, cre], X0, lab0, return_grad=True)

Forward is right True


In [78]:
md = nn.Linear(36, 4)
mdn = LinearLayer(36, 4)
mdn.W = md.weight.data.numpy()
mdn.b = md.bias.data.numpy()
sil = SiLU()
siln = SiLULayer()
sig = Sigmoid()
sign = SigmoidLayer()
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()
c = Conv2d(3, 4, 3)
cn = ConvLayer(3, 4, 3)
cn.W = c.weight.data.numpy()
cn.b = c.bias.data.numpy()
dc = Conv2d(2, 2, 3, groups=2)
f = Flatten()
fn = FlattenLayer()
trash = OneLayer()
d = Dropout()
dn = DropoutLayer()
b = BatchNorm2d(4, track_running_stats=False)
bn = BatchNorm2dLayer(4)
bn.W = ttn(b.weight.data)
bn.b = ttn(b.bias.data)

In [4]:
bs = 10
X0 = torch.rand(bs, 2, 5, 5, requires_grad=True)
# X0 = torch.rand(bs, 4, 3, 3, requires_grad=True)
lab0 = torch.randint(0, 4, (bs,))

In [5]:
[cn, bn, fn, mdn, critn]
try:
    res = check_comb([dc, b, f, md], [trash], X0)#, lab0, return_grad=True)
except:
    traceback.print_exc()

Forward is right False
but 1.4633899


In [150]:
x = 0

In [157]:
print(b.weight.grad)
print(b.weight.grad - x)
print(x)
print(bn.dW.squeeze())
print(np.allclose(ttn(b.weight.grad), bn.dW))
print(np.allclose(ttn(b.bias.grad), bn.db))

tensor([-0.0811, -0.0107, -0.0752, -0.1199])
tensor([-0.0270, -0.0036, -0.0251, -0.0400])
tensor([-0.0541, -0.0071, -0.0502, -0.0799])
[-0.08113413 -0.01070037 -0.07522777 -0.11986075]
True
True


In [158]:
x = b.weight.grad.clone()

In [88]:
rest, resn, xts, xns, gt, gn = res
print(len(gt), len(gn))

5 5


In [89]:
grad_idx = 5
print('\n'.join(map(lambda x: str(np.allclose(x[0], x[1])), zip(gt, gn, list(range(grad_idx))))))

True
True
True
True
True


In [63]:
print(*(tuple(t.shape) for t in gt))
print(*(t.shape for t in gn))

(10, 4) (10, 36) (10, 4, 3, 3) (10, 4, 3, 3) (10, 3, 5, 5)
(10, 4) (10, 36) (10, 4, 3, 3) (10, 4, 3, 3) (10, 3, 5, 5)


In [70]:
print(gt[-2][0, 0])
print(gn[-2][0, 0])

tensor([[ 0.0646,  0.0195, -0.0254],
        [-0.0161,  0.0286, -0.0563],
        [ 0.0400,  0.0250, -0.0333]])
[[ 7.59728431e-03 -1.85820546e-03 -8.14245131e-03]
 [-7.46808136e-03  2.95161937e-03 -1.38175009e-02]
 [ 4.11711577e-03 -9.15708765e-05 -9.75870219e-03]]


In [2]:
dc = Conv2d(2, 2, 3, groups=2)
dcl = DepthwiseConvLayer(2, 3)
dcl.W = ttn(dc.weight.data)

In [3]:
bs = 1
x = torch.rand(bs, 2, 5, 5)
xn = ttn(x)

In [4]:
t = dc(x)
n = dcl(xn)
print(t.shape)
print(n.shape)
print(np.allclose(ttn(t), n))

(1, 2, 3, 3, 3, 3)
(2, 1, 3, 3)
(3, 3, 3, 3) (3, 3)
(3, 3, 3, 3) (3, 3)
torch.Size([1, 2, 3, 3])
(1, 2, 3, 3)
False


In [5]:
print(t[0, 0])
print(n[0, 0])

tensor([[-0.1462, -0.0389, -0.2410],
        [-0.2911,  0.0523, -0.2691],
        [ 0.1290, -0.5074, -0.1918]], grad_fn=<SelectBackward0>)
[[ 0.1214938   0.22880936  0.02667072]
 [-0.02340214  0.320019   -0.00147785]
 [ 0.39667377 -0.23974982  0.07582869]]
