In [91]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

from scipy.io import loadmat
from torch.autograd import Variable
from torchvision.models import efficientnet_b0
from torch.nn import (
    AdaptiveAvgPool2d,
    Conv2d,
    Linear,
    SiLU,
    Sigmoid,
    Flatten,
    Dropout,
    BatchNorm2d
)

from numpy_utils.layers import (
    Layer,
    LinearLayer,
    ConvLayer,
    SiLULayer,
    SigmoidLayer,
    CrossEntropyLoss,
    CrossEntropyCost,
    SoftmaxLayer,
    FlattenLayer,
    OneLayer,
    DropoutLayer,
    BatchNorm2dLayer
)
from data_utils.dataset import CarsDataset
from numpy_utils.utils import check_comb, ttn

In [7]:
# plt.rcParams['figure.figsize'] = (15, 5)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
def annot_mat_to_csv(mat_path, csv_path):
    meta = loadmat(mat_path)
    annots = meta['annotations']
    c_name = annots[:, :][['class', 'fname']].squeeze(0)
    c = list(map(lambda x: x.item(), c_name['class']))
    name = list(map(lambda x: x.item(), c_name['fname']))
    df = pd.DataFrame({
        'class': c,
        'fname': name
    })
    df.to_csv(csv_path, index=False)

In [6]:
annot_mat_to_csv('../data/cars_train_annos.mat', '../data/train.csv')

In [112]:
dts = CarsDataset('../data/train', '../data/train.csv')

In [132]:
it = iter(dts)
hws = np.array((list(map(lambda x: x[0].size, it))))
h, w = map(lambda x: x.squeeze(1), np.split(hws, 2, axis=1))
print(h.shape, w.shape)

(8144,) (8144,)


In [135]:
print(len(np.unique(h)))
print(len(np.unique(w)))

654
829


In [31]:
def compare(X, ft, fn, ac=True):
    out = ft(X).detach().numpy()
    outn = fn(X.detach().numpy())
    print('MSE', np.power(out - outn, 2).sum())
    if ac:
        print('eq', np.allclose(out, outn))

In [19]:
model = efficientnet_b0(weights=None)

In [7]:
lt = Linear(512, 128)
ln = LinearLayer(512, 128)

In [12]:
ln.W = lt.weight.data.numpy()
ln.b = lt.bias.data.numpy()

In [13]:
X = torch.rand((1, 512))
Xn = X.numpy()

In [14]:
out = lt(X).detach().numpy()
outn = ln(Xn)
print(np.power(out - outn, 2).sum())
print(np.allclose(out, outn))

6.5755734e-13
False


In [15]:
ct = Conv2d(3, 4, 3)
cn = ConvLayer(3, 4, 3)
wt, bt = ct._parameters.values()
# cn.W = np.transpose(wt.data.numpy(), (0, 1, 2, 3))
cn.W = wt.data.numpy()
cn.b = bt.data.numpy()

In [16]:
X = torch.rand(4, 3, 5, 5)
Xn = X.numpy()
# Xn = np.arange(48).reshape(1, 3, 4, 4)

In [17]:
out = ct(X).detach().numpy()
outn = cn(Xn)
print(*(x.shape for x in (out, outn)))

(4, 4, 3, 3) (4, 4, 3, 3)


In [18]:
print(np.power(out - outn, 2).sum())
# print(np.power(out - outn2, 2).sum())
# print(np.power(out - outn3, 2).sum())
# print(np.power(out - outn4, 2).sum())
print(np.allclose(out, outn))
# print(np.allclose(out, outn2))
# print(np.allclose(out, outn3))
# print(np.allclose(out, outn4))

2.0724481e-13
True


In [41]:
X = torch.rand(5, 3, 4, 4)
compare(X, SiLU(), SiLULayer())
compare(X, Sigmoid(), SigmoidLayer())

MSE 1.339315e-13
eq True
MSE 1.314504e-13
eq True


In [44]:
X = torch.rand(5, 3, 16, 16)
avgpool = AdaptiveAvgPool2d(output_size=1)
out = avgpool(X)
print(out.shape)

torch.Size([5, 3, 1, 1])


In [20]:
class Md(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.l1 = nn.Linear(8, 4)
        self.act1 = nn.Sigmoid()

    def forward(self, X):
        x = self.l1(X)
        x = self.act1(x)
        return x

In [79]:
# md = Md()
md = nn.Linear(8, 4)
mdn = LinearLayer(8, 4)
mdn.W = md.weight.data.numpy()
mdn.b = md.bias.data.numpy()
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()

In [80]:
X = torch.rand(5, 8)
lab = torch.randint(0, 4, (5,))
Xn = X.numpy()
labn = lab.numpy()

In [82]:
out = md(X)
outn = mdn(Xn)
loss = crit(out, lab)
lossn = critn(outn, labn)

tensor(1.3982, grad_fn=<NllLossBackward0>)
1.3982131
True


In [5]:
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()
sm = SoftmaxLayer()
cre = CrossEntropyCost()

In [8]:
X0 = torch.rand(10, 4, requires_grad=True)
lab0 = torch.randint(0, 4, (10,))

In [27]:
bs = 3
# X = torch.rand(bs, 4, requires_grad=True)
# lab = torch.randint(0, 4, (bs,))
X = X0[:bs, :].clone().detach().requires_grad_(True)
lab = lab0[:bs].clone().detach()
Xn = X.detach().numpy()
labn = lab.numpy()

In [28]:
out1 = crit(X, lab)
out2 = critn(Xn, labn)
_out3 = sm(Xn)
out3 = cre(_out3, labn)
print(np.allclose(out1.detach(), out3))
print(np.allclose(out1.detach(), out2))

True
True


In [52]:
rest, resn, xts, xns, gt, gn = check_comb([crit], [sm, cre], X0, lab0, return_grad=True)

Forward is right True


In [71]:
md = nn.Linear(36, 4)
mdn = LinearLayer(36, 4)
mdn.W = md.weight.data.numpy()
mdn.b = md.bias.data.numpy()
sil = SiLU()
siln = SiLULayer()
sig = Sigmoid()
sign = SigmoidLayer()
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()
c = Conv2d(3, 4, 3)
cn = ConvLayer(3, 4, 3)
cn.W = c.weight.data.numpy()
cn.b = c.bias.data.numpy()
f = Flatten()
fn = FlattenLayer()
trash = OneLayer()
d = Dropout()
dn = DropoutLayer()

In [72]:
bs = 10
X0 = torch.rand(bs, 3, 5, 5, requires_grad=True)
# X0 = torch.rand(bs, 4, 3, 3, requires_grad=True)
lab0 = torch.randint(0, 4, (bs,))

In [73]:
res = check_comb([c, f, md, d, crit], [cn, fn, mdn, dn, critn], X0, lab0, return_grad=True)

Forward is right False
but 0.009922383821972858


In [79]:
rest, resn, xts, xns, gt, gn = res


In [75]:
grad_idx = 1
print('\n'.join(map(lambda x: str(np.allclose(x[0], x[1])), zip(gt, gn, list(range(grad_idx))))))

False


In [93]:
print(*(t.shape for t in gt))
print(*(t.shape for t in gn))

torch.Size([10, 4]) torch.Size([10, 4]) torch.Size([10, 36]) torch.Size([10, 4, 3, 3]) torch.Size([10, 3, 5, 5])
(10, 4) (10, 4) (10, 36) (10, 4, 3, 3) (10, 3, 5, 5)


In [77]:
print(*(t.shape for t in gn))

(10, 4) (10, 4) (10, 36) (10, 4, 3, 3) (10, 3, 5, 5)


In [92]:
print(gt[1])
print(xts[-2])

tensor([[ 0.0633,  0.0000,  0.0000, -0.1653],
        [-0.0000,  0.0760,  0.0000,  0.0402],
        [ 0.0000, -0.1272,  0.0000,  0.0000],
        [-0.0000,  0.0809,  0.0456,  0.0000],
        [ 0.0000, -0.1228,  0.0339,  0.0000],
        [ 0.0000, -0.0000,  0.0510,  0.0362],
        [-0.1463,  0.0000,  0.0000,  0.0000],
        [-0.0000,  0.0000,  0.0602,  0.0377],
        [ 0.0542,  0.0000, -0.1470,  0.0000],
        [ 0.0000, -0.0991,  0.0422,  0.0247]])
tensor([[ 0.2163,  0.0000, -0.0000, -0.3864],
        [-0.0000,  0.5961,  0.0000, -0.0397],
        [ 0.0000,  0.5415, -0.0000, -0.0000],
        [ 0.0000,  0.7877,  0.2139, -0.0000],
        [ 0.0000,  0.5527, -0.2694, -0.0000],
        [-0.0000,  0.0000, -0.1006, -0.4424],
        [ 0.0966,  0.0000,  0.0000, -0.0000],
        [ 0.0000,  0.0000,  0.1641, -0.3050],
        [ 0.1539,  0.0000,  0.1315, -0.0000],
        [ 0.0000,  1.1375,  0.2652, -0.2711]], grad_fn=<MulBackward0>)


In [94]:
print(gn[1])
print(xns[-2])

[[ 0.          0.          0.10471173 -0.32511926]
 [-0.          0.15200438  0.          0.08049306]
 [ 0.         -0.          0.10395467  0.08471501]
 [-0.3215828   0.15933484  0.08976756  0.        ]
 [ 0.11777936 -0.2687593   0.          0.07546361]
 [ 0.         -0.          0.          0.07055595]
 [-0.          0.21036966  0.08224199  0.03920759]
 [-0.29422635  0.          0.11891925  0.07438882]
 [ 0.11197607  0.         -0.          0.        ]
 [ 0.08538949 -0.21904215  0.07563496  0.        ]]
[[ 0.0000000e+00  0.0000000e+00 -5.1120251e-02 -3.8643467e-01]
 [-0.0000000e+00  5.9605771e-01  0.0000000e+00 -3.9680533e-02]
 [ 0.0000000e+00  0.0000000e+00 -1.6320199e-02 -2.2098236e-01]
 [ 7.8727275e-02  7.8769195e-01  2.1390778e-01 -0.0000000e+00]
 [ 4.4446349e-01  5.5268353e-01 -0.0000000e+00 -6.9909543e-04]
 [-0.0000000e+00  0.0000000e+00 -0.0000000e+00 -4.4238818e-01]
 [ 0.0000000e+00  1.1267039e+00  1.8750355e-01 -5.5329198e-01]
 [ 4.6990812e-02  0.0000000e+00  1.6413398e-01 -