In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

from scipy.io import loadmat
from torch.autograd import Variable
from torchvision.models import efficientnet_b0
from torch.nn import (
    AdaptiveAvgPool2d,
    Conv2d,
    Linear,
    SiLU,
    Sigmoid
)

from numpy_utils.layers import (
    Layer,
    LinearLayer,
    ConvLayer,
    SiLULayer,
    SigmoidLayer,
    CrossEntropyLoss,
    CrossEntropyCost,
    SoftmaxLayer
)
from data_utils.dataset import CarsDataset

In [2]:
# plt.rcParams['figure.figsize'] = (15, 5)
%load_ext autoreload
%autoreload 2

In [5]:
def annot_mat_to_csv(mat_path, csv_path):
    meta = loadmat(mat_path)
    annots = meta['annotations']
    c_name = annots[:, :][['class', 'fname']].squeeze(0)
    c = list(map(lambda x: x.item(), c_name['class']))
    name = list(map(lambda x: x.item(), c_name['fname']))
    df = pd.DataFrame({
        'class': c,
        'fname': name
    })
    df.to_csv(csv_path, index=False)

In [6]:
annot_mat_to_csv('../data/cars_train_annos.mat', '../data/train.csv')

In [112]:
dts = CarsDataset('../data/train', '../data/train.csv')

In [132]:
it = iter(dts)
hws = np.array((list(map(lambda x: x[0].size, it))))
h, w = map(lambda x: x.squeeze(1), np.split(hws, 2, axis=1))
print(h.shape, w.shape)

(8144,) (8144,)


In [135]:
print(len(np.unique(h)))
print(len(np.unique(w)))

654
829


In [31]:
def compare(X, ft, fn, ac=True):
    out = ft(X).detach().numpy()
    outn = fn(X.detach().numpy())
    print('MSE', np.power(out - outn, 2).sum())
    if ac:
        print('eq', np.allclose(out, outn))

In [19]:
model = efficientnet_b0(weights=None)

In [7]:
lt = Linear(512, 128)
ln = LinearLayer(512, 128)

In [12]:
ln.W = lt.weight.data.numpy()
ln.b = lt.bias.data.numpy()

In [13]:
X = torch.rand((1, 512))
Xn = X.numpy()

In [14]:
out = lt(X).detach().numpy()
outn = ln(Xn)
print(np.power(out - outn, 2).sum())
print(np.allclose(out, outn))

6.5755734e-13
False


In [15]:
ct = Conv2d(3, 4, 3)
cn = ConvLayer(3, 4, 3)
wt, bt = ct._parameters.values()
# cn.W = np.transpose(wt.data.numpy(), (0, 1, 2, 3))
cn.W = wt.data.numpy()
cn.b = bt.data.numpy()

In [16]:
X = torch.rand(4, 3, 5, 5)
Xn = X.numpy()
# Xn = np.arange(48).reshape(1, 3, 4, 4)

In [17]:
out = ct(X).detach().numpy()
outn = cn(Xn)
print(*(x.shape for x in (out, outn)))

(4, 4, 3, 3) (4, 4, 3, 3)


In [18]:
print(np.power(out - outn, 2).sum())
# print(np.power(out - outn2, 2).sum())
# print(np.power(out - outn3, 2).sum())
# print(np.power(out - outn4, 2).sum())
print(np.allclose(out, outn))
# print(np.allclose(out, outn2))
# print(np.allclose(out, outn3))
# print(np.allclose(out, outn4))

2.0724481e-13
True


In [41]:
X = torch.rand(5, 3, 4, 4)
compare(X, SiLU(), SiLULayer())
compare(X, Sigmoid(), SigmoidLayer())

MSE 1.339315e-13
eq True
MSE 1.314504e-13
eq True


In [44]:
X = torch.rand(5, 3, 16, 16)
avgpool = AdaptiveAvgPool2d(output_size=1)
out = avgpool(X)
print(out.shape)

torch.Size([5, 3, 1, 1])


In [20]:
class Md(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.l1 = nn.Linear(8, 4)
        self.act1 = nn.Sigmoid()

    def forward(self, X):
        x = self.l1(X)
        x = self.act1(x)
        return x

In [79]:
# md = Md()
md = nn.Linear(8, 4)
mdn = LinearLayer(8, 4)
mdn.W = md.weight.data.numpy()
mdn.b = md.bias.data.numpy()
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropy()

In [80]:
X = torch.rand(5, 8)
lab = torch.randint(0, 4, (5,))
Xn = X.numpy()
labn = lab.numpy()

In [82]:
out = md(X)
outn = mdn(Xn)
loss = crit(out, lab)
lossn = critn(outn, labn)

tensor(1.3982, grad_fn=<NllLossBackward0>)
1.3982131
True


In [94]:
crit = nn.CrossEntropyLoss(reduction='mean')
critn = CrossEntropyLoss()
sm = SoftmaxLayer()
cre = CrossEntropyCost()

In [95]:
X0 = torch.rand(10, 4, requires_grad=True)
lab0 = torch.randint(0, 4, (10,))

In [96]:
bs = 3
# X = torch.rand(bs, 4, requires_grad=True)
# lab = torch.randint(0, 4, (bs,))
X = X0[:bs, :].clone().detach().requires_grad_(True)
lab = lab0[:bs].clone().detach()
Xn = X.detach().numpy()
labn = lab.numpy()

In [97]:
out1 = crit(X, lab)
out2 = critn(Xn, labn)
_out3 = sm(Xn)
out3 = cre(_out3, labn)
print(np.allclose(out1.detach(), out3))
print(np.allclose(out1.detach(), out2))

True
True


In [98]:
out1.backward()
print(X.grad)

tensor([[-0.2752,  0.0611,  0.1246,  0.0895],
        [ 0.1351, -0.2702,  0.0630,  0.0721],
        [ 0.0962,  0.1072,  0.0645, -0.2680]])


In [102]:
back = cre.backward()
# print(back)
back1 = sm.backward(back)
print(back1)

[[-0.27515164  0.06105502  0.1245793   0.08951733]
 [ 0.13511956 -0.27021483  0.06302248  0.0720728 ]
 [ 0.09622362  0.10722739  0.06450398 -0.26795498]]


In [101]:
bck = critn.backward(out2)
print(bck)

[[-0.27515167  0.06105502  0.1245793   0.08951733]
 [ 0.13511957 -0.27021486  0.06302249  0.07207281]
 [ 0.09622362  0.10722739  0.06450398 -0.26795498]]
