In [61]:
import torch

In [62]:
x = torch.randn(2048, 4096)
u, s, v = torch.svd(x)
print(s)

torch.nn.init.xavier_normal_(x, gain=1.0)
u, s, v = torch.svd(x)
print(s)

torch.nn.init.xavier_uniform_(x, gain=1.0)
u, s, v = torch.svd(x)
print(s)

torch.nn.init.kaiming_normal_(x, a=0, mode='fan_in', nonlinearity='leaky_relu')
u, s, v = torch.svd(x)
print(s)

torch.nn.init.kaiming_uniform_(x, a=0, mode='fan_in', nonlinearity='leaky_relu')
u, s, v = torch.svd(x)
print(s)


torch.nn.init.kaiming_normal_(x, a=0, mode='fan_out', nonlinearity='leaky_relu')
u, s, v = torch.svd(x)
print(s)

torch.nn.init.kaiming_uniform_(x, a=0, mode='fan_out', nonlinearity='leaky_relu')
u, s, v = torch.svd(x)
print(s)

tensor([108.8709, 108.6249, 108.3073,  ...,  19.2459,  19.2225,  18.7691])
tensor([1.9698, 1.9577, 1.9563,  ..., 0.3469, 0.3422, 0.3386])
tensor([1.9707, 1.9615, 1.9569,  ..., 0.3467, 0.3446, 0.3431])
tensor([2.4029, 2.3983, 2.3899,  ..., 0.4255, 0.4195, 0.4146])
tensor([2.4085, 2.3986, 2.3917,  ..., 0.4243, 0.4221, 0.4122])
tensor([3.4034, 3.3960, 3.3882,  ..., 0.5975, 0.5963, 0.5911])
tensor([3.4142, 3.4032, 3.3970,  ..., 0.6015, 0.5951, 0.5892])


In [70]:
import torch.nn as nn
import numpy as numpy

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock,self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) # 3x3 conv
        self.bn1 = nn.BatchNorm2d(planes) # batch norm
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) # 3x3 conv
        self.bn2 = nn.BatchNorm2d(planes) # batch norm

        self.shortcut = nn.Sequential() # identity
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes,self.expansion*planes,kernel_size=1,stride=stride,bias=False), # 1x1 conv
                nn.BatchNorm2d(self.expansion*planes) # batch norm
            )

    def forward(self,x):
        out = nn.ReLU()(self.bn1(self.conv1(x))) # relu -> batch norm -> conv
        out = self.bn2(self.conv2(out)) # batch norm -> conv
        out += self.shortcut(x) # identity
        out = nn.ReLU()(out) # relu
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet,self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1,bias=False) # 3x3 conv
        self.bn1 = nn.BatchNorm2d(64) # batch norm
        self.layer1 = self._make_layer(block,64,num_blocks[0],stride=1) # 3x3 conv
        self.layer2 = self._make_layer(block,128,num_blocks[1],stride=2) # 3x3 conv
        self.layer3 = self._make_layer(block,256,num_blocks[2],stride=2) # 3x3 conv
        self.layer4 = self._make_layer(block,512,num_blocks[3],stride=2) # 3x3 conv
        self.linear = nn.Linear(512*block.expansion,num_classes) # linear

    def _make_layer(self,block,planes,num_blocks,stride):
        strides = [stride] + [1]*(num_blocks-1) # [1,1,1,1] for stride=1 or [2,1,1,1] for stride=2
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes,planes,stride)) # append basic block
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers) # sequential

    def forward(self,x):
        out = nn.ReLU()(self.bn1(self.conv1(x))) # relu -> batch norm -> conv
        out = self.layer1(out) # basic block
        out = self.layer2(out) # basic block
        out = self.layer3(out) # basic block
        out = self.layer4(out) # basic block
        out = nn.AvgPool2d(4)(out) # average pooling
        out = out.view(out.size(0),-1) # flatten
        out = self.linear(out) # linear
        return out

def ResNet18():
    return ResNet(BasicBlock,[2,2,2,2])


In [64]:
import torch.nn as nn
import numpy as numpy

class BasicBlockNoRes(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlockNoRes,self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) # 3x3 conv
        self.bn1 = nn.BatchNorm2d(planes) # batch norm
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) # 3x3 conv
        self.bn2 = nn.BatchNorm2d(planes) # batch norm

        self.shortcut = nn.Sequential() # identity
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes,self.expansion*planes,kernel_size=1,stride=stride,bias=False), # 1x1 conv
                nn.BatchNorm2d(self.expansion*planes) # batch norm
            )

    def forward(self,x):
        out = nn.ReLU()(self.bn1(self.conv1(x))) # relu -> batch norm -> conv
        out = self.bn2(self.conv2(out)) # batch norm -> conv
        # out += self.shortcut(x) # identity
        out = nn.ReLU()(out) # relu
        return out

def ResNet18nores():
    return ResNet(BasicBlockNoRes,[2,2,2,2])


In [73]:

model = ResNet18()
model_nores = ResNet18nores()
times = 100

In [74]:
from tqdm import trange
max_lip = 0
max_lip_nr = 0
for i in trange(times):
    input = torch.randn(4,3,32,32)
    eps = 1e-7*torch.randn(4,3,32,32)
    p = 1

    x1 = input
    x2 = input + eps

    y1 = model(x1)
    y2 = model(x2)

    y1_nr = model_nores(x1)
    y2_nr = model_nores(x2)

    # lip = torch.max(torch.norm(y1-y2,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    # lip_nr = torch.max(torch.norm(y1_nr-y2_nr,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    lip = torch.max(torch.sqrt(torch.sum(torch.pow(y1-y2,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))
    lip_nr = torch.max(torch.sqrt(torch.sum(torch.pow(y1_nr-y2_nr,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))

    if lip > max_lip:
        max_lip = lip
    if lip_nr > max_lip_nr:
        max_lip_nr = lip_nr

print(max_lip, max_lip_nr)


100%|██████████| 100/100 [00:07<00:00, 12.67it/s]

tensor(79.3784, grad_fn=<DivBackward0>) tensor(114.5417, grad_fn=<DivBackward0>)





In [75]:
# xavier_normal the model and model_nores
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)

for m in model_nores.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)


from tqdm import trange
max_lip = 0
max_lip_nr = 0
for i in trange(times):
    input = torch.randn(4,3,32,32)
    eps = 1e-7*torch.randn(4,3,32,32)
    p = 1

    x1 = input
    x2 = input + eps

    y1 = model(x1)
    y2 = model(x2)

    y1_nr = model_nores(x1)
    y2_nr = model_nores(x2)

    # lip = torch.max(torch.norm(y1-y2,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    # lip_nr = torch.max(torch.norm(y1_nr-y2_nr,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    lip = torch.max(torch.sqrt(torch.sum(torch.pow(y1-y2,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))
    lip_nr = torch.max(torch.sqrt(torch.sum(torch.pow(y1_nr-y2_nr,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))

    if lip > max_lip:
        max_lip = lip
    if lip_nr > max_lip_nr:
        max_lip_nr = lip_nr

print(max_lip, max_lip_nr)

100%|██████████| 100/100 [00:08<00:00, 12.02it/s]

tensor(76.4904, grad_fn=<DivBackward0>) tensor(108.6592, grad_fn=<DivBackward0>)





In [76]:
# kaiming_normal the model and model_nores
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)

for m in model_nores.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)


from tqdm import trange
max_lip = 0
max_lip_nr = 0
for i in trange(times):
    input = torch.randn(4,3,32,32)
    eps = 1e-7*torch.randn(4,3,32,32)
    p = 1

    x1 = input
    x2 = input + eps

    y1 = model(x1)
    y2 = model(x2)

    y1_nr = model_nores(x1)
    y2_nr = model_nores(x2)

    # lip = torch.max(torch.norm(y1-y2,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    # lip_nr = torch.max(torch.norm(y1_nr-y2_nr,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    lip = torch.max(torch.sqrt(torch.sum(torch.pow(y1-y2,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))
    lip_nr = torch.max(torch.sqrt(torch.sum(torch.pow(y1_nr-y2_nr,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))

    if lip > max_lip:
        max_lip = lip
    if lip_nr > max_lip_nr:
        max_lip_nr = lip_nr

print(max_lip, max_lip_nr)

100%|██████████| 100/100 [00:08<00:00, 12.40it/s]

tensor(76.7082, grad_fn=<DivBackward0>) tensor(116.1856, grad_fn=<DivBackward0>)





In [77]:
# kaiming_normal the model and model_nores
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out', nonlinearity='relu')
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)

for m in model_nores.modules():
    if isinstance(m, nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out', nonlinearity='relu')
    # if isinstance(m, nn.Linear):
    #     torch.nn.init.xavier_normal_(m.weight, gain=1.0)
    if isinstance(m, nn.BatchNorm2d):
        torch.nn.init.constant_(m.weight, 1)
        torch.nn.init.constant_(m.bias, 0)


from tqdm import trange
max_lip = 0
max_lip_nr = 0
for i in trange(times):
    input = torch.randn(4,3,32,32)
    eps = 1e-7*torch.randn(4,3,32,32)
    p = 1

    x1 = input
    x2 = input + eps

    y1 = model(x1)
    y2 = model(x2)

    y1_nr = model_nores(x1)
    y2_nr = model_nores(x2)

    # lip = torch.max(torch.norm(y1-y2,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    # lip_nr = torch.max(torch.norm(y1_nr-y2_nr,p=p,dim=1))/torch.max(torch.norm(x1-x2,p=p,dim=1))
    lip = torch.max(torch.sqrt(torch.sum(torch.pow(y1-y2,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))
    lip_nr = torch.max(torch.sqrt(torch.sum(torch.pow(y1_nr-y2_nr,2),dim=1)))/torch.max(torch.sqrt(torch.sum(torch.pow(x1-x2,2),dim=1)))

    if lip > max_lip:
        max_lip = lip
    if lip_nr > max_lip_nr:
        max_lip_nr = lip_nr

print(max_lip, max_lip_nr)

100%|██████████| 100/100 [00:07<00:00, 12.83it/s]

tensor(80.1550, grad_fn=<DivBackward0>) tensor(106.0311, grad_fn=<DivBackward0>)



