In [2]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms, datasets
from tqdm import tqdm
import numpy as np
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
train_size = int(0.9 * len(trainset))
valid_size = int(0.1 * len(trainset))
_,validset = torch.utils.data.random_split(trainset, [train_size, valid_size])


train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)
valid_loader = torch.utils.data.DataLoader(
    validset, batch_size = 100, shuffle = False, num_workers = 2
)


testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class Bottleneck_with_Tanh(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck_with_Tanh, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = torch.tanh(self.bn1(self.conv1(x)))
        out = torch.tanh(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = torch.tanh(out)
        return out

In [5]:
class ResNet_with_Tanh(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_with_Tanh, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = torch.tanh(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [6]:
class Bottleneck_with_Relu(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck_with_Relu, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [7]:
class ResNet_with_Relu(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_with_Relu, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [8]:
class Bottleneck_without_residual_learning(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck_without_residual_learning, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        #out += self.shortcut(x)
        out = F.relu(out)
        return out

In [9]:
class ResNet_without_residual_learning(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_without_residual_learning, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [10]:
def ResNet50_with_Tanh():
    return ResNet_with_Tanh(Bottleneck_with_Tanh, [3, 4, 6, 3])
def ResNet50_with_Relu():
    return ResNet_with_Relu(Bottleneck_with_Relu, [3, 4, 6, 3])
def ResNet50_without_residual_learning():
    return ResNet_without_residual_learning(Bottleneck_without_residual_learning, [3, 4, 6, 3])
def ResNet152_with_Tanh():
    return ResNet_with_Tanh(Bottleneck_with_Tanh, [3, 8, 36, 3])
def ResNet152_with_Relu():
    return ResNet_with_Relu(Bottleneck_with_Relu, [3, 8, 36, 3])
def ResNet152_without_residual_learning():
    return ResNet_without_residual_learning(Bottleneck_without_residual_learning, [3, 8, 36, 3])

In [11]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using PyTorch version: {torch.__version__}, Device: {DEVICE}")

model = ResNet152_with_Tanh().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
mse_loss = nn.MSELoss()

Using PyTorch version: 1.13.1+cu116, Device: cuda


In [13]:
def train(model, train_loader, optimizer, bar):
    model.train()
    loss_sum = 0
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss_sum += loss.item()
        loss.backward()
        optimizer.step()
        if batch_idx % 30 == 0:
            bar.set_postfix(
                Train_Loss=f"{loss.item():0.3f}",
            )
    return loss_sum / len(train_loader)


In [17]:
def parameters_as_list(model,layer):
    ret = {}
    if layer == 1:
        for name,p in model.layer1._modules['0'].conv2.named_parameters():
            ret[name] = p.cpu().detach()
    elif layer == 4:
        for name,p in model.layer4._modules['0'].conv2.named_parameters():
            ret[name] = p.cpu().detach()
    return ret

## Activation(Relu vs Tanh)

In [182]:
EPOCHS = 100
train_loss_save = []
test_loss_save = []
train_acc_save = []
test_acc_save = []
bar = tqdm(range(EPOCHS), total=EPOCHS, desc='Train ')
writer = SummaryWriter()
Conv_1 = parameters_as_list(model,1)
Conv_5 = parameters_as_list(model,4)
for Epoch in bar:
    train(model, train_loader, optimizer, bar)
    
    new_Conv_1 = parameters_as_list(model,1)
    new_Conv_5 = parameters_as_list(model,4)
    if Epoch % 10 == 0:
        layer_1_weight_gap = mse_loss(new_Conv_1['weight'],Conv_1['weight'])
        layer_5_weight_gap = mse_loss(new_Conv_5['weight'],Conv_5['weight'])
        writer.add_scalar("parameter_gap/layer_1", layer_1_weight_gap, Epoch)
        writer.add_scalar("parameter_gap/layer_5", layer_5_weight_gap, Epoch)
        writer.add_scalar("score", layer_1_weight_gap / layer_5_weight_gap, Epoch)
    Conv_1 = new_Conv_1
    Conv_5 = new_Conv_5    

writer.flush()
writer.close()

Train : 100%|██████████| 100/100 [6:40:54<00:00, 240.55s/it, Train_Loss=0.543] 


In [183]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using PyTorch version: {torch.__version__}, Device: {DEVICE}")

model = ResNet152_with_Relu().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
mse_loss = nn.MSELoss()

Using PyTorch version: 1.13.1+cu116, Device: cuda


In [184]:
EPOCHS = 100
train_loss_save = []
test_loss_save = []
train_acc_save = []
test_acc_save = []
bar = tqdm(range(EPOCHS), total=EPOCHS, desc='Train ')
writer = SummaryWriter()
Conv_1 = parameters_as_list(model,1)
Conv_5 = parameters_as_list(model,4)
for Epoch in bar:
    train(model, train_loader, optimizer, bar)
    
    new_Conv_1 = parameters_as_list(model,1)
    new_Conv_5 = parameters_as_list(model,4)
    if Epoch % 10 == 0:
        layer_1_weight_gap = mse_loss(new_Conv_1['weight'],Conv_1['weight'])
        layer_5_weight_gap = mse_loss(new_Conv_5['weight'],Conv_5['weight'])
        writer.add_scalar("parameter_gap/layer_1", layer_1_weight_gap, Epoch)
        writer.add_scalar("parameter_gap/layer_5", layer_5_weight_gap, Epoch)
        writer.add_scalar("score", layer_1_weight_gap / layer_5_weight_gap, Epoch)
    Conv_1 = new_Conv_1
    Conv_5 = new_Conv_5    

writer.flush()
writer.close()

Train : 100%|██████████| 100/100 [6:44:48<00:00, 242.89s/it, Train_Loss=0.067] 


In [27]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using PyTorch version: {torch.__version__}, Device: {DEVICE}")

model = ResNet50_without_residual_learning().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
mse_loss = nn.MSELoss()

Using PyTorch version: 1.13.1+cu116, Device: cuda


In [28]:
EPOCHS = 100
train_loss_save = []
test_loss_save = []
train_acc_save = []
test_acc_save = []
bar = tqdm(range(EPOCHS), total=EPOCHS, desc='Train ')
writer = SummaryWriter(comment="50_without_Relu")
Conv_1 = parameters_as_list(model,1)
Conv_5 = parameters_as_list(model,4)
for Epoch in bar:
    train(model, train_loader, optimizer, bar)
    
    new_Conv_1 = parameters_as_list(model,1)
    new_Conv_5 = parameters_as_list(model,4)
    if Epoch % 10 == 0:
        layer_1_weight_gap = mse_loss(new_Conv_1['weight'],Conv_1['weight'])
        layer_5_weight_gap = mse_loss(new_Conv_5['weight'],Conv_5['weight'])
        writer.add_scalar("parameter_gap/layer_1", layer_1_weight_gap, Epoch)
        writer.add_scalar("parameter_gap/layer_5", layer_5_weight_gap, Epoch)
        writer.add_scalar("score", layer_1_weight_gap / layer_5_weight_gap, Epoch)
    Conv_1 = new_Conv_1
    Conv_5 = new_Conv_5    

writer.flush()
writer.close()

Train : 100%|██████████| 100/100 [2:19:03<00:00, 83.43s/it, Train_Loss=0.038] 


# Residual connection

In [15]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Using PyTorch version: {torch.__version__}, Device: {DEVICE}")

model = ResNet152_without_residual_learning().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss()
mse_loss = nn.MSELoss()

Using PyTorch version: 1.13.1+cu116, Device: cuda


In [18]:
EPOCHS = 100
train_loss_save = []
test_loss_save = []
train_acc_save = []
test_acc_save = []
bar = tqdm(range(EPOCHS), total=EPOCHS, desc='Train ')
writer = SummaryWriter(comment="152_without_Relu")
Conv_1 = parameters_as_list(model,1)
Conv_5 = parameters_as_list(model,4)
for Epoch in bar:
    train_loss = train(model, train_loader, optimizer, bar)
    
    new_Conv_1 = parameters_as_list(model,1)
    new_Conv_5 = parameters_as_list(model,4)
    writer.add_scalar("train/loss", train_loss, Epoch)
    if Epoch % 10 == 0:
        layer_1_weight_gap = mse_loss(new_Conv_1['weight'],Conv_1['weight'])
        layer_5_weight_gap = mse_loss(new_Conv_5['weight'],Conv_5['weight'])
        
        writer.add_scalar("parameter_gap/layer_1", layer_1_weight_gap, Epoch)
        writer.add_scalar("parameter_gap/layer_5", layer_5_weight_gap, Epoch)
        writer.add_scalar("score", layer_1_weight_gap / layer_5_weight_gap, Epoch)
    Conv_1 = new_Conv_1
    Conv_5 = new_Conv_5    

writer.flush()
writer.close()

Train :   0%|          | 0/100 [00:10<?, ?it/s]
Train :  92%|█████████▏| 92/100 [5:28:15<28:32, 214.08s/it, Train_Loss=1.665]


KeyboardInterrupt: 

In [19]:
writer.flush()
writer.close()