In [1]:
import os
import torch
import torch.nn as nn 
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.optim as optim

from baseline.utils import *
from baseline.training import training

from Simclr_model.linear_classifier import LinearClassifier

In [2]:
# Batch size handed to get_loader below.
BATCH_SIZE = 50

# Dataset splits first, then the three loaders derived from them.
train_data, test_data = get_data()
(train_unlabeled_loader,
 train_labeled_loader,
 test_loader) = get_loader(train_data, test_data, batch_size=BATCH_SIZE)

# Device handle reused by the later cells (model placement, training).
device = get_device()

 <function is_available at 0x00000221D42D7DC0> and can be used by 1 devices
The device used is NVIDIA GeForce GTX 1050


In [3]:
# from torchvision.models.vision_transformer import Encoder
class Identity(nn.Module):
    """Parameter-free pass-through module.

    ``forward`` hands back the very object it was given, which makes this a
    drop-in replacement for a layer that should be disabled without removing
    the attribute from its parent network.
    """

    def forward(self, x):
        """Return ``x`` unchanged (same object, no copy)."""
        return x

class LinearLayer(nn.Module):
    """Fully connected layer optionally followed by 1-D batch normalisation.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        use_bias: ask for a bias term on the linear map. It is only honoured
            when ``use_bn`` is False; with batch norm the linear map is built
            without bias.
        use_bn: when True, ``nn.BatchNorm1d(out_features)`` is applied after
            the linear map.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    def __init__(self,
                 in_features,
                 out_features,
                 use_bias = True,
                 use_bn = False,
                 **kwargs):
        super().__init__(**kwargs)

        # Keep the configuration around as plain attributes.
        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = use_bias
        self.use_bn = use_bn

        # A bias is created only if requested AND no batch norm follows.
        wants_bias = use_bias and not use_bn
        self.linear = nn.Linear(in_features, out_features, bias=wants_bias)

        # The ``bn`` attribute exists only when batch norm is enabled.
        if use_bn:
            self.bn = nn.BatchNorm1d(out_features)

    def forward(self, x):
        """Apply the linear map, then batch norm if it was enabled."""
        projected = self.linear(x)
        return self.bn(projected) if self.use_bn else projected

class ProjectionHead(nn.Module):
    """Projection head built from ``LinearLayer`` blocks.

    Two layouts are supported, selected by ``head_type``:

    * ``'linear'``: a single ``LinearLayer(in_features, out_features)`` with
      batch norm.
    * ``'nonlinear'``: ``LinearLayer(in_features, hidden_features)`` with
      batch norm, ``ReLU``, then ``LinearLayer(hidden_features, out_features)``
      with batch norm.

    Every ``LinearLayer`` here is created with ``use_bn=True``, so none of the
    underlying linear maps carries a bias regardless of the ``use_bias`` flag.

    Args:
        in_features: width of the incoming feature vector.
        hidden_features: width of the hidden layer (used by ``'nonlinear'`` only).
        out_features: width of the projected output.
        head_type: ``'linear'`` or ``'nonlinear'``.
        **kwargs: forwarded to ``nn.Module.__init__``.

    Raises:
        ValueError: if ``head_type`` is not one of the supported layouts.
    """

    def __init__(self,
                 in_features,
                 hidden_features,
                 out_features,
                 head_type = 'nonlinear',
                 **kwargs):
        super(ProjectionHead,self).__init__(**kwargs)

        # Fail at construction time: without this check an unknown head_type
        # leaves ``self.layers`` undefined and only blows up later in forward().
        if head_type not in ('linear', 'nonlinear'):
            raise ValueError(
                "head_type must be 'linear' or 'nonlinear', got %r" % (head_type,))

        self.in_features = in_features
        self.out_features = out_features
        self.hidden_features = hidden_features
        self.head_type = head_type

        if self.head_type == 'linear':
            self.layers = LinearLayer(self.in_features,self.out_features,False, True)
        else:  # 'nonlinear'
            self.layers = nn.Sequential(
                LinearLayer(self.in_features,self.hidden_features,True, True),
                nn.ReLU(),
                LinearLayer(self.hidden_features,self.out_features,False,True))

    def forward(self,x):
        """Project ``x`` through the configured head."""
        x = self.layers(x)
        return x

        

In [4]:
# When True the ResNet stem is built for 1 input channel, otherwise for 3;
# read inside resnet50().
GRAYSCALE = True
# Number of target classes; passed to resnet50() when the encoder is built.
NUM_CLASSES = 10


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    conv = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return conv


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The block outputs ``planes * expansion`` channels. The stride, if any, is
    applied by the 3x3 convolution.

    Args:
        inplanes: number of input channels.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution (default 1).
        downsample: optional module applied to the input to produce the
            shortcut; required whenever the input's shape differs from the
            block's output shape. When None the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Derive the output width from the class attribute instead of a
        # literal 4 so it always agrees with what ResNet._make_layer computes
        # for the shortcut (planes * block.expansion).
        out_planes = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages and add the (possibly projected) input."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        # No ReLU here: the activation is applied after the residual addition.
        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out




class ResNet(nn.Module):
    """ResNet backbone that returns flattened features instead of logits.

    Args:
        block: residual block class; must expose an ``expansion`` attribute
            and accept ``(inplanes, planes, stride, downsample)``.
        layers: four integers, the number of blocks in each of the four stages.
        num_classes: output width of ``self.fc``. The layer is created but
            ``forward`` does not apply it.
        grayscale: truthy -> the stem takes 1 input channel, otherwise 3.
    """

    def __init__(self, block, layers, num_classes, grayscale):
        super(ResNet, self).__init__()
        # Running channel count; _make_layer reads and updates it.
        self.inplanes = 64
        in_dim = 1 if grayscale else 3

        # Stem
        self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; all but the first halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Both are registered as attributes but skipped in forward().
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every conv and 2-D batch-norm layer in the network."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                fan = kernel_h * kernel_w * module.out_channels
                # zero-mean normal with std = sqrt(2 / (kh * kw * out_channels))
                module.weight.data.normal_(0, (2. / fan)**.5)
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Assemble one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the shape changes,
        a 1x1 conv + batch-norm projection for its shortcut.
        """
        stage_width = planes * block.expansion

        shortcut = None
        shape_is_kept = stride == 1 and self.inplanes == stage_width
        if not shape_is_kept:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, stage_width,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width),
            )

        stage = [block(self.inplanes, planes, stride, shortcut)]
        # Every following block (and the next stage) sees the widened input.
        self.inplanes = stage_width
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the stage-4 feature map flattened to ``(batch, -1)``."""
        feats = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)

        # Average pooling stays disabled: per the original author, MNIST
        # feature maps are already 1x1 at this point.
        # The fc / softmax classification step is likewise left out; callers
        # receive raw features.
        return feats.view(feats.size(0), -1)



def resnet50(num_classes):
    """Construct a ResNet-50 encoder (Bottleneck blocks, stage depths 3-4-6-3).

    Args:
        num_classes: output width of the network's ``fc`` layer.

    Returns:
        A ``ResNet`` instance. Whether its stem expects 1 or 3 input channels
        is decided by the module-level ``GRAYSCALE`` flag.
    """
    # Honour the argument; previously the global NUM_CLASSES was used here and
    # the caller's value was silently ignored.
    model = ResNet(block=Bottleneck,
                   layers=[3, 4, 6, 3],
                   num_classes=num_classes,
                   grayscale=GRAYSCALE)
    return model

In [5]:
class PreModel(nn.Module):
    """Encoder followed by a projection head.

    Args:
        base_model: label for the encoder architecture; stored, not otherwise
            used by this class.
        base_out_layer: label for the encoder's output layer; stored, not
            otherwise used by this class.
        encoder: the backbone module. NOTE: it is modified in place: its
            ``fc`` attribute is replaced by ``Identity()`` and all of its
            parameters are marked trainable.

    Attributes:
        pretrained: the encoder passed in.
        projector: ``ProjectionHead(2048, 2048, 128)``; may be swapped out
            after construction.
    """

    def __init__(self,base_model,base_out_layer, encoder):
        super().__init__()
        self.base_model = base_model
        self.base_out_layer = base_out_layer

        # Encoder whose features feed the projector.
        self.pretrained = encoder

        # Drop the classification layer so the encoder exposes raw features.
        self.pretrained.fc = Identity()

        for p in self.pretrained.parameters():
            p.requires_grad = True

        self.projector = ProjectionHead(2048, 2048, 128)

    def forward(self,x):
        """Encode ``x`` and pass the flattened features through the projector."""
        out = self.pretrained(x)

        # Flatten everything except the batch axis. torch.squeeze(out) would
        # also remove the batch axis for a batch of one sample, handing the
        # projector a 1-D tensor.
        xp = self.projector(torch.flatten(out, start_dim=1))

        return xp

In [6]:
# Build the ResNet-50 encoder, wrap it with its projection head, and place the
# combined model on the selected device in one step.
encoder = resnet50(NUM_CLASSES)
model = PreModel('resnet50', 'avgpool', encoder).to(device)

In [7]:
PATH = './SIMCLR/Ahmed_simclr_resnet_256.pth'
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a different device (e.g. GPU -> CPU) still loads.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load(PATH, map_location=device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [8]:
# Freeze every parameter currently registered on the model (encoder and
# projection head alike) so none of them receives gradients.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

In [9]:
# Swap the projection head for a fresh classifier. Its parameters are created
# here, after the freeze in the previous cell, so they remain trainable.
# The model was moved to `device` before this head existed, so the head is
# moved explicitly instead of relying on a later .to() call elsewhere.
classifier = LinearClassifier().to(device)
#classifier = TwoLayersClassifier()
model.projector = classifier

In [10]:
# Softmax over the class axis, passed to the training loop as `output_fn`.
output_fn = torch.nn.Softmax(dim=1)
# NOTE(review): nn.CrossEntropyLoss applies log-softmax itself; confirm the
# training loop feeds it raw model outputs rather than output_fn's result.
criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that can actually be updated. Frozen
# parameters never receive gradients, so Adam would skip them anyway; leaving
# them out makes the intent explicit and fails fast if nothing is trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
# NOTE(review): lr=0.1 is 100x Adam's default of 1e-3; kept as-is, but worth
# revisiting given how large the logged losses are.
optimizer = optim.Adam(trainable_params, lr=0.1, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-3)

In [11]:
# Upper bound on the number of training epochs; also read by the plot cells.
epochs = 25

# Trainer wired to the labelled training loader and the test loader.
train_class = training(
    train_loader=train_labeled_loader,
    val_loader=test_loader,
    metric=get_accuracy,
    device=device,
)

# patience_earlystop (50) is larger than `epochs` (25).
train_class.train(
    model=model,
    epochs=epochs,
    optimizer=optimizer,
    criterion=criterion,
    output_fn=output_fn,
    RGB=False,
    patience_LR=3,
    patience_earlystop=50,
)

  4%|▍         | 1/25 [00:16<06:25, 16.07s/it]

| Epoch: 1/25 | Train: Loss 163.4662 Accuracy : 0.1100 | Val: Loss 157.1745 Accuracy : 0.1137



  8%|▊         | 2/25 [00:25<04:46, 12.44s/it]

| Epoch: 2/25 | Train: Loss 203.0698 Accuracy : 0.1300 | Val: Loss 185.8165 Accuracy : 0.1217



 12%|█▏        | 3/25 [00:36<04:18, 11.75s/it]

| Epoch: 3/25 | Train: Loss 248.2756 Accuracy : 0.1300 | Val: Loss 228.2090 Accuracy : 0.1224



 16%|█▌        | 4/25 [00:46<03:51, 11.05s/it]

| Epoch: 4/25 | Train: Loss 171.5574 Accuracy : 0.1900 | Val: Loss 157.5150 Accuracy : 0.2257



 20%|██        | 5/25 [00:56<03:33, 10.68s/it]

| Epoch: 5/25 | Train: Loss 117.3797 Accuracy : 0.2300 | Val: Loss 114.8623 Accuracy : 0.2440



 24%|██▍       | 6/25 [01:07<03:19, 10.49s/it]

| Epoch: 6/25 | Train: Loss 106.8973 Accuracy : 0.2200 | Val: Loss 104.6337 Accuracy : 0.2562



 28%|██▊       | 7/25 [01:17<03:08, 10.48s/it]

| Epoch: 7/25 | Train: Loss 86.5678 Accuracy : 0.2700 | Val: Loss 93.0961 Accuracy : 0.2819



 32%|███▏      | 8/25 [01:28<02:59, 10.57s/it]

| Epoch: 8/25 | Train: Loss 63.2557 Accuracy : 0.2400 | Val: Loss 78.2127 Accuracy : 0.2751



 36%|███▌      | 9/25 [01:38<02:47, 10.44s/it]

| Epoch: 9/25 | Train: Loss 61.6041 Accuracy : 0.2300 | Val: Loss 86.0374 Accuracy : 0.2417



 40%|████      | 10/25 [01:48<02:37, 10.48s/it]

| Epoch: 10/25 | Train: Loss 50.9959 Accuracy : 0.3000 | Val: Loss 81.0079 Accuracy : 0.2777



 44%|████▍     | 11/25 [01:59<02:27, 10.55s/it]

| Epoch: 11/25 | Train: Loss 43.2591 Accuracy : 0.3300 | Val: Loss 78.2159 Accuracy : 0.2748



 48%|████▊     | 12/25 [02:09<02:15, 10.39s/it]

| Epoch: 12/25 | Train: Loss 39.4333 Accuracy : 0.4000 | Val: Loss 80.3703 Accuracy : 0.2740



 52%|█████▏    | 13/25 [02:21<02:08, 10.73s/it]

| Epoch: 13/25 | Train: Loss 31.9435 Accuracy : 0.4300 | Val: Loss 78.3945 Accuracy : 0.2719



 52%|█████▏    | 13/25 [02:31<02:19, 11.66s/it]


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(8,8))

# Size each x-axis from the history that was actually recorded. Training can
# stop before `epochs` is reached (interrupt, early stopping), and plotting
# against range(1, epochs + 1) then fails on mismatched x/y lengths.
train_losses = train_class.loss_train
valid_losses = train_class.loss_valid
plt.plot(range(1, len(train_losses) + 1), train_losses, label="train loss" )
plt.plot(range(1, len(valid_losses) + 1), valid_losses, label="valid loss")
plt.xlabel("epochs")
plt.ylabel("Loss")
# NOTE(review): the title mentions "one hidden layer" and BS=64, while this
# notebook attaches LinearClassifier and passes batch_size=50; confirm.
plt.title("Loss functions one hidden layer, BS=64")
plt.legend()

#plt.savefig("Loss function OWL BS=64.png")
plt.show()

In [None]:
plt.figure(figsize=(8,8))

# Size each x-axis from the history that was actually recorded. Training can
# stop before `epochs` is reached (interrupt, early stopping), and plotting
# against range(1, epochs + 1) then fails on mismatched x/y lengths.
train_accuracies = train_class.acc_train
valid_accuracies = train_class.acc_valid
plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, label="train accuracy" )
plt.plot(range(1, len(valid_accuracies) + 1), valid_accuracies, label="valid accuracy")
plt.xlabel("epochs")
plt.ylabel("accuracy")
# NOTE(review): the title mentions "one hidden layer" and BS=64, while this
# notebook attaches LinearClassifier and passes batch_size=50; confirm.
plt.title("Accuracy functions one hidden layer, BS=64")
plt.legend()

#plt.savefig("Accuracy function OWL BS=64.png")
plt.show()