In [1]:
import torchvision
from torch import nn
from torch.nn import functional as F
import torch
from torchvision import transforms
from torch.utils import data
import torch.optim as optim
from PIL import Image

In [2]:
transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [3]:
train_data_path = "./train/"
train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=transforms)

In [4]:
val_data_path = "./val/"
val_data = torchvision.datasets.ImageFolder(root=val_data_path, transform=transforms)

In [5]:
# test_data_path = "./test/"
# test_data = torchvision.datasets.ImageFolder(root=test_data_path, transform=transforms)

In [6]:
batch_size = 512
train_data_loader = data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_data_loader  = data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
# test_data_loader  = data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [7]:
!nvidia-smi --query --display=memory



Timestamp                           : Wed Jul 29 20:47:15 2020
Driver Version                      : 440.33.01
CUDA Version                        : 10.2

Attached GPUs                       : 1
GPU 00000000:03:00.0
    FB Memory Usage
        Total                       : 8118 MiB
        Used                        : 160 MiB
        Free                        : 7958 MiB
    BAR1 Memory Usage
        Total                       : 256 MiB
        Used                        : 5 MiB
        Free                        : 251 MiB



In [8]:
class CNNNet(nn.Module):

    def __init__(self, num_classes=2):
        super(CNNNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [9]:
model = CNNNet()

In [10]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model.to(device)

CNNNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4)

In [11]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

57012034

In [12]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device="cpu"):
    for epoch in range(epochs):
        training_loss = 0.0
        valid_loss = 0.0
        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            inputs, targets = batch
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            training_loss += loss.data.item()
        training_loss /= len(train_loader)

        model.eval()
        num_correct = 0
        num_examples = 0
        for batch in val_loader:
            inputs, targets = batch
            inputs = inputs.to(device)
            output = model(inputs)
            targets = targets.to(device)
            loss = loss_fn(output,targets)
            valid_loss += loss.data.item()
            correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
        valid_loss /= len(val_loader)

        print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}'.format(
            epoch, training_loss,
            valid_loss, 
            num_correct / num_examples
        ))

In [14]:
%%time

train(model, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader, val_data_loader, 20, device)

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch: 0, Training Loss: 3.22, Validation Loss: 1.84, accuracy = 0.66
Epoch: 1, Training Loss: 1.60, Validation Loss: 0.65, accuracy = 0.66
Epoch: 2, Training Loss: 0.69, Validation Loss: 0.71, accuracy = 0.34
Epoch: 3, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 4, Training Loss: 0.69, Validation Loss: 0.73, accuracy = 0.34
Epoch: 5, Training Loss: 0.69, Validation Loss: 0.70, accuracy = 0.34
Epoch: 6, Training Loss: 0.68, Validation Loss: 0.74, accuracy = 0.34
Epoch: 7, Training Loss: 0.69, Validation Loss: 0.67, accuracy = 0.69
Epoch: 8, Training Loss: 0.69, Validation Loss: 0.66, accuracy = 0.72
Epoch: 9, Training Loss: 0.69, Validation Loss: 0.63, accuracy = 0.66
Epoch: 10, Training Loss: 0.66, Validation Loss: 0.61, accuracy = 0.68
Epoch: 11, Training Loss: 0.65, Validation Loss: 0.69, accuracy = 0.53
Epoch: 12, Training Loss: 0.64, Validation Loss: 0.61, accuracy = 0.66
Epoch: 13, Training Loss: 0.63, Validation Loss: 0.64, accuracy = 0.67
Epoch: 14, Train

In [15]:
# accuracy = 0.82

In [16]:
!nvidia-smi --query --display=memory



Timestamp                           : Wed Jul 29 20:49:26 2020
Driver Version                      : 440.33.01
CUDA Version                        : 10.2

Attached GPUs                       : 1
GPU 00000000:03:00.0
    FB Memory Usage
        Total                       : 8118 MiB
        Used                        : 3920 MiB
        Free                        : 4198 MiB
    BAR1 Memory Usage
        Total                       : 256 MiB
        Used                        : 5 MiB
        Free                        : 251 MiB



In [15]:
from torch.utils.checkpoint import checkpoint_sequential
import torch.nn as nn

class CheckpointedAlexNet(nn.Module):

    def __init__(self, num_classes=2):
        super(CheckpointedAlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        
    def forward(self, x):
        x = checkpoint_sequential(self.features, 2, x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x

In [16]:
model = CheckpointedAlexNet()

In [17]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model.to(device)

CheckpointedAlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, 

In [18]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

57012034

In [19]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [20]:
%%time

train(model, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader, val_data_loader, 20, device)

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch: 0, Training Loss: 1.33, Validation Loss: 1.90, accuracy = 0.66
Epoch: 1, Training Loss: 1.71, Validation Loss: 0.68, accuracy = 0.66
Epoch: 2, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.65
Epoch: 3, Training Loss: 0.73, Validation Loss: 0.72, accuracy = 0.34
Epoch: 4, Training Loss: 0.69, Validation Loss: 0.71, accuracy = 0.34
Epoch: 5, Training Loss: 0.70, Validation Loss: 0.69, accuracy = 0.35
Epoch: 6, Training Loss: 0.69, Validation Loss: 0.77, accuracy = 0.34
Epoch: 7, Training Loss: 0.70, Validation Loss: 0.68, accuracy = 0.66
Epoch: 8, Training Loss: 0.69, Validation Loss: 0.68, accuracy = 0.66
Epoch: 9, Training Loss: 0.69, Validation Loss: 0.70, accuracy = 0.34
Epoch: 10, Training Loss: 0.69, Validation Loss: 0.71, accuracy = 0.34
Epoch: 11, Training Loss: 0.68, Validation Loss: 0.70, accuracy = 0.34
Epoch: 12, Training Loss: 0.68, Validation Loss: 0.69, accuracy = 0.44
Epoch: 13, Training Loss: 0.66, Validation Loss: 0.66, accuracy = 0.67
Epoch: 14, Train

In [21]:
!nvidia-smi --query --display=memory



Timestamp                           : Wed Jul 29 20:46:50 2020
Driver Version                      : 440.33.01
CUDA Version                        : 10.2

Attached GPUs                       : 1
GPU 00000000:03:00.0
    FB Memory Usage
        Total                       : 8118 MiB
        Used                        : 3010 MiB
        Free                        : 5108 MiB
    BAR1 Memory Usage
        Total                       : 256 MiB
        Used                        : 5 MiB
        Free                        : 251 MiB



In [15]:
import torchvision.models as models
alexnet = models.alexnet(num_classes=2)

In [16]:
sum(p.numel() for p in alexnet.parameters() if p.requires_grad)

57012034

In [21]:
alexnet.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [23]:
optimizer = optim.Adam(alexnet.parameters(), lr=0.001)

In [24]:
train(alexnet, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader, val_data_loader, 20, device)

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch: 0, Training Loss: 1.85, Validation Loss: 0.68, accuracy = 0.66
Epoch: 1, Training Loss: 0.65, Validation Loss: 0.65, accuracy = 0.56
Epoch: 2, Training Loss: 0.57, Validation Loss: 0.45, accuracy = 0.75
Epoch: 3, Training Loss: 0.50, Validation Loss: 0.55, accuracy = 0.74
Epoch: 4, Training Loss: 0.49, Validation Loss: 0.48, accuracy = 0.74
Epoch: 5, Training Loss: 0.43, Validation Loss: 0.38, accuracy = 0.78
Epoch: 6, Training Loss: 0.38, Validation Loss: 0.37, accuracy = 0.79
Epoch: 7, Training Loss: 0.35, Validation Loss: 0.43, accuracy = 0.83
Epoch: 8, Training Loss: 0.35, Validation Loss: 0.36, accuracy = 0.82
Epoch: 9, Training Loss: 0.36, Validation Loss: 0.54, accuracy = 0.81
Epoch: 10, Training Loss: 0.28, Validation Loss: 0.40, accuracy = 0.80
Epoch: 11, Training Loss: 0.31, Validation Loss: 0.36, accuracy = 0.82
Epoch: 12, Training Loss: 0.30, Validation Loss: 0.47, accuracy = 0.75
Epoch: 13, Training Loss: 0.27, Validation Loss: 0.43, accuracy = 0.79
Epoch: 14, Train

In [37]:
import torchvision.models as models
alexnet = models.alexnet(pretrained=True)

In [38]:
alexnet.classifier = nn.Sequential(
    nn.Dropout(),
    nn.Linear(256 * 6 * 6, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Linear(4096, 2)
)

In [39]:
sum(p.numel() for p in alexnet.parameters() if p.requires_grad)

57012034

In [40]:
alexnet.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [41]:
optimizer = optim.Adam(alexnet.parameters(), lr=0.001)

In [42]:
train(alexnet, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader, val_data_loader, 20, device)

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch: 0, Training Loss: 3.72, Validation Loss: 0.69, accuracy = 0.66
Epoch: 1, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 2, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 3, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 4, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 5, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 6, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 7, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 8, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 9, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 10, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 11, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 12, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 13, Training Loss: 0.69, Validation Loss: 0.69, accuracy = 0.66
Epoch: 14, Train

In [43]:
model = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True)

Downloading: "https://github.com/pytorch/vision/archive/master.zip" to /home/science/.cache/torch/hub/master.zip
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/science/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




In [45]:
model.fc = nn.Linear(2048, 2)

In [46]:
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [47]:
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [48]:
train(model, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader, val_data_loader, 20, device)

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch: 0, Training Loss: 0.49, Validation Loss: 3.02, accuracy = 0.81
Epoch: 1, Training Loss: 0.19, Validation Loss: 0.47, accuracy = 0.90
Epoch: 2, Training Loss: 0.12, Validation Loss: 0.30, accuracy = 0.91
Epoch: 3, Training Loss: 0.06, Validation Loss: 0.29, accuracy = 0.90
Epoch: 4, Training Loss: 0.05, Validation Loss: 0.50, accuracy = 0.85
Epoch: 5, Training Loss: 0.07, Validation Loss: 0.29, accuracy = 0.90
Epoch: 6, Training Loss: 0.04, Validation Loss: 0.25, accuracy = 0.88
Epoch: 7, Training Loss: 0.06, Validation Loss: 0.21, accuracy = 0.91
Epoch: 8, Training Loss: 0.03, Validation Loss: 0.21, accuracy = 0.88
Epoch: 9, Training Loss: 0.01, Validation Loss: 0.16, accuracy = 0.94
Epoch: 10, Training Loss: 0.05, Validation Loss: 0.52, accuracy = 0.89
Epoch: 11, Training Loss: 0.10, Validation Loss: 0.67, accuracy = 0.83
Epoch: 12, Training Loss: 0.03, Validation Loss: 0.26, accuracy = 0.87
Epoch: 13, Training Loss: 0.01, Validation Loss: 0.24, accuracy = 0.92
Epoch: 14, Train

In [49]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

23512130