In [2]:
# Shell escape: print the current GPU state (driver version, memory usage, utilisation).
!nvidia-smi

Tue Sep  6 11:16:52 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
| N/A   56C    P0    44W /  N/A |      0MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Imports
import torch
from tqdm import tqdm
from torchvision import datasets, transforms


In [4]:
from apex import amp

In [5]:
# --- cuDNN backend ---
# Keep cuDNN switched on, let it benchmark its algorithms, and allow TF32 in its kernels.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
# NOTE(review): benchmark=True (auto-tuning for speed) combined with deterministic=True
# (reproducibility) looks contradictory in intent — confirm which one is actually wanted.
torch.backends.cudnn.deterministic = True

# --- CUDA matmul backend ---
# Allow TF32 and reduced-precision fp16 reductions in matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True


In [35]:
# Build a ResNet-101 from the torchvision hub repo (pinned to v0.6.0).
# pretrained=False is passed, so no pretrained weights are requested.
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet101', pretrained=False)

# Replace the final fully-connected layer with a 10-way classifier
# (one output per CIFAR-10 class). The input width is read from the existing
# head rather than hard-coded, so it stays correct if the backbone changes.
model.fc = torch.nn.Linear(model.fc.in_features, 10)

# Move the model to the GPU
model = model.cuda()


Using cache found in /home/venom/.cache/torch/hub/pytorch_vision_v0.6.0


In [7]:
torch.__version__


'1.12.1+cu116'

In [8]:
# CIFAR-10 preprocessing: convert each image to a tensor, then normalise every
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,) * 3, (0.5,) * 3)]
)

# Train split first, then test split; both share the storage directory and the
# preprocessing pipeline and are downloaded on demand.
trainset, testset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)


Files already downloaded and verified
Files already downloaded and verified


In [9]:
# Batched loaders (64 samples per batch, 4 worker processes, pinned host memory).
# Only the training split is shuffled.
trainloader, testloader = (
    torch.utils.data.DataLoader(
        split, batch_size=64, shuffle=reshuffle, num_workers=4, pin_memory=True)
    for split, reshuffle in ((trainset, True), (testset, False))
)


In [26]:
# Loss function: cross-entropy between the model outputs and the class labels.
criterion = torch.nn.CrossEntropyLoss()
# Optimiser: Adam over all model parameters with a learning rate of 1e-4.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [22]:
# Wrap model and optimizer with Apex AMP at opt level O1 (automatic casts around
# PyTorch functions; dynamic loss scaling is the O1 default).
# Only one loss is ever passed to amp.scale_loss in this notebook, so the default
# single loss scaler is used instead of requesting five via num_losses.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1", patch_torch_functions=True)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [17]:
# Baseline: accuracy on the test set before any training

model.eval()
correct = 0
total = 0
with torch.no_grad():
    # The bar label never changes, so it is set once when the bar is created
    # instead of on every batch.
    with tqdm(testloader, unit="batch", desc="Test") as t2epoch:
        for data, target in t2epoch:
            # testloader is built with pin_memory=True, so the host-to-GPU copies
            # can be issued asynchronously.
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            output = model(data)
            # Predicted class = index of the largest output along dim 1.
            # Gradients are already disabled here, so no `.data` detour is needed.
            predicted = output.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            # Running accuracy (percent) over the batches seen so far
            t2epoch.set_postfix(Accuracy=(100 * correct / total))


Test: 100%|██████████| 157/157 [00:05<00:00, 31.34batch/s, Accuracy=10]  


In [18]:
# Train for 5 epochs, evaluating on the test set after each one
for epoch in range(1, 6):
    model.train()
    # The bar label is constant within an epoch, so it is set once at creation.
    with tqdm(trainloader, unit="batch", desc=f"Epoch {epoch}") as tepoch:
        for data, target in tepoch:
            # The loaders use pin_memory=True, so the host-to-GPU copies can be
            # issued asynchronously.
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            # Apex AMP: backpropagate through the scaled loss; the unscaled
            # `loss` is kept for reporting.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()
            tepoch.set_postfix(loss=loss.item())

    # Test
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        with tqdm(testloader, unit="batch", desc=f"Epoch {epoch}") as t2epoch:
            for data, target in t2epoch:
                data = data.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
                output = model(data)
                # Predicted class = index of the largest output along dim 1.
                # Gradients are already disabled here, so no `.data` detour is needed.
                predicted = output.argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
                # Running accuracy (percent) over the batches seen so far
                t2epoch.set_postfix(Accuracy=(100 * correct / total))

    # Visual separator between epochs in the cell output
    print(" ")


Epoch 1:   1%|          | 4/782 [00:01<02:40,  4.84batch/s, loss=2.6] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


Epoch 1:   1%|          | 8/782 [00:01<01:24,  9.21batch/s, loss=2.58]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 1024.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 512.0


Epoch 1:   2%|▏         | 14/782 [00:01<00:59, 12.88batch/s, loss=2.51]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 256.0


Epoch 1: 100%|██████████| 782/782 [00:54<00:00, 14.25batch/s, loss=2.36]
Epoch 1: 100%|██████████| 157/157 [00:02<00:00, 52.48batch/s, Accuracy=24.1]


 


Epoch 2: 100%|██████████| 782/782 [00:53<00:00, 14.69batch/s, loss=2.17]
Epoch 2: 100%|██████████| 157/157 [00:02<00:00, 52.83batch/s, Accuracy=33]  


 


Epoch 3: 100%|██████████| 782/782 [00:51<00:00, 15.29batch/s, loss=1.41]
Epoch 3: 100%|██████████| 157/157 [00:02<00:00, 52.66batch/s, Accuracy=38.4]


 


Epoch 4: 100%|██████████| 782/782 [00:50<00:00, 15.34batch/s, loss=1.74]
Epoch 4: 100%|██████████| 157/157 [00:02<00:00, 52.90batch/s, Accuracy=42.8]


 


Epoch 5: 100%|██████████| 782/782 [00:50<00:00, 15.40batch/s, loss=1.5] 
Epoch 5: 100%|██████████| 157/157 [00:02<00:00, 53.15batch/s, Accuracy=45.4]

 





In [12]:
# Shell escape: print the GPU state again (memory usage in particular) at this point in the notebook.
!nvidia-smi

Tue Sep  6 11:07:05 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
| N/A   64C    P0    47W /  N/A |   2793MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

Lower ram usage :think:

During training, GPU memory usage stayed below 50% (2793 MiB of 6144 MiB).

This means there is room to increase the batch size and make better use of the GPU. Let’s try that too.

In [23]:
# Rebuild both loaders with a batch size of 128 (4 worker processes, pinned host
# memory). Only the training split is shuffled.
trainloader, testloader = (
    torch.utils.data.DataLoader(
        split, batch_size=128, shuffle=reshuffle, num_workers=4, pin_memory=True)
    for split, reshuffle in ((trainset, True), (testset, False))
)


In [24]:
# Train for 5 epochs with the current loaders, evaluating after each epoch.
# NOTE(review): neither `model` nor `optimizer` is re-created between the previous
# training cell and this one, so on a top-to-bottom run this continues from the
# already-trained weights — confirm that is intended when comparing batch sizes.
for epoch in range(1, 6):
    model.train()
    # The bar label is constant within an epoch, so it is set once at creation.
    with tqdm(trainloader, unit="batch", desc=f"Epoch {epoch}") as tepoch:
        for data, target in tepoch:
            # The loaders use pin_memory=True, so the host-to-GPU copies can be
            # issued asynchronously.
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            # Apex AMP: backpropagate through the scaled loss; the unscaled
            # `loss` is kept for reporting.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()
            tepoch.set_postfix(loss=loss.item())

    # Test
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        with tqdm(testloader, unit="batch", desc=f"Epoch {epoch}") as t2epoch:
            for data, target in t2epoch:
                data = data.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
                output = model(data)
                # Predicted class = index of the largest output along dim 1.
                # Gradients are already disabled here, so no `.data` detour is needed.
                predicted = output.argmax(dim=1)
                total += target.size(0)
                correct += (predicted == target).sum().item()
                # Running accuracy (percent) over the batches seen so far
                t2epoch.set_postfix(Accuracy=(100 * correct / total))

    # Visual separator between epochs in the cell output
    print(" ")


Epoch 1:   1%|          | 3/391 [00:01<02:39,  2.43batch/s, loss=2.72]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


Epoch 1:   2%|▏         | 7/391 [00:01<01:01,  6.21batch/s, loss=2.71]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 1024.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 512.0


Epoch 1: 100%|██████████| 391/391 [00:29<00:00, 13.20batch/s, loss=2.06]
Epoch 1: 100%|██████████| 79/79 [00:01<00:00, 41.70batch/s, Accuracy=23.6]


 


Epoch 2: 100%|██████████| 391/391 [00:27<00:00, 14.00batch/s, loss=1.99]
Epoch 2: 100%|██████████| 79/79 [00:01<00:00, 41.12batch/s, Accuracy=30.6]


 


Epoch 3: 100%|██████████| 391/391 [00:28<00:00, 13.93batch/s, loss=1.83]
Epoch 3: 100%|██████████| 79/79 [00:01<00:00, 39.74batch/s, Accuracy=35.2]


 


Epoch 4: 100%|██████████| 391/391 [00:27<00:00, 13.99batch/s, loss=1.76]
Epoch 4: 100%|██████████| 79/79 [00:01<00:00, 41.60batch/s, Accuracy=38.1]


 


Epoch 5: 100%|██████████| 391/391 [00:27<00:00, 14.02batch/s, loss=1.63]
Epoch 5: 100%|██████████| 79/79 [00:01<00:00, 41.22batch/s, Accuracy=40.8]

 





In [25]:
# Shell escape: print the GPU state once more (memory usage in particular) at this point in the notebook.
!nvidia-smi

Tue Sep  6 11:26:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
| N/A   63C    P0    47W /  N/A |   4327MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces