# Simple Example of using WebLoader with PyTorch

In [1]:
%matplotlib inline

In [2]:
from importlib import reload

import os
import numpy as np

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
from torchvision import models

import webloader as wl

from IPython import display
import matplotlib.pyplot as plt
import torchtrainers as tt

In [3]:
# Per-channel mean and standard deviation (float32, three entries each) used to
# standardize image batches; these are the values commonly quoted for
# ImageNet-trained torchvision models.
mean = np.asarray([0.485, 0.456, 0.406], dtype="f")
std = np.asarray([0.229, 0.224, 0.225], dtype="f")

def norm_image(xs):
    """Standardize a batch of images channel by channel.

    The module-level ``mean`` and ``std`` vectors are viewed with shape
    (1, 1, 1, 3) and broadcast against ``xs``, so the channel axis of ``xs``
    must be its last axis.

    Returns ``(xs - mean) / std``.
    """
    channel_mean = mean.reshape(1, 1, 1, -1)
    channel_std = std.reshape(1, 1, 1, -1)
    centered = xs - channel_mean
    return centered / channel_std
def norm_cls(ys):
    """Cast class labels to int64 and shift them down by one.

    ``ys`` must provide ``astype`` (e.g. a numpy array). The input is not
    modified; a new int64 array holding ``ys - 1`` is returned, so labels that
    start at 1 become zero-based indices.
    """
    return ys.astype(np.int64) - 1

In [4]:
# Sharded training set. The brace ranges in the URL are handed to webloader
# unchanged (presumably expanded there into the individual shard URLs -- confirm
# against the webloader documentation).
training_urls = "http://storage.googleapis.com/lpr-imagenet-augmented/imagenet_train-{0000..0147}-{000..019}.tgz"
training_size = 1000000
training = wl.MultiWebLoader(
    training_urls,
    training_size,
    # What to read from each sample and how to batch it.
    fields="ppm;png;jpg cls",
    batch_size=64,
    batch_transforms=[norm_image, norm_cls],
    converters="torch",
    # Loader behaviour: shuffling, worker processes and queueing.
    shuffle=1000,
    processes=4,
    queue_size=200,
    use_torch_mp=True,
    verbose=False,
)
# Quick check of the loader: reports throughput and summarizes one batch.
tt.loader_test(training)

377.37 samples/s 5.90 batches/s
0 : Tensor torch.Size([64, 3, 224, 224]) cpu torch.float32 -2.1179039478302 2.640000104904175
1 : Tensor torch.Size([64]) cpu torch.int64 46 986


In [5]:
# Use the validation shard from the working directory when it has already been
# downloaded; otherwise read it over HTTP.
testing_urls = (
    "./imagenet_val-0000.tgz"
    if os.path.exists("./imagenet_val-0000.tgz")
    else "http://storage.googleapis.com/lpr-imagenet-augmented/imagenet_val-0000.tgz"
)
testing_size = 50000
testing = wl.WebLoader(
    testing_urls,
    testing_size,
    # Same decoding, batching and normalization as the training loader.
    fields="ppm;png;jpg cls",
    batch_size=64,
    batch_transforms=[norm_image, norm_cls],
    converters="torch",
    epochs=1,
)
# Quick check of the loader: reports throughput and summarizes one batch.
tt.loader_test(testing)

356.69 samples/s 5.57 batches/s
0 : Tensor torch.Size([64, 3, 224, 224]) cpu torch.float32 -2.1179039478302 2.640000104904175
1 : Tensor torch.Size([64]) cpu torch.int64 2 988


In [6]:
# Build a ResNet-50 from torchvision. It is called with no arguments, so
# torchvision's defaults apply (no pretrained weights are requested here).
model = models.resnet50()
# Move the model to the GPU. As the last expression of the cell, the returned
# module is also what the notebook displays.
model.cuda()

In [7]:
# Re-import torchtrainers so edits made to it since the first import take effect.
reload(tt)
# Optional multi-GPU wrapper, left disabled:
#model = nn.DataParallel(model)
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss().cuda()
trainer = tt.Trainer(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    device="cuda",
    metrics=[tt.Misclassification],
)
# Short trial run before the long training loop: a brief fit on the training
# loader followed by a brief evaluation on the test loader.
trainer.fit_for(training, 5000)
print(trainer.test_for(testing, 1000))

    5056 /     5000 remaining 00:00:00 total 00:00:29 complete 101% loss   6.96129 [   79] misclassification   0.99969
loss   7.01048 [   16] misclassification   0.99609


In [8]:
# Main training loop: up to 100 rounds. Each round fits on the training loader,
# evaluates on the test loader, prints the evaluation result and writes a
# checkpoint whose file name encodes the metric value and the round number.
for epoch in range(100):
    trainer.fit_for(training, 1000000)
    print(trainer.test_for(testing, 50000))
    # Value of the trainer's first metric object, scaled by 1e6 and truncated to
    # an int so it can be zero-padded into the file name.
    # NOTE(review): presumably this is the misclassification rate from the
    # test_for call just above -- confirm against torchtrainers.
    err = int(1e6 * trainer.mobjects[0].value())
    # NOTE(review): ".pyd" is also the extension of Windows Python extension
    # modules; ".pt"/".pth" is the usual PyTorch convention.
    fname = "{:06d}-{:04d}.pyd".format(err, epoch)
    print("saving", fname)
    # NOTE(review): this pickles the whole module object, which ties the file to
    # the exact class definitions/paths at save time; PyTorch recommends saving
    # model.state_dict() instead. Changing it alters the on-disk format.
    torch.save(model, fname)

 1000000 /  1000000 remaining 00:00:00 total 01:36:26 complete 100% loss   4.27229 [15625] misclassification   0.80969
loss   4.43127 [  782] misclassification   0.79410
  799616 /  1000000 remaining 00:19:17 total 01:36:12 complete  80% loss   3.48562 [12494] misclassification   0.69437


KeyboardInterrupt: 