# <center>MobileNet - Pytorch

# Step 1: Prepare data

In [1]:
# MobileNet-Pytorch
import argparse 
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.metrics import accuracy_score
#from mobilenets import mobilenet

use_cuda = torch.cuda.is_available()
use_cudause_cud  = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

In [2]:
# Train, Validate, Test. Heavily inspired by Kevinzakka https://github.com/kevinzakka/DenseNet/blob/master/data_loader.py

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

valid_size=0.1

# define transforms
valid_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
])

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize
])


# load the dataset
train_dataset = datasets.CIFAR10(root="data", train=True, 
            download=True, transform=train_transform)

valid_dataset = datasets.CIFAR10(root="data", train=True, 
            download=True, transform=valid_transform)

num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train)) #5w张图片的10%用来当做验证集


np.random.seed(42)
np.random.shuffle(indices) # 随机乱序[0,1,...,49999]

train_idx, valid_idx = indices[split:], indices[:split]


train_sampler = SubsetRandomSampler(train_idx) # 这个很有意思
valid_sampler = SubsetRandomSampler(valid_idx)


# ------------------------- 使用不同的批次大小 ------------------------------------
show_step=10  # 批次大，show_step就小点
max_epoch=60  # 训练最大epoch数目
train_loader = torch.utils.data.DataLoader(train_dataset, 
                batch_size=256, sampler=train_sampler)

valid_loader = torch.utils.data.DataLoader(valid_dataset, 
                batch_size=256, sampler=valid_sampler)


test_transform = transforms.Compose([
    transforms.ToTensor(), normalize
])

test_dataset = datasets.CIFAR10(root="data", 
                                train=False, 
                                download=True,transform=test_transform)

test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=64, 
                                          shuffle=True)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


# Step 2: Model Config

In [3]:
# 32  缩放5次到 1x1@1024 
# From https://github.com/kuangliu/pytorch-cifar 
import torch
import torch.nn as nn
import torch.nn.functional as F

class Block_Attention(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block_Attention, self).__init__()
        
        # 分组卷积数=输入通道数
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        
        self.bn1 = nn.BatchNorm2d(in_planes)
        
        
        #self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        
        one_conv_kernel_size = 3
        self.conv1D= nn.Conv1d(1, out_planes, one_conv_kernel_size, stride=1,padding=1,groups=1,dilation=1,bias=False) # 在__init__初始化        
        
        self.bn2 = nn.BatchNorm2d(out_planes)
        
        self.Fixed_Random_Vector=torch.randn(256,1,1024).cuda()*0.1 # [bs,1，in_channel] 

    def forward(self, x):
        
        out = F.relu(self.bn1(self.conv1(x)))
        
        # -------------------------- Attention -----------------------
        w = self.Fixed_Random_Vector[0:out.shape[0]] # [bs,1，in_channel] 
        #print(w[0][0])
        #w = F.avg_pool2d(x,x.shape[-1])  #最好在初始化层定义好
        #print(w.shape)
        # [bs,in_Channel,1,1]
        #w = w.view(w.shape[0],1,w.shape[1])
        # [bs,1,in_Channel]
        # one_conv_filter = nn.Conv1d(1, out_channel, one_conv_kernel_size, stride=1,padding=1,groups=1,dilation=1) # 在__init__初始化
        # [bs,out_channel,in_Channel]
        w = self.conv1D(w)
        w = 0.5*F.tanh(w) # [-0.5,+0.5]
        # -------------- softmax ---------------------------
        #print(w.shape)
        w = w.view(w.shape[0],w.shape[1],w.shape[2],1,1)
        #print(w.shape)
        
        # ------------------------- fusion --------------------------
        out=out.view(out.shape[0],1,out.shape[1],out.shape[2],out.shape[3])
        #print("x size:",out.shape)
        
        out=out*w
        #print("after fusion x size:",out.shape)
        out=out.sum(dim=2)
        
        out = F.relu(self.bn2(out))
        
        return out


class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        
        # 分组卷积数=输入通道数
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        
        self.bn1 = nn.BatchNorm2d(in_planes)
        
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out


class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    #cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), [1024,1]]
    
    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32) # 自动化构建层
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            if isinstance(x, int):
                out_planes = x
                stride = 1 
                layers.append(Block(in_planes, out_planes, stride))
            elif isinstance(x, tuple):
                out_planes = x[0]
                stride = x[1]
                layers.append(Block(in_planes, out_planes, stride))
            # AC层通过list存放设置参数
            elif isinstance(x, list):
                out_planes= x[0]
                stride = x[1] if len(x)==2 else 1
                layers.append(Block_Attention(in_planes, out_planes, stride))   
            else:
                pass
            
            in_planes = out_planes
            
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [4]:
# From https://github.com/Z0m6ie/CIFAR-10_PyTorch
#model = mobilenet(num_classes=10, large_img=False)

# From https://github.com/kuangliu/pytorch-cifar 
if torch.cuda.is_available():
    model=MobileNet(10).cuda()
else:
    model=MobileNet(10)

optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
criterion = nn.CrossEntropyLoss()

In [5]:
# Implement validation
def train(epoch):
    model.train()
    #writer = SummaryWriter()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        correct = 0
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).sum()
        
        loss = criterion(output, target)
        loss.backward()
        accuracy = 100. * (correct.cpu().numpy()/ len(output))
        optimizer.step()
        if batch_idx % show_step == 0:
            if batch_idx % 16*show_step==0:
                print(torch.abs(model.layers[12].conv1D.weight).sum())
                print(model.layers[12].conv1D.weight.shape)
                print(model.layers[12].conv1D.weight)
                
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accuracy: {:.2f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), accuracy))
            #writer.add_scalar('Loss/Loss', loss.item(), epoch)
            #writer.add_scalar('Accuracy/Accuracy', accuracy, epoch)
    scheduler.step()

In [6]:
def validate(epoch):
    model.eval()
    #writer = SummaryWriter()
    valid_loss = 0
    correct = 0
    for data, target in valid_loader:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        valid_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).sum()

    valid_loss /= len(valid_idx)
    accuracy = 100. * correct.cpu().numpy() / len(valid_idx)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        valid_loss, correct, len(valid_idx),
        100. * correct / len(valid_idx)))
    #writer.add_scalar('Loss/Validation_Loss', valid_loss, epoch)
    #writer.add_scalar('Accuracy/Validation_Accuracy', accuracy, epoch)
    return valid_loss, accuracy

In [7]:
# Fix best model

def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        output = model(data)
        test_loss += F.cross_entropy(output, target, size_average=False).item() # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct.cpu().numpy() / len(test_loader.dataset)))

In [8]:
def save_best(loss, accuracy, best_loss, best_acc):
    if best_loss == None:
        best_loss = loss
        best_acc = accuracy
        file = 'saved_models/best_save_model.p'
        torch.save(model.state_dict(), file)
        
    elif loss < best_loss and accuracy > best_acc:
        best_loss = loss
        best_acc = accuracy
        file = 'saved_models/best_save_model.p'
        torch.save(model.state_dict(), file)
    return best_loss, best_acc

In [9]:
# Fantastic logger for tensorboard and pytorch, 
# run tensorboard by opening a new terminal and run "tensorboard --logdir runs"
# open tensorboard at http://localhost:6006/
from tensorboardX import SummaryWriter
best_loss = None
best_acc = None

import time 
SINCE=time.time()

for epoch in range(max_epoch):
    train(epoch)
    loss, accuracy = validate(epoch)
    best_loss, best_acc = save_best(loss, accuracy, best_loss, best_acc)
    
    NOW=time.time() 
    DURINGS=NOW-SINCE
    SINCE=NOW
    print("the time of this epoch:[{} s]".format(DURINGS))
    
#writer = SummaryWriter()
#writer.export_scalars_to_json("./all_scalars.json")

#writer.close()

#---------------------------- Test ------------------------------
test(epoch)

tensor(886.9125, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.1876,  0.3725,  0.4909]],

        [[ 0.0057,  0.2638, -0.2336]],

        [[-0.3289, -0.1337,  0.1112]],

        ...,

        [[ 0.2788,  0.4954,  0.5582]],

        [[ 0.1081, -0.3743, -0.0758]],

        [[-0.4959,  0.2686,  0.0928]]], device='cuda:0')
tensor(913.0258, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.2267e-01,  3.6848e-01,  5.0781e-01]],

        [[ 8.7599e-02,  3.7265e-01, -1.2191e-01]],

        [[-3.9411e-01, -6.3871e-02,  1.1054e-01]],

        ...,

        [[ 2.9510e-01,  5.1343e-01,  5.8591e-01]],

        [[ 2.1007e-01, -2.4910e-01, -1.2333e-01]],

        [[-4.1667e-01,  3.6271e-01,  7.9077e-02]]], device='cuda:0')
tensor(922.7245, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2330,  0.3174,  0.5423]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.4048, -0.0744,  0.1010]],

        ...,

        [

tensor(1001.8877, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.4990,  0.2456,  0.5164]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.4172, -0.0833,  0.0902]],

        ...,

        [[ 0.2978,  0.5420,  0.6358]],

        [[ 0.1589, -0.4987, -0.1945]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 1.7666, Accuracy: 1414/5000 (28.00%)

the time of this epoch:[154.9584677219391 s]
tensor(1005.5372, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 5.3452e-01,  2.1521e-01,  5.0088e-01]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-4.1722e-01, -8.2926e-02,  9.0382e-02]],

        ...,

        [[ 3.2695e-01,  5.3315e-01,  6.2856e-01]],

        [[ 1.6920e-01, -5.0847e-01, -1.7649e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')
tensor(1023.5219, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.5087,  0.2175


Validation set: Average loss: 1.6627, Accuracy: 1899/5000 (37.00%)

the time of this epoch:[154.97862839698792 s]
tensor(1134.0073, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 5.3212e-01,  7.5836e-02,  8.6666e-01]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-4.2128e-01, -1.0283e-01,  8.6048e-02]],

        ...,

        [[ 2.9174e-01,  6.4943e-01,  7.1858e-01]],

        [[ 2.0217e-01, -6.9422e-01, -1.1530e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')
tensor(1156.2297, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.5093,  0.0440,  0.9209]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.4466, -0.0635,  0.0852]],

        ...,

        [[ 0.3425,  0.6239,  0.7416]],

        [[ 0.2325, -0.6859, -0.1525]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1176.1885, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.4070,  0.076

tensor(1290.4641, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3834,  0.1829,  1.0896]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.2578, -0.4614,  0.1668]],

        ...,

        [[ 0.0485,  0.9192,  0.6606]],

        [[ 0.1411, -0.8675, -0.2887]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1309.2604, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.4380,  0.2344,  1.0336]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.0298, -0.5986,  0.1042]],

        ...,

        [[ 0.0500,  1.0079,  0.5523]],

        [[ 0.1143, -0.8912, -0.3037]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 1.4069, Accuracy: 2578/5000 (51.00%)

the time of this epoch:[154.76761436462402 s]
tensor(1313.7820, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.4356,  0.2662,  1.0201]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[ 0.0050

tensor(1342.6089, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.8755e-01,  3.3931e-01,  1.0750e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-9.9666e-02, -6.4661e-01, -1.1496e-01]],

        ...,

        [[-5.1840e-02,  1.0801e+00,  4.8459e-01]],

        [[ 1.4770e-01, -9.7942e-01, -2.2372e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')

Validation set: Average loss: 1.1111, Accuracy: 3142/5000 (62.00%)

the time of this epoch:[154.79829597473145 s]
tensor(1342.9496, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3076,  0.3336,  1.0677]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.1073, -0.6423, -0.1244]],

        ...,

        [[-0.0434,  1.0960,  0.4643]],

        [[ 0.1555, -0.9692, -0.2350]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1347.2941, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3008,  0.327

tensor(1385.7788, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3253,  0.2296,  1.1266]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.2108, -0.5663, -0.3555]],

        ...,

        [[-0.0150,  1.1890,  0.4028]],

        [[ 0.1489, -1.0117, -0.2596]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1390.8772, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3007,  0.2144,  1.1515]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.2418, -0.6018, -0.3085]],

        ...,

        [[-0.0257,  1.1734,  0.4306]],

        [[ 0.1697, -1.0164, -0.2460]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 1.0383, Accuracy: 3358/5000 (67.00%)

the time of this epoch:[154.7825059890747 s]
tensor(1391.0463, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2708,  0.1978,  1.1768]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.2469,

tensor(1425.1241, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.4371e-01,  1.8962e-01,  1.2278e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-2.6220e-01, -6.8417e-01, -2.8981e-01]],

        ...,

        [[ 1.4144e-02,  1.2323e+00,  3.9715e-01]],

        [[ 1.5916e-01, -1.0275e+00, -2.8146e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')

Validation set: Average loss: 0.9757, Accuracy: 3431/5000 (68.00%)

the time of this epoch:[154.77176094055176 s]
tensor(1425.8032, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.4522e-01,  1.7065e-01,  1.2352e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-2.7134e-01, -7.0182e-01, -2.5844e-01]],

        ...,

        [[ 1.8854e-02,  1.2465e+00,  3.7978e-01]],

        [[ 1.5045e-01, -1.0206e+00, -2.9430e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')
tensor(1427.7100, device='cuda:0')



Validation set: Average loss: 0.8344, Accuracy: 3749/5000 (74.00%)

the time of this epoch:[154.49200677871704 s]
tensor(1433.4954, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.0238e-01,  1.6142e-01,  1.2081e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-2.8849e-01, -6.9666e-01, -2.6999e-01]],

        ...,

        [[-4.6570e-03,  1.2221e+00,  4.3006e-01]],

        [[ 8.1725e-02, -1.0792e+00, -2.7208e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')
tensor(1433.4757, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3302,  0.1674,  1.1834]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.2828, -0.7091, -0.2585]],

        ...,

        [[ 0.0074,  1.2189,  0.4345]],

        [[ 0.0891, -1.0950, -0.2540]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1434.3411, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3113,  0.156

tensor(1440.2026, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2997,  0.1645,  1.2150]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3142, -0.6991, -0.2538]],

        ...,

        [[ 0.0079,  1.2865,  0.3609]],

        [[ 0.0865, -1.1070, -0.2649]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1440.9905, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3023,  0.1801,  1.2061]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3239, -0.6942, -0.2505]],

        ...,

        [[ 0.0063,  1.2861,  0.3617]],

        [[ 0.0889, -1.1011, -0.2722]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 0.7807, Accuracy: 3819/5000 (76.00%)

the time of this epoch:[154.77425694465637 s]
tensor(1441.1646, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.1126e-01,  1.9039e-01,  1.1926e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-

tensor(1447.6639, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3194,  0.1387,  1.2110]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3696, -0.6844, -0.2200]],

        ...,

        [[-0.0769,  1.3122,  0.3301]],

        [[ 0.0645, -1.1285, -0.2609]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 0.7482, Accuracy: 3914/5000 (78.00%)

the time of this epoch:[154.76147723197937 s]
tensor(1447.9750, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3199,  0.1494,  1.2076]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3731, -0.6875, -0.2079]],

        ...,

        [[-0.0770,  1.3112,  0.3332]],

        [[ 0.0734, -1.1260, -0.2626]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1448.7686, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.2927e-01,  1.5046e-01,  1.2007e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-

tensor(1451.0057, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.3445e-01,  1.4797e-01,  1.1939e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-3.8879e-01, -6.8939e-01, -1.7568e-01]],

        ...,

        [[-8.8852e-02,  1.3044e+00,  3.4048e-01]],

        [[ 5.0632e-02, -1.1360e+00, -2.5724e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')
tensor(1451.4337, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.3654e-01,  1.4994e-01,  1.1911e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-3.8221e-01, -6.9593e-01, -1.7534e-01]],

        ...,

        [[-8.7592e-02,  1.3017e+00,  3.4507e-01]],

        [[ 5.2545e-02, -1.1307e+00, -2.6323e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')

Validation set: Average loss: 0.6594, Accuracy: 4054/5000 (81.00%)

the time of this epoch:[154.90073823928833 s]
tensor(1451.3726, device='cuda:0')


tensor(1453.9634, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 3.1280e-01,  1.6205e-01,  1.2055e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-3.8072e-01, -7.1258e-01, -1.3818e-01]],

        ...,

        [[-1.2007e-01,  1.2849e+00,  3.5410e-01]],

        [[ 4.7075e-02, -1.1426e+00, -2.4828e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')

Validation set: Average loss: 0.6405, Accuracy: 4058/5000 (81.00%)

the time of this epoch:[154.81630420684814 s]
tensor(1454.0432, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3187,  0.1673,  1.1983]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3806, -0.7125, -0.1393]],

        ...,

        [[-0.1202,  1.2855,  0.3534]],

        [[ 0.0499, -1.1468, -0.2431]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1454.2257, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.3215,  0.164


Validation set: Average loss: 0.6365, Accuracy: 4081/5000 (81.00%)

the time of this epoch:[154.7929549217224 s]
tensor(1456.5156, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2864,  0.1565,  1.2321]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3980, -0.6975, -0.1429]],

        ...,

        [[-0.1213,  1.2941,  0.3470]],

        [[ 0.0283, -1.1542, -0.2441]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1456.2544, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2869,  0.1595,  1.2300]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3931, -0.7031, -0.1390]],

        ...,

        [[-0.1164,  1.2950,  0.3480]],

        [[ 0.0316, -1.1579, -0.2395]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1456.5004, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2906,  0.1616,  1.2262]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3925,

tensor(1456.8606, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2993,  0.1652,  1.2148]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3948, -0.6932, -0.1667]],

        ...,

        [[-0.1209,  1.3139,  0.3271]],

        [[ 0.0425, -1.1506, -0.2439]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1456.8669, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2996,  0.1662,  1.2141]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.4041, -0.6815, -0.1742]],

        ...,

        [[-0.1172,  1.3182,  0.3229]],

        [[ 0.0412, -1.1491, -0.2460]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 0.6037, Accuracy: 4152/5000 (83.00%)

the time of this epoch:[154.9062056541443 s]
tensor(1456.9149, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2983,  0.1670,  1.2147]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.4072,

tensor(1457.5491, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2932,  0.1643,  1.2192]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3926, -0.6842, -0.1951]],

        ...,

        [[-0.1110,  1.3231,  0.3228]],

        [[ 0.0340, -1.1512, -0.2416]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 0.5975, Accuracy: 4162/5000 (83.00%)

the time of this epoch:[154.79656529426575 s]
tensor(1457.5946, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2948,  0.1652,  1.2174]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3922, -0.6849, -0.1945]],

        ...,

        [[-0.1131,  1.3201,  0.3249]],

        [[ 0.0339, -1.1492, -0.2437]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1458.0828, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.9488e-01,  1.6967e-01,  1.2157e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-

tensor(1458.6729, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2938,  0.1623,  1.2184]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3864, -0.6915, -0.1927]],

        ...,

        [[-0.1139,  1.3300,  0.3162]],

        [[ 0.0346, -1.1480, -0.2463]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1458.5966, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 2.8939e-01,  1.6057e-01,  1.2227e+00]],

        [[ 8.7704e-02,  3.7274e-01, -1.2173e-01]],

        [[-3.8748e-01, -6.9036e-01, -1.9291e-01]],

        ...,

        [[-1.1549e-01,  1.3302e+00,  3.1555e-01]],

        [[ 3.5594e-02, -1.1493e+00, -2.4467e-01]],

        [[-4.1657e-01,  3.6286e-01,  7.8983e-02]]], device='cuda:0')

Validation set: Average loss: 0.5780, Accuracy: 4185/5000 (83.00%)

the time of this epoch:[154.88968420028687 s]
tensor(1458.6074, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2881,  0.159

tensor(1458.8630, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2903,  0.1684,  1.2171]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3887, -0.6891, -0.1942]],

        ...,

        [[-0.1189,  1.3321,  0.3128]],

        [[ 0.0296, -1.1467, -0.2471]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')

Validation set: Average loss: 0.5859, Accuracy: 4204/5000 (84.00%)

the time of this epoch:[154.85468363761902 s]
tensor(1458.8746, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2897,  0.1669,  1.2182]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3872, -0.6901, -0.1952]],

        ...,

        [[-0.1199,  1.3336,  0.3108]],

        [[ 0.0278, -1.1472, -0.2468]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1458.9956, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2901,  0.1685,  1.2176]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3874

tensor(1459.2019, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2888,  0.1686,  1.2197]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3794, -0.6954, -0.1992]],

        ...,

        [[-0.1370,  1.3255,  0.3117]],

        [[ 0.0263, -1.1545, -0.2390]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1459.3094, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2885,  0.1666,  1.2212]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3829, -0.6913, -0.2019]],

        ...,

        [[-0.1369,  1.3257,  0.3120]],

        [[ 0.0300, -1.1513, -0.2419]],

        [[-0.4166,  0.3629,  0.0790]]], device='cuda:0')
tensor(1459.3159, device='cuda:0')
torch.Size([1024, 1, 3])
Parameter containing:
tensor([[[ 0.2873,  0.1644,  1.2232]],

        [[ 0.0877,  0.3727, -0.1217]],

        [[-0.3848, -0.6898, -0.2012]],

        ...,

        [[-0.1415,  1.3252,  0.3105]],

        [[ 0.0285, -1.1493, -0.2442]],

# Step 3: Test

In [12]:
test(epoch)


Test set: Average loss: 0.4542, Accuracy: 8932/10000 (89.32%)

