![Inception_Module](https://img-blog.csdn.net/20160225155336279)
![GoogLeNet.png](https://img-blog.csdn.net/20160225155414702)

In [5]:
import torch
import torch.nn as nn

# Inception
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    Branches (all preserve the spatial size of the input):
      * ``b1``: 1x1 conv
      * ``b2``: 1x1 conv -> 3x3 conv
      * ``b3``: 1x1 conv -> 3x3 conv -> 3x3 conv (the "5x5" branch, built
        from two stacked 3x3 convolutions)
      * ``b4``: 3x3 max-pool (stride 1) -> 1x1 conv

    Every convolution is followed by BatchNorm and an in-place ReLU.

    Args:
        in_planes: number of input channels.
        kernel_1_x: output channels of the 1x1 branch.
        kernel_3_in: channels of the 1x1 reduction in the 3x3 branch.
        kernel_3_x: output channels of the 3x3 branch.
        kernel_5_in: channels of the 1x1 reduction in the "5x5" branch.
        kernel_5_x: output channels of the "5x5" branch.
        pool_planes: output channels of the pooling branch.

    The output has ``kernel_1_x + kernel_3_x + kernel_5_x + pool_planes``
    channels.
    """

    def __init__(self, in_planes, kernel_1_x, kernel_3_in, kernel_3_x, kernel_5_in, kernel_5_x, pool_planes):
        super(Inception, self).__init__()

        def conv_bn_relu(n_in, n_out, **conv_kwargs):
            # One conv -> batch-norm -> ReLU unit, returned as a flat list so
            # the layers keep their positional indices inside nn.Sequential.
            # inplace=True saves memory by overwriting the batch-norm output.
            return [
                nn.Conv2d(n_in, n_out, **conv_kwargs),
                nn.BatchNorm2d(n_out),
                nn.ReLU(inplace=True),
            ]

        # 1x1 conv branch
        self.b1 = nn.Sequential(*conv_bn_relu(in_planes, kernel_1_x, kernel_size=1))

        # 1x1 conv -> 3x3 conv branch
        reduce_3 = conv_bn_relu(in_planes, kernel_3_in, kernel_size=1)
        conv_3 = conv_bn_relu(kernel_3_in, kernel_3_x, kernel_size=3, padding=1)
        self.b2 = nn.Sequential(*(reduce_3 + conv_3))

        # 1x1 conv -> two 3x3 convs (stands in for a single 5x5 conv)
        reduce_5 = conv_bn_relu(in_planes, kernel_5_in, kernel_size=1)
        conv_5a = conv_bn_relu(kernel_5_in, kernel_5_x, kernel_size=3, padding=1)
        conv_5b = conv_bn_relu(kernel_5_x, kernel_5_x, kernel_size=3, padding=1)
        self.b3 = nn.Sequential(*(reduce_5 + conv_5a + conv_5b))

        # 3x3 max-pool -> 1x1 conv branch
        pool = [nn.MaxPool2d(3, stride=1, padding=1)]
        project = conv_bn_relu(in_planes, pool_planes, kernel_size=1)
        self.b4 = nn.Sequential(*(pool + project))

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], 1)

In [16]:
# GoogLeNet
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier assembled from ``Inception`` blocks.

    Layout: a 3x3 conv stem (3 -> 192 channels), two Inception blocks, a
    stride-2 max-pool, five Inception blocks, the same stride-2 max-pool
    again, two Inception blocks, global average pooling and a linear layer.

    Args:
        num_classes: size of the output logits vector (default 10).

    The head uses adaptive (global) average pooling, so the network is not
    tied to one input resolution. For 32x32 inputs the final feature map is
    8x8 and the result equals a fixed 8x8 average pool.
    """

    def __init__(self, num_classes=10):
        super(GoogLeNet, self).__init__()
        # Stem: keeps the spatial size (3x3 conv, padding 1).
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        # Each block's in_planes equals the previous block's concatenated
        # output width (kernel_1_x + kernel_3_x + kernel_5_x + pool_planes).
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)      # -> 256
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)    # -> 480

        # Parameter-free, so the same module is reused for both downsamplings.
        self.max_pool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)  # -> 512
        self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)  # -> 512
        self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)  # -> 512
        self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)  # -> 528
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)  # -> 832

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)  # -> 832
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)  # -> 1024

        # Global average pooling: always yields a 1x1 map, so the linear
        # layer sees exactly 1024 features whatever the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.pre_layers(x)
        x = self.a3(x)
        x = self.b3(x)
        x = self.max_pool(x)
        x = self.a4(x)
        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        x = self.e4(x)
        x = self.max_pool(x)
        x = self.a5(x)
        x = self.b5(x)
        x = self.avgpool(x)
        # Collapse (batch, 1024, 1, 1) to (batch, 1024) for the classifier.
        x = torch.flatten(x, 1)
        x = self.linear(x)
        return x

In [None]:
# %load train_CIFAR-10.py
# General-purpose CIFAR-10 training script (Python)
# --------------------------------------
import torch 
import torchvision
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import argparse

def _str2bool(value):
    """Convert a command-line string to a bool.

    ``argparse`` with ``type=bool`` calls ``bool(str)``, which is True for
    every non-empty string (including ``'False'``), so an explicit parser
    is required.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


# main
def main(argv=None):
    """Parse the training options, build a ``Solver`` and run it.

    Args:
        argv: optional list of command-line style arguments. When ``None``
            the built-in default list below is used (rather than
            ``sys.argv``, which inside a notebook kernel holds unrelated
            arguments).
    """
    parser = argparse.ArgumentParser(description='cifar-10 with PyTorch')
    parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
    parser.add_argument('--epoch', default=50, type=int, help='number of epoch tp train for')
    parser.add_argument('--trainBatchSize', default=128, type=int, help='training batch size')
    parser.add_argument('--testBatchSize', default=128, type=int, help='testing batch size')
    parser.add_argument('--cuda', default=torch.cuda.is_available(), type=_str2bool, help='use cuda or not')

    if argv is None:
        argv = ['--lr', '0.001', '--epoch', '50', '--trainBatchSize', '128', '--testBatchSize', '128', '--cuda', 'True']
    args = parser.parse_args(argv)

    solver = Solver(args)
    solver.run()


CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# Solver
class Solver(object):
    """Train and evaluate a model on CIFAR-10.

    Args:
        config: object exposing ``lr``, ``epoch``, ``trainBatchSize``,
            ``testBatchSize`` and ``cuda`` attributes (e.g. an
            ``argparse.Namespace``).
    """

    def __init__(self, config):
        self.model = None
        self.lr = config.lr
        self.epochs = config.epoch
        self.train_batch_size = config.trainBatchSize
        self.test_batch_size = config.testBatchSize
        self.criterion = None
        self.optimizer = None
        self.scheduler = None
        # Honour the request for CUDA only when a device is actually present,
        # instead of failing later on the first ``.to('cuda')`` call.
        use_cuda = bool(config.cuda) and torch.cuda.is_available()
        if config.cuda and not use_cuda:
            print('CUDA requested but not available; falling back to CPU')
        self.device = 'cuda' if use_cuda else 'cpu'
        self.train_loader = None
        self.test_loader = None

    def print_model(self):
        """Print the current model."""
        print(self.model)

    def load_data(self):
        """Create the CIFAR-10 train/test loaders (data stored under ./data)."""
        train_transform = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.ToTensor()])
        test_transform = transforms.Compose([transforms.ToTensor()])
        train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
        test_set = datasets.CIFAR10(root='./data', train=False, download=False, transform=test_transform)
        self.train_loader = DataLoader(train_set, batch_size=self.train_batch_size, shuffle=True)
        self.test_loader = DataLoader(test_set, batch_size=self.test_batch_size, shuffle=False)

    def load_model(self):
        """Instantiate the model, optimizer, LR scheduler and loss."""
        # Alternative architectures (defined elsewhere) can be swapped in here:
        # self.model = LeNet().to(self.device)
        # self.model = AlexNet().to(self.device)
        # self.model = VGG11().to(self.device)
        # self.model = VGG13().to(self.device)
        # self.model = VGG16().to(self.device)
        # self.model = VGG19().to(self.device)
        self.model = GoogLeNet().to(self.device)
        # self.model = resnet18().to(self.device)
        # self.model = resnet34().to(self.device)
        # self.model = resnet50().to(self.device)
        # self.model = resnet101().to(self.device)
        # self.model = resnet152().to(self.device)
        # self.model = DenseNet121().to(self.device)
        # self.model = DenseNet161().to(self.device)
        # self.model = DenseNet169().to(self.device)
        # self.model = DenseNet201().to(self.device)
        # self.model = WideResNet(depth=28, num_classes=10).to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        # NOTE: the LR is halved after epochs 75 and 150, so with the default
        # of 50 epochs the schedule never changes the learning rate.
        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[75, 150], gamma=0.5)
        self.criterion = nn.CrossEntropyLoss().to(self.device)

    def _run_epoch(self, loader, training):
        """Run one pass over ``loader``; update the weights when ``training``.

        Returns:
            ``(loss_sum, accuracy)``: the sum of the per-batch losses and
            the fraction of correctly classified samples (0.0 for an empty
            loader).
        """
        loss_sum = 0.0
        correct = 0
        total = 0
        num_batches = len(loader)

        for ibatch, (images, labels) in enumerate(loader):
            images, labels = images.to(self.device), labels.to(self.device)
            if training:
                self.optimizer.zero_grad()
            outputs = self.model(images)
            loss = self.criterion(outputs, labels)
            if training:
                loss.backward()
                self.optimizer.step()
            loss_sum += loss.item()
            # Index of the largest logit along the class dimension.
            _, pred = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (pred == labels).sum().item()
            if ibatch % 99 == 0:
                print('\t{}/{}: loss = {:.4f}, Acc = {:.3f}%'.format(ibatch, num_batches, loss_sum/(ibatch+1), 100. * correct/total))

        accuracy = float(correct) / total if total else 0.0
        return loss_sum, accuracy

    # train
    def train(self):
        """Train for one epoch; return ``(summed batch loss, accuracy)``."""
        print('Training:')
        self.model.train()
        return self._run_epoch(self.train_loader, training=True)

    # test
    def test(self):
        """Evaluate on the test loader without gradients.

        Returns ``(summed batch loss, accuracy)``.
        """
        print('Testing:')
        self.model.eval()
        with torch.no_grad():
            return self._run_epoch(self.test_loader, training=False)

    def save_model(self):
        """Serialize the whole model object to ./model/vgg_cifar10.pth."""
        import os

        # NOTE: the file name says "vgg" although load_model() builds a
        # GoogLeNet; the path is kept unchanged so existing consumers of the
        # checkpoint keep working.
        model_out_path = './model/vgg_cifar10.pth'
        # torch.save does not create missing parent directories.
        os.makedirs(os.path.dirname(model_out_path), exist_ok=True)
        torch.save(self.model, model_out_path)
        print("* Checkpoint saved to {}".format(model_out_path))

    # run
    def run(self):
        """Load data and model, train for ``self.epochs`` epochs, then save.

        Prints the best test accuracy seen; the checkpoint written at the
        end holds the model state after the final epoch.
        """
        self.load_data()
        self.load_model()
        accuracy = 0.

        for epoch in range(1, self.epochs + 1):
            print("\n===> epoch: {}/{}".format(epoch, self.epochs))
            train_result = self.train()
            print(train_result)
            test_result = self.test()
            accuracy = max(accuracy, test_result[1])
            # Advance the LR schedule once per epoch, after the optimizer has
            # stepped (the order PyTorch expects; no explicit epoch argument).
            self.scheduler.step()
        print("===> BEST ACC. PERFORMANCE: {:.3f}%".format(accuracy * 100))
        self.save_model()


# start
# The entry point must come after main() and Solver are defined; calling
# main() above their definitions raises NameError in a fresh interpreter.
if __name__ == '__main__':
    main()

Files already downloaded and verified

===> epoch: 1/50
Training:
	0/391: loss = 2.3144, Acc = 15.625%
	99/391: loss = 1.5603, Acc = 41.969%
	198/391: loss = 1.3859, Acc = 48.869%
	297/391: loss = 1.2669, Acc = 53.576%
(462.0502653121948, 0.56814)
Testing:
	0/79: loss = 0.8653, Acc = 64.844%

===> epoch: 2/50
Training:
	0/391: loss = 0.8877, Acc = 67.188%
	99/391: loss = 0.8085, Acc = 71.094%
	198/391: loss = 0.7766, Acc = 72.444%
	297/391: loss = 0.7540, Acc = 73.382%
(283.73740047216415, 0.74518)
Testing:
	0/79: loss = 0.7382, Acc = 74.219%

===> epoch: 3/50
Training:
	0/391: loss = 0.4768, Acc = 82.031%
	99/391: loss = 0.5574, Acc = 80.273%
	198/391: loss = 0.5553, Acc = 80.555%
	297/391: loss = 0.5556, Acc = 80.623%
(214.47957506775856, 0.80916)
Testing:
	0/79: loss = 0.6552, Acc = 73.438%

===> epoch: 4/50
Training:
	0/391: loss = 0.4099, Acc = 87.500%
	99/391: loss = 0.4646, Acc = 83.922%
	198/391: loss = 0.4576, Acc = 84.159%
	297/391: loss = 0.4612, Acc = 84.152%
(177.154325395