In [91]:
# 导入所需库
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as datas
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

In [92]:
# 设置GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [93]:
# 参数设置
epochs = 90
batchsize = 128
momentum = 0.9
weight_decay = 0.0005
lr_initial = 0.01
Image_size = 227
num_classes = 1000



In [94]:
# Alexnet
class AlexNet(nn.Module):
    def __init__(self):
        super().__init__()
    
        # input size should be (b x 3 x 227 x 227)
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),# (b x 96 x 55 x 55)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # section 3.3
            nn.MaxPool2d(kernel_size=3,stride=2),# section 4.4 (b x 96 x 27 x 27)
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2), #(b x 256 x 27 x 27)
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3,stride=2), #(b x 256 x 13 x 13)
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1), #(b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels= 384, kernel_size=3, padding=1), #(b x 384 x 13 x 13)
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels= 256, kernel_size=3, padding=1), #(b x 256 x 13 x 13)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2) #(b x 256 x 6 x 6)
        )

        # Classifier is just a name for Linear layers
        self.Classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=(256*6*6), out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes)
        )
        self.init_bias() # initial bias
    
    

    def init_bias(self):
        for layer in self.net:
            if isinstance(layer,nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0.0)
        # 对第二个、第四个、第五个卷积层bias设为1
        nn.init.constant_(self.net[4].bias, 1.0)
        nn.init.constant_(self.net[10].bias, 1.0)
        nn.init.constant_(self.net[12].bias, 1.0)

        '''for layer in self.Classifier:
            if isinstance(layer,nn.Linear):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 1.0)'''

    def forward(self, x):
        x = self.net(x)
        x = x.reshape(-1,256*6*6)
        return self.Classifier(x)

In [95]:
seed = torch.initial_seed()
print('Use Seed: {}'.format(seed))

Use Seed: 13931179872293712972


In [96]:
tbwriter = SummaryWriter(log_dir='LOGDIR')
print('TensorboardX summary writer created')

TensorboardX summary writer created


In [97]:
# 创建模型
alexnet = AlexNet().to(device=device)
print(alexnet)
print('Alex_Net created')

AlexNet(
  (net): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (Classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): Re

In [98]:
# 自定义转化图片转换函数 (1 x 24 x 24) to (3 x 227 x 227)
def custom_transform(image):
    # 将单通道图像转换为三通道
    image = image.repeat(3, 1, 1)
    # 调整图像大小到 (227, 227)
    image = transforms.Resize((227, 227))(image)
    return image

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(custom_transform)
])

In [99]:
# 数据准备
train_dataset = datasets.FashionMNIST('./data', train=True, transform=transform,
                                      download=True)
test_dataset = datasets.FashionMNIST('./data', train=False, transform=transform,
                                      download=True)
train_dataloader = datas.DataLoader(train_dataset,batch_size=batchsize)
test_dataloader = datas.DataLoader(test_dataset,batch_size=batchsize)

In [100]:
# 定义优化算法
optimizer = optim.SGD(alexnet.parameters(), lr=lr_initial, momentum=momentum, weight_decay= weight_decay)
print('Optimizer created')

# multiply LR by 1 / 10 after every 30 epochs
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
print('LR Scheduler created')

# 定义损失函数
loss_fn = nn.CrossEntropyLoss()


Optimizer created
LR Scheduler created


In [None]:
# 训练开始
total_step = 1
for i in range(epochs):
    lr_scheduler.step()
    for data in train_dataloader:
        images, targets = data
        images, targets = images.to(device), targets.to(device)

        # 计算损失值
        output = alexnet(images)
        loss = loss_fn(output, targets)

        # 更新参数
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # log the information and add to tensorboard
        if total_step % 10 == 0:
                with torch.no_grad():
                    _, preds = torch.max(output, 1)
                    accuracy = torch.sum(preds == targets)

                    print('Epoch: {} \tStep: {} \tLoss: {:.4f} \tAcc: {}'
                        .format(i + 1, total_step, loss.item(), accuracy.item()))
                    tbwriter.add_scalar('loss', loss.item(), total_step)
                    tbwriter.add_scalar('accuracy', accuracy.item(), total_step)
        
        if total_step % 100 == 0:
                with torch.no_grad():
                    # print and save the grad of the parameters
                    # also print and save parameter values
                    print('*' * 10)
                    for name, parameter in alexnet.named_parameters():
                        if parameter.grad is not None:
                            avg_grad = torch.mean(parameter.grad)
                            print('\t{} - grad_avg: {}'.format(name, avg_grad))
                            tbwriter.add_scalar('grad_avg/{}'.format(name), avg_grad.item(), total_step)
                            tbwriter.add_histogram('grad/{}'.format(name),
                                    parameter.grad.cpu().numpy(), total_step)
                        if parameter.data is not None:
                            avg_weight = torch.mean(parameter.data)
                            print('\t{} - param_avg: {}'.format(name, avg_weight))
                            tbwriter.add_histogram('weight/{}'.format(name),
                                    parameter.data.cpu().numpy(), total_step)
                            tbwriter.add_scalar('weight_avg/{}'.format(name), avg_weight.item(), total_step)

        total_step = total_step+1



Epoch: 1 	Step: 10 	Loss: 6.4151 	Acc: 12
Epoch: 1 	Step: 20 	Loss: 3.1762 	Acc: 21
Epoch: 1 	Step: 30 	Loss: 3.0151 	Acc: 18
Epoch: 1 	Step: 40 	Loss: 2.4541 	Acc: 14
Epoch: 1 	Step: 50 	Loss: 2.5537 	Acc: 11
Epoch: 1 	Step: 60 	Loss: 2.4018 	Acc: 13
Epoch: 1 	Step: 70 	Loss: 2.3810 	Acc: 11
Epoch: 1 	Step: 80 	Loss: 2.3532 	Acc: 14
Epoch: 1 	Step: 90 	Loss: 2.3415 	Acc: 10
Epoch: 1 	Step: 100 	Loss: 2.3309 	Acc: 7
**********
	net.0.weight - grad_avg: 1.4880896515023778e-06
	net.0.weight - param_avg: -4.360907041700557e-05
	net.0.bias - grad_avg: 1.2867454870502115e-06
	net.0.bias - param_avg: -1.3037976714258548e-05
	net.4.weight - grad_avg: 3.943101773984381e-07
	net.4.weight - param_avg: 7.631379048689269e-06
	net.4.bias - grad_avg: 3.385863601579331e-05
	net.4.bias - param_avg: 0.9953987002372742
	net.8.weight - grad_avg: 8.817044545139652e-06
	net.8.weight - param_avg: 4.499455462791957e-05
	net.8.bias - grad_avg: 2.0420096916495822e-05
	net.8.bias - param_avg: 8.739939221413806e