In [3]:
import torch
import torch.nn as nn
import torch.utils.data as Data, Dataset, DataLoader
import torchvision
from torchvision import transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import os
import copy
import glob
import time
import pandas as pd
from typing import Dict, List, Tuple
from PIL import Image
import random

In [4]:
DOANLOAD_DATASET = True
LR = 0.0001
BATCH_SIZE=32
EPOCH = 30
OUTPUT_PATH = '/kaggle/working/'

In [5]:
import torch

# 檢查GPU是否可用
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(torch.cuda.get_device_name(0))
else:
    device = torch.device('cpu')
    print('GPU not available, using CPU.')

Tesla T4


In [6]:
!pip install dill
import dill



In [7]:
class CFG:
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NUM_DEVICES = torch.cuda.device_count()
    NUM_WORKERS = os.cpu_count()
    NUM_CLASSES = 45
    EPOCHS = 7
    BATCH_SIZE = (
        32 if torch.cuda.device_count() <= 2
        else (32 * torch.cuda.device_count())
    )
    LR = 0.001
    APPLY_SHUFFLE = True
    SEED = 768
    HEIGHT = 256
    WIDTH = 256
    CHANNELS = 3
    IMAGE_SIZE = (256, 256, 3)

In [9]:
# Reload the dataset
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


DATASET_PATH = '/kaggle/working'

# 加載數據集
with open('/kaggle/input/usedata/train_loader.pkl','rb') as f:
    train_loader = dill.load(f)
with open('/kaggle/input/usedata/val_loader.pkl','rb') as f:
    val_loader = dill.load(f)
with open('/kaggle/input/usedata/test_loader.pkl','rb') as f:
    test_loader = dill.load(f)

In [10]:
# 建立存放結果的資料集
import os

path = '/kaggle/working/test'
if not os.path.isdir(path):
    os.mkdir(path)
    
path = '/kaggle/working/train'
if not os.path.isdir(path):
    os.mkdir(path)

In [11]:
class AlexNet(nn.Module):

    def __init__(self, num_classes=45):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Linear(256*8*8, num_classes)
        #self.classifier = nn.Linear(256 * (image_size_after_pooling // 4) * (image_size_after_pooling // 4), num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


def alexnet(**kwargs):
    model = AlexNet(**kwargs)
    return model

# Alexnet

In [12]:
import torch
torch.cuda.empty_cache()
alexNet = alexnet().cuda()
alexNet = nn.DataParallel(alexNet)

optimizer = torch.optim.Adam(alexNet.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)
#local_train_data = DataLoader(train_dataloader, batch_size=batch_size, shuffle=True)

# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

for epoch in range(EPOCH):
    alexNet.train()
    for step, (x, y) in enumerate(local_train_data):
        #b_x = Variable(x, requires_grad=False)
        #b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = alexNet(b_x)
        loss = loss_function(out, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))

            
local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)

# 在評估模式下，模型只需要在主 GPU 上執行
alexNet = alexNet.module

# 測試集
alexNet.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(alexNet.state_dict(), model_path+'alexNet.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = alexNet(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = alexNet(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'alexNet_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'alexNet_test_prediction.pt'))


"\nalexNet = alexnet().cuda()\nalexNet = nn.DataParallel(alexNet)\n\noptimizer = torch.optim.Adam(alexNet.parameters(), lr=LR)\nloss_function = nn.CrossEntropyLoss()\nlocal_train_data = copy.deepcopy(train_loader)\n#local_train_data = DataLoader(train_dataloader, batch_size=batch_size, shuffle=True)\nfor epoch in range(EPOCH):\n    alexNet.train()\n    for step, (x, y) in enumerate(local_train_data):\n        #b_x = Variable(x, requires_grad=False)\n        #b_y = Variable(y, requires_grad=False)\n        b_x = x.to(device)  # 將輸入數據移動到GPU上\n        b_y = y.to(device)  # 將標籤移動到GPU上\n        out = alexNet(b_x)\n        loss = loss_function(out, b_y)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if step % 100 == 0:\n            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))\n            \ntemp_train_loader = copy.deepcopy(train_loader)\ntemp_train_x, temp_train_y = next(iter(temp_train_loader))\n\nalexNet.eval()  # 將

# DenseNet

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

from torch.autograd import Variable

class Bottleneck(nn.Module):
    def __init__(self, inplanes, expansion=4, growthRate=12, dropRate=0):
        super(Bottleneck, self).__init__()
        planes = expansion * growthRate
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, growthRate, kernel_size=3,
                               padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropRate = dropRate

    def forward(self, x):
        out = self.bn1(x)
        out = self.relu(out)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)
        if self.dropRate > 0:
            out = F.dropout(out, p=self.dropRate, training=self.training)

        out = torch.cat((x, out), 1)

        return out


class BasicBlock(nn.Module):
    def __init__(self, inplanes, expansion=1, growthRate=12, dropRate=0):
        super(BasicBlock, self).__init__()
        planes = expansion * growthRate
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, growthRate, kernel_size=3,
                               padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.dropRate = dropRate

    def forward(self, x):
        out = self.bn1(x)
        out = self.relu(out)
        out = self.conv1(out)
        if self.dropRate > 0:
            out = F.dropout(out, p=self.dropRate, training=self.training)

        out = torch.cat((x, out), 1)

        return out


class Transition(nn.Module):
    def __init__(self, inplanes, outplanes):
        super(Transition, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, outplanes, kernel_size=1,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.bn1(x)
        out = self.relu(out)
        out = self.conv1(out)
        out = F.avg_pool2d(out, 2)
        return out


class DenseNet(nn.Module):

    def __init__(self, depth=40, block=Bottleneck,
        dropRate=0, num_classes=45, growthRate=12, compressionRate=2):
        super(DenseNet, self).__init__()

        assert (depth - 4) % 3 == 0, 'depth should be 3n+4'
        n = (depth - 4) / 3 if block == BasicBlock else (depth - 4) // 6

        self.growthRate = growthRate
        self.dropRate = dropRate

        # self.inplanes is a global variable used across multiple
        # helper functions
        self.inplanes = growthRate * 2
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1,
                               bias=False)
        self.dense1 = self._make_denseblock(block, n)
        self.trans1 = self._make_transition(compressionRate)
        self.dense2 = self._make_denseblock(block, n)
        self.trans2 = self._make_transition(compressionRate)
        self.dense3 = self._make_denseblock(block, n)
        self.bn = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(self.inplanes* 64, num_classes)

        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_denseblock(self, block, blocks):
        layers = []
        for i in range(blocks):
            # Currently we fix the expansion ratio as the default value
            layers.append(block(self.inplanes, growthRate=self.growthRate, dropRate=self.dropRate))
            self.inplanes += self.growthRate

        return nn.Sequential(*layers)

    def _make_transition(self, compressionRate):
        inplanes = self.inplanes
        outplanes = int(math.floor(self.inplanes // compressionRate))
        self.inplanes = outplanes
        return Transition(inplanes, outplanes)


    def forward(self, x):
        x = self.conv1(x)

        x = self.trans1(self.dense1(x))
        x = self.trans2(self.dense2(x))
        x = self.dense3(x)
        x = self.bn(x)
        x = self.relu(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def densenet(**kwargs):
    '''
    Constructs a ResNet model.
    '''
    return DenseNet(**kwargs)

In [None]:
import torch
torch.cuda.empty_cache()

Densenet = densenet().cuda()

Densenet = nn.DataParallel(Densenet)

optimizer = torch.optim.Adam(Densenet.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Densenet.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Densenet(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))

# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)


# 在評估模式下，模型只需要在主 GPU 上執行
Densenet = Densenet.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Densenet.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Densenet.state_dict(), model_path+'Densenet.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Densenet(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Densenet(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Densenet_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Densenet_test_prediction.pt'))


# ResNet

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import copy
import os
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.autograd import Variable

In [14]:
import torch.nn as nn
import math

def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, depth, num_classes=45, block_name='BasicBlock'):
        super(ResNet, self).__init__()
        # Model type specifies number of layers for CIFAR-10 model
        if block_name.lower() == 'basicblock':
            assert (depth - 2) % 6 == 0, 'When use basicblock, depth should be 6n+2, e.g. 20, 32, 44, 56, 110, 1202'
            n = (depth - 2) // 6
            block = BasicBlock
        elif block_name.lower() == 'bottleneck':
            assert (depth - 2) % 9 == 0, 'When use bottleneck, depth should be 9n+2, e.g. 20, 29, 47, 56, 110, 1199'
            n = (depth - 2) // 9
            block = Bottleneck
        else:
            raise ValueError('block_name shoule be Basicblock or Bottleneck')


        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, n)
        self.layer2 = self._make_layer(block, 32, n, stride=2)
        self.layer3 = self._make_layer(block, 64, n, stride=2)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion * 64, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)    # 32x32

        x = self.layer1(x)  # 32x32
        x = self.layer2(x)  # 16x16
        x = self.layer3(x)  # 8x8

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def resnet(**kwargs):
    '''
    Constructs a ResNet model.
    '''
    return ResNet(**kwargs)

In [15]:
import torch
torch.cuda.empty_cache()

Resnet_20 = resnet(depth=20).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Resnet_20 = nn.DataParallel(Resnet_20)

optimizer = torch.optim.Adam(Resnet_20.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Resnet_20.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Resnet_20(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))
            
            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Resnet_20 = Resnet_20.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Resnet_20.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Resnet_20.state_dict(), model_path+'Resnet_20.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_20(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_20(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)
        

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_20.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_20.pt'))

'\nimport torch\ntorch.cuda.empty_cache()\n\nResnet_20 = resnet(depth=20).cuda()\n# 使用 DataParallel 將模型複製到多個 GPU\nResnet_20 = nn.DataParallel(Resnet_20)\n\noptimizer = torch.optim.Adam(Resnet_20.parameters(), lr=LR)\nloss_function = nn.CrossEntropyLoss()\nlocal_train_data = copy.deepcopy(train_loader)\n\nfor epoch in range(EPOCH):\n    Resnet_20.train()\n    for step, (x, y) in enumerate(local_train_data):\n        # b_x = Variable(x, requires_grad=False)\n        # b_y = Variable(y, requires_grad=False)\n        b_x = x.to(device)  # 將輸入數據移動到GPU上\n        b_y = y.to(device)  # 將標籤移動到GPU上\n        out = Resnet_20(b_x)\n        loss = loss_function(out, b_y)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if step % 100 == 0:\n            print(\'Epoch: {} | Step: {} | Loss: {}\'.format(epoch + 1, step, loss))\n            \n            \n# 容器來儲存每一輪的輸出\ntrain_predict = torch.Tensor().to(device)\ntrain_label = torch.Tensor().to(device)\ntest_

In [16]:
import torch
torch.cuda.empty_cache()

Resnet_32 = resnet(depth=32).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Resnet_32 = nn.DataParallel(Resnet_32)

optimizer = torch.optim.Adam(Resnet_32.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Resnet_32.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Resnet_32(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))
            
            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Resnet_32 = Resnet_32.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Resnet_32.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Resnet_32.state_dict(), model_path+'Resnet_32.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_32(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_32(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_32.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_32.pt'))

'\nimport torch\ntorch.cuda.empty_cache()\n\nResnet_32 = resnet(depth=32).cuda()\n# 使用 DataParallel 將模型複製到多個 GPU\nResnet_32 = nn.DataParallel(Resnet_32)\n\noptimizer = torch.optim.Adam(Resnet_32.parameters(), lr=LR)\nloss_function = nn.CrossEntropyLoss()\nlocal_train_data = copy.deepcopy(train_loader)\n\nfor epoch in range(EPOCH):\n    Resnet_32.train()\n    for step, (x, y) in enumerate(local_train_data):\n        # b_x = Variable(x, requires_grad=False)\n        # b_y = Variable(y, requires_grad=False)\n        b_x = x.to(device)  # 將輸入數據移動到GPU上\n        b_y = y.to(device)  # 將標籤移動到GPU上\n        out = Resnet_32(b_x)\n        loss = loss_function(out, b_y)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if step % 100 == 0:\n            print(\'Epoch: {} | Step: {} | Loss: {}\'.format(epoch + 1, step, loss))\n            \n            \n# 容器來儲存每一輪的輸出\ntrain_predict = torch.Tensor().to(device)\ntrain_label = torch.Tensor().to(device)\ntest_

In [17]:
import torch
torch.cuda.empty_cache()

Resnet_44 = resnet(depth=44).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Resnet_44 = nn.DataParallel(Resnet_44)

optimizer = torch.optim.Adam(Resnet_44.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Resnet_44.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Resnet_44(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))
            
            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Resnet_44 = Resnet_44.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Resnet_44.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Resnet_44.state_dict(), model_path+'Resnet_44.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_44(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_44(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_44.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_44.pt'))

'\nimport torch\ntorch.cuda.empty_cache()\n\nResnet_44 = resnet(depth=44).cuda()\n# 使用 DataParallel 將模型複製到多個 GPU\nResnet_44 = nn.DataParallel(Resnet_44)\n\noptimizer = torch.optim.Adam(Resnet_44.parameters(), lr=LR)\nloss_function = nn.CrossEntropyLoss()\nlocal_train_data = copy.deepcopy(train_loader)\n\nfor epoch in range(EPOCH):\n    Resnet_44.train()\n    for step, (x, y) in enumerate(local_train_data):\n        # b_x = Variable(x, requires_grad=False)\n        # b_y = Variable(y, requires_grad=False)\n        b_x = x.to(device)  # 將輸入數據移動到GPU上\n        b_y = y.to(device)  # 將標籤移動到GPU上\n        out = Resnet_44(b_x)\n        loss = loss_function(out, b_y)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if step % 100 == 0:\n            print(\'Epoch: {} | Step: {} | Loss: {}\'.format(epoch + 1, step, loss))\n            \n            \n# 容器來儲存每一輪的輸出\ntrain_predict = torch.Tensor().to(device)\ntrain_label = torch.Tensor().to(device)\ntest_

In [18]:
import torch
torch.cuda.empty_cache()

Resnet_56 = resnet(depth=56).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Resnet_56 = nn.DataParallel(Resnet_56)

optimizer = torch.optim.Adam(Resnet_56.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Resnet_56.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Resnet_56(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))
            
            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Resnet_56 = Resnet_56.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Resnet_56.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Resnet_56.state_dict(), model_path+'Resnet_56.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_56(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_56(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_56.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_56.pt'))

'\nimport torch\ntorch.cuda.empty_cache()\n\nResnet_56 = resnet(depth=56).cuda()\n# 使用 DataParallel 將模型複製到多個 GPU\nResnet_56 = nn.DataParallel(Resnet_56)\n\noptimizer = torch.optim.Adam(Resnet_56.parameters(), lr=LR)\nloss_function = nn.CrossEntropyLoss()\nlocal_train_data = copy.deepcopy(train_loader)\n\nfor epoch in range(EPOCH):\n    Resnet_56.train()\n    for step, (x, y) in enumerate(local_train_data):\n        # b_x = Variable(x, requires_grad=False)\n        # b_y = Variable(y, requires_grad=False)\n        b_x = x.to(device)  # 將輸入數據移動到GPU上\n        b_y = y.to(device)  # 將標籤移動到GPU上\n        out = Resnet_56(b_x)\n        loss = loss_function(out, b_y)\n\n        optimizer.zero_grad()\n        loss.backward()\n        optimizer.step()\n\n        if step % 100 == 0:\n            print(\'Epoch: {} | Step: {} | Loss: {}\'.format(epoch + 1, step, loss))\n            \n            \n# 容器來儲存每一輪的輸出\ntrain_predict = torch.Tensor().to(device)\ntrain_label = torch.Tensor().to(device)\ntest_

In [19]:
import torch
torch.cuda.empty_cache()

Resnet_110 = resnet(depth=110).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Resnet_110 = nn.DataParallel(Resnet_110)

optimizer = torch.optim.Adam(Resnet_110.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Resnet_110.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Resnet_110(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))
            
            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Resnet_110 = Resnet_110.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Resnet_110.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Resnet_110.state_dict(), model_path+'Resnet_110.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_110(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Resnet_110(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_110_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Resnet_110_test_prediction.pt'))

Epoch: 1 | Step: 0 | Loss: 5.28169584274292
Epoch: 1 | Step: 100 | Loss: 3.9966320991516113
Epoch: 1 | Step: 200 | Loss: 3.562718629837036
Epoch: 1 | Step: 300 | Loss: 3.560755729675293
Epoch: 2 | Step: 0 | Loss: 3.686568021774292
Epoch: 2 | Step: 100 | Loss: 3.6070845127105713
Epoch: 2 | Step: 200 | Loss: 3.5842502117156982
Epoch: 2 | Step: 300 | Loss: 3.2494888305664062
Epoch: 3 | Step: 0 | Loss: 3.6376397609710693
Epoch: 3 | Step: 100 | Loss: 3.5706472396850586
Epoch: 3 | Step: 200 | Loss: 3.512605667114258
Epoch: 3 | Step: 300 | Loss: 3.2218384742736816
Epoch: 4 | Step: 0 | Loss: 3.1068105697631836
Epoch: 4 | Step: 100 | Loss: 3.218820095062256
Epoch: 4 | Step: 200 | Loss: 3.097757339477539
Epoch: 4 | Step: 300 | Loss: 3.131363868713379
Epoch: 5 | Step: 0 | Loss: 2.756802558898926
Epoch: 5 | Step: 100 | Loss: 3.28602933883667
Epoch: 5 | Step: 200 | Loss: 2.690990924835205
Epoch: 5 | Step: 300 | Loss: 3.3440909385681152
Epoch: 6 | Step: 0 | Loss: 2.893430471420288
Epoch: 6 | Step: 1

# VGG

In [None]:
'''VGG for CIFAR10. FC layers are removed.
'''
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import math


__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
}


class VGG(nn.Module):

    def __init__(self, features, num_classes=45):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Linear(512*64, num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def vgg11(**kwargs):
    '''VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    '''
    model = VGG(make_layers(cfg['A']), **kwargs)
    return model


def vgg11_bn(**kwargs):
    '''VGG 11-layer model (configuration "A") with batch normalization'''
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    return model


def vgg13(**kwargs):
    '''VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    '''
    model = VGG(make_layers(cfg['B']), **kwargs)
    return model


def vgg13_bn(**kwargs):
    '''VGG 13-layer model (configuration "B") with batch normalization'''
    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    return model


def vgg16(**kwargs):
    '''VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    '''
    model = VGG(make_layers(cfg['D']), **kwargs)
    return model


def vgg16_bn(**kwargs):
    '''VGG 16-layer model (configuration "D") with batch normalization'''
    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    return model


def vgg19(**kwargs):
    '''VGG 19-layer model (configuration "E")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    '''
    model = VGG(make_layers(cfg['E']), **kwargs)
    return model


def vgg19_bn(**kwargs):
    '''VGG 19-layer model (configuration 'E') with batch normalization'''
    model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
    return model

In [None]:
import torch
torch.cuda.empty_cache()

Vgg16_bn = vgg16_bn().cuda()

Vgg16_bn = nn.DataParallel(Vgg16_bn)

optimizer = torch.optim.Adam(Vgg16_bn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Vgg16_bn.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Vgg16_bn(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))

# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)


# 在評估模式下，模型只需要在主 GPU 上執行
Vgg16_bn = Vgg16_bn.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Vgg16_bn.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Vgg16_bn.state_dict(), model_path+'Vgg16_bn.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Vgg16_bn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Vgg16_bn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Vgg16_bn_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Vgg16_bn_test_prediction.pt'))            

In [None]:
import torch
torch.cuda.empty_cache()

Vgg19_bn = vgg19_bn().cuda()

Vgg19_bn = nn.DataParallel(Vgg19_bn)
optimizer = torch.optim.Adam(Vgg19_bn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Vgg19_bn.train()
    for step, (x, y) in enumerate(local_train_data):
        # b_x = Variable(x, requires_grad=False)
        # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Vgg19_bn(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))


# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)


# 在評估模式下，模型只需要在主 GPU 上執行
Vgg19_bn = Vgg19_bn.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Vgg19_bn.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Vgg19_bn.state_dict(), model_path+'Vgg19_bn.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Vgg19_bn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Vgg19_bn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Vgg19_bn_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Vgg19_bn_test_prediction.pt'))      

# Wrn


In [20]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                               padding=0, bias=False) or None
    def forward(self, x):
        if not self.equalInOut:
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)

class NetworkBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)
    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(nb_layers):
            layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)

class WideResNet(nn.Module):
    def __init__(self, depth, num_classes, widen_factor=1, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
        n = (depth - 4) // 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3]*64, num_classes)
        self.nChannels = nChannels[3]

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        #out = out.view(-1, self.nChannels)
        out = out.view(out.size(0), -1)
        return self.fc(out)

def wrn(**kwargs):
    '''
    Constructs a Wide Residual Networks.
    '''
    model = WideResNet(**kwargs)
    return model

In [22]:
import torch
torch.cuda.empty_cache()

Wrn = wrn(depth=28, num_classes=45).cuda()
# 使用 DataParallel 將模型複製到多個 GPU
Wrn = nn.DataParallel(Wrn)

optimizer = torch.optim.Adam(Wrn.parameters(), lr=LR)
loss_function = nn.CrossEntropyLoss()
local_train_data = copy.deepcopy(train_loader)

for epoch in range(EPOCH):
    Wrn.train()
    for step, (x, y) in enumerate(local_train_data):
    # b_x = Variable(x, requires_grad=False)
    # b_y = Variable(y, requires_grad=False)
        b_x = x.to(device)  # 將輸入數據移動到GPU上
        b_y = y.to(device)  # 將標籤移動到GPU上
        out = Wrn(b_x)
        loss = loss_function(out, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            print('Epoch: {} | Step: {} | Loss: {}'.format(epoch + 1, step, loss))

            
# 容器來儲存每一輪的輸出
train_predict = torch.Tensor().to(device)
train_label = torch.Tensor().to(device)
test_predict = torch.Tensor().to(device)
test_label = torch.Tensor().to(device)

# 在評估模式下，模型只需要在主 GPU 上執行
Wrn = Wrn.module

local_train_loader = copy.deepcopy(train_loader)
local_test_loader = copy.deepcopy(test_loader)
# 測試集
Wrn.eval()  # 切換到評估模式
model_path = '/kaggle/working/'
torch.save(Wrn.state_dict(), model_path+'Wrn.pth')


with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_train_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Wrn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        train_predict = torch.cat((train_predict, out), dim=0)
        train_label = torch.cat((train_label, b_y), dim=0)

with torch.no_grad():  # 在評估模式下，不計算梯度
    for step, (x, y) in enumerate(local_test_loader):
        b_x = x.to(device)
        b_y = y.to(device)
        out = Wrn(b_x)
        # 將該批次的預測結果和標籤添加到對應的容器中
        test_predict = torch.cat((test_predict, out), dim=0)
        test_label = torch.cat((test_label, b_y), dim=0)

# train_predict、train_label 分別包含了每一輪的模型訓練集預測和標籤
# test_predict、test_label 分別包含了模型對測試集的預測和標籤
print("Shape of train_predict:", train_predict.shape)
print("Shape of train_label:", train_label.shape)
print("Shape of test_predict:", test_predict.shape)
print("Shape of test_label:", test_label.shape)


output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_predict,'labelVectors':train_label}
torch.save(temp_dict, os.path.join(output_path, 'Wrn_28_train_prediction.pt'))


output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':test_predict,'labelVectors':test_label}
torch.save(temp_dict, os.path.join(output_path, 'Wrn_28_test_prediction.pt'))
            

'''
            
temp_train_loader = copy.deepcopy(train_loader)
temp_train_x, temp_train_y = next(iter(temp_train_loader))

Wrn.eval()  # 將模型設置為評估模式
with torch.no_grad():  # 關閉梯度計算
    temp_train_x, temp_train_y = temp_train_x.to(device), temp_train_y.to(device)  # 將測試數據移動到GPU上
    train_prediction = torch.argmax(Wrn(temp_train_x), 1)
    acc = torch.eq(train_prediction, temp_train_y)
    accuracy = torch.sum(acc) / acc.shape[0]
    print('Accuracy: {:.2%}'.format(accuracy.item()))

# 保存模型的方式建議使用 torch.save(model.state_dict(), path) 以保存模型的狀態字典
output_path = OUTPUT_PATH+'train'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':train_prediction.tolist(),'labelVectors':temp_train_y.tolist()}
torch.save(temp_dict, os.path.join(output_path, 'Wrn_28_train_prediction.pt'))

local_test_loader = copy.deepcopy(test_loader)
temp_test_x, temp_test_y = next(iter(local_test_loader))

#alexNet.eval()  # 將模型設置為評估模式
with torch.no_grad():  # 關閉梯度計算
    temp_test_x, temp_test_y = temp_test_x.to(device), temp_test_y.to(device)  # 將測試數據移動到GPU上
    prediction = torch.argmax(Wrn(temp_test_x), 1)
    acc = torch.eq(prediction, temp_test_y)
    accuracy = torch.sum(acc) / acc.shape[0]
    print('Accuracy: {:.2%}'.format(accuracy.item()))

# 保存模型的方式建議使用 torch.save(model.state_dict(), path) 以保存模型的狀態字典
output_path = OUTPUT_PATH+'test'
if not os.path.exists(output_path):
    os.mkdir(output_path)
temp_dict = {'predictionVectors':prediction.tolist(),'labelVectors':temp_test_y.tolist()}
torch.save(temp_dict, os.path.join(output_path, 'Wrn_28_test_prediction.pt'))
'''

Epoch: 1 | Step: 0 | Loss: 3.8339362144470215
Epoch: 1 | Step: 100 | Loss: 3.527473211288452
Epoch: 1 | Step: 200 | Loss: 3.6259005069732666
Epoch: 1 | Step: 300 | Loss: 3.5357964038848877
Epoch: 2 | Step: 0 | Loss: 3.137824296951294
Epoch: 2 | Step: 100 | Loss: 3.1813905239105225
Epoch: 2 | Step: 200 | Loss: 3.226167678833008
Epoch: 2 | Step: 300 | Loss: 3.027867317199707
Epoch: 3 | Step: 0 | Loss: 3.1532201766967773
Epoch: 3 | Step: 100 | Loss: 3.101816415786743
Epoch: 3 | Step: 200 | Loss: 3.036646842956543
Epoch: 3 | Step: 300 | Loss: 3.03568696975708
Epoch: 4 | Step: 0 | Loss: 2.922797918319702
Epoch: 4 | Step: 100 | Loss: 2.4767239093780518
Epoch: 4 | Step: 200 | Loss: 3.0411837100982666
Epoch: 4 | Step: 300 | Loss: 2.941531181335449
Epoch: 5 | Step: 0 | Loss: 2.5725793838500977
Epoch: 5 | Step: 100 | Loss: 2.791670322418213
Epoch: 5 | Step: 200 | Loss: 2.9361572265625
Epoch: 5 | Step: 300 | Loss: 2.968845844268799
Epoch: 6 | Step: 0 | Loss: 2.65017032623291
Epoch: 6 | Step: 100 

"\n            \ntemp_train_loader = copy.deepcopy(train_loader)\ntemp_train_x, temp_train_y = next(iter(temp_train_loader))\n\nWrn.eval()  # 將模型設置為評估模式\nwith torch.no_grad():  # 關閉梯度計算\n    temp_train_x, temp_train_y = temp_train_x.to(device), temp_train_y.to(device)  # 將測試數據移動到GPU上\n    train_prediction = torch.argmax(Wrn(temp_train_x), 1)\n    acc = torch.eq(train_prediction, temp_train_y)\n    accuracy = torch.sum(acc) / acc.shape[0]\n    print('Accuracy: {:.2%}'.format(accuracy.item()))\n\n# 保存模型的方式建議使用 torch.save(model.state_dict(), path) 以保存模型的狀態字典\noutput_path = OUTPUT_PATH+'train'\nif not os.path.exists(output_path):\n    os.mkdir(output_path)\ntemp_dict = {'predictionVectors':train_prediction.tolist(),'labelVectors':temp_train_y.tolist()}\ntorch.save(temp_dict, os.path.join(output_path, 'Wrn_28_train_prediction.pt'))\n\nlocal_test_loader = copy.deepcopy(test_loader)\ntemp_test_x, temp_test_y = next(iter(local_test_loader))\n\n#alexNet.eval()  # 將模型設置為評估模式\nwith torch.no_grad(

#VGG


#wrn

#else
