In [1]:
import torch
import os
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import cv2
import time

# 关键部分，怎样构建损失函数来训练模型

In [2]:
def loss_fn_kd(outputs, labels, teacher_outputs, T=20, alpha=0.5):

    hard_loss = F.cross_entropy(outputs, labels) * (1. - alpha)
    
    soft_loss = nn.KLDivLoss(reduction='batchmean')(F.log_softmax(outputs/T, dim=1),
                             F.softmax(teacher_outputs/T, dim=1)) * (alpha * T * T)
    return hard_loss + soft_loss

In [3]:
def readfile(path, label):
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        print(os.path.join(path, file))
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
          y[i] = int(file.split("_")[0])
    if label:
      return x, y
    else:
      return x

In [None]:
workspace_dir = "G:/Model_Compress/Dataset"
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)

In [5]:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), 
    transforms.RandomRotation(15), 
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [6]:
batch_size = 16
train_set = ImgDataset(train_x, train_y, train_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = train_loader
val_set = train_set

In [7]:
class StudentNet(nn.Module):
    def __init__(self):
        super(StudentNet, self).__init__()
        
        # input size: (3, 128, 128)
        self.CNN = nn.Sequential(
            nn.Conv2d(3, 3, 3, 1, 1, groups=3),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.Conv2d(3, 64, 1),
            nn.MaxPool2d(2, 2, 0),
            
            nn.Conv2d(64, 64, 3, 1, 1, groups=64),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, 1),
            nn.MaxPool2d(2, 2, 0),
            
            nn.Conv2d(128, 128, 3, 1, 1, groups=128),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 256, 1),
            nn.MaxPool2d(2, 2, 0),
            
            nn.Conv2d(256, 256, 3, 1, 1, groups=256),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 512, 1),
            nn.MaxPool2d(2, 2, 0),
            
            nn.Conv2d(512, 512, 3, 1, 1, groups=512),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, 1),
            nn.MaxPool2d(2, 2, 0),
            
            # Channel x H x W -> C x 1 x 1
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 2)
        )
    def forward(self, x):
        out = self.CNN(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [8]:
def set_param_requires_grad(model, feature_extract):
    count = 0
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False
            # print("fine tuning")
    elif not feature_extract:
        for param in model.parameters():
            param.requires_grad = True
            count += 1

teacher_net = models.resnet18(pretrained=False)
set_param_requires_grad(teacher_net, True)  
fc_new = teacher_net.fc.in_features
teacher_net.fc = nn.Linear(fc_new, 2)

In [9]:
model = teacher_net.cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 15

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() 
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        
        train_pred = model(data[0].cuda()) 
        
        batch_loss = loss(train_pred, data[1].cuda()) 
        batch_loss.backward()
        optimizer.step() 

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/015] 12.65 sec(s) Train Acc: 0.525000 Loss: 0.044899 | Val Acc: 0.500000 loss: 0.046675
[002/015] 10.82 sec(s) Train Acc: 0.632500 Loss: 0.040311 | Val Acc: 0.500000 loss: 0.050105
[003/015] 10.86 sec(s) Train Acc: 0.637500 Loss: 0.040142 | Val Acc: 0.757500 loss: 0.038593
[004/015] 11.23 sec(s) Train Acc: 0.667500 Loss: 0.037705 | Val Acc: 0.675000 loss: 0.037316
[005/015] 11.51 sec(s) Train Acc: 0.695000 Loss: 0.038062 | Val Acc: 0.700000 loss: 0.038292
[006/015] 11.57 sec(s) Train Acc: 0.672500 Loss: 0.038077 | Val Acc: 0.767500 loss: 0.034274
[007/015] 11.55 sec(s) Train Acc: 0.747500 Loss: 0.034700 | Val Acc: 0.712500 loss: 0.037826
[008/015] 11.56 sec(s) Train Acc: 0.720000 Loss: 0.035251 | Val Acc: 0.757500 loss: 0.033832
[009/015] 11.56 sec(s) Train Acc: 0.767500 Loss: 0.033306 | Val Acc: 0.665000 loss: 0.035366
[010/015] 11.56 sec(s) Train Acc: 0.747500 Loss: 0.033239 | Val Acc: 0.757500 loss: 0.032490
[011/015] 11.56 sec(s) Train Acc: 0.745000 Loss: 0.032800 | Val Acc: 0

In [10]:
weights = model.state_dict()
torch.save(weights, "ResNet18_landslide.pth")

In [9]:
teacher_net.load_state_dict(torch.load("ResNet18_landslide.pth"))
teacher_net.cuda()
student_net = StudentNet().cuda()
optimizer = optim.AdamW(student_net.parameters(), lr=1e-3)

In [10]:
def run_epoch(dataloader, update=True, alpha=0.5):
    total_num, total_hit, total_loss = 0, 0, 0
    for now_step, batch_data in enumerate(dataloader):

        optimizer.zero_grad()

        inputs, hard_labels = batch_data
        inputs = inputs.cuda()
        hard_labels = torch.LongTensor(hard_labels).cuda()
        
        with torch.no_grad():
            soft_labels = teacher_net(inputs)

        if update:
            logits = student_net(inputs)
            loss = loss_fn_kd(logits, hard_labels, soft_labels, 20, alpha)
            loss.backward()
            optimizer.step()    
        else:
            with torch.no_grad():
                logits = student_net(inputs)
                loss = loss_fn_kd(logits, hard_labels, soft_labels, 20, alpha)
            
        total_hit += torch.sum(torch.argmax(logits, dim=1) == hard_labels).item()
        total_num += len(inputs)

        total_loss += loss.item() * len(inputs)
    return total_loss / total_num, total_hit / total_num



teacher_net.eval()
now_best_acc = 0
for epoch in range(15):
    student_net.train()
    train_loss, train_acc = run_epoch(train_loader, update=True)
    student_net.eval()
    valid_loss, valid_acc = run_epoch(val_loader, update=False)

    # 存下最好的model。
    if valid_acc > now_best_acc:
        now_best_acc = valid_acc
        torch.save(student_net.state_dict(), 'student_model.bin')
    print('epoch {:>3d}: train loss: {:6.4f}, acc {:6.4f} valid loss: {:6.4f}, acc {:6.4f}'.format(
        epoch, train_loss, train_acc, valid_loss, valid_acc))

epoch   0: train loss: 0.3515, acc 0.7350 valid loss: 0.5751, acc 0.5000
epoch   1: train loss: 0.2528, acc 0.8550 valid loss: 0.5973, acc 0.5375
epoch   2: train loss: 0.2419, acc 0.8650 valid loss: 0.2278, acc 0.8775
epoch   3: train loss: 0.2344, acc 0.8875 valid loss: 0.1984, acc 0.9275
epoch   4: train loss: 0.2108, acc 0.9050 valid loss: 0.1896, acc 0.9400
epoch   5: train loss: 0.2394, acc 0.8675 valid loss: 0.2024, acc 0.9175
epoch   6: train loss: 0.2194, acc 0.9150 valid loss: 0.1842, acc 0.9375
epoch   7: train loss: 0.2191, acc 0.9275 valid loss: 0.1920, acc 0.9550
epoch   8: train loss: 0.2208, acc 0.8875 valid loss: 0.1793, acc 0.9575
epoch   9: train loss: 0.2121, acc 0.9300 valid loss: 0.1813, acc 0.9500
epoch  10: train loss: 0.2314, acc 0.9075 valid loss: 0.2034, acc 0.9175
epoch  11: train loss: 0.2022, acc 0.9275 valid loss: 0.1779, acc 0.9600
epoch  12: train loss: 0.2182, acc 0.8900 valid loss: 0.1868, acc 0.9375
epoch  13: train loss: 0.2207, acc 0.9050 valid los

In [17]:
small_net = StudentNet().cuda()

In [18]:
small_net.load_state_dict(torch.load("student_model.bin"))

<All keys matched successfully>