In [155]:
import torch
import torchvision
import os
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
from torch import nn
from PIL import Image
import math
import torch.utils.model_zoo as model_zoo
from sklearn.model_selection import StratifiedKFold


In [156]:
program_path  = "./classify-birds/" 
train_path = program_path + "train_set"
# val_path = program_path + "val_set"
test_path = program_path + "test_set"
classes_path = program_path + "classes.txt"
pred_path = './'



In [157]:
def get_image_list(path):
    lst = []
    for img in sorted(os.listdir(path)):
        if img != '.ipynb_checkpoints':
            lst.append(img)            # 1.jpg
    return lst


train_list = get_image_list(train_path)
test_list = get_image_list(test_path)

print(train_list[:2], len(train_list))
print(test_list[:2], len(test_list))


['001.Black_footed_Albatross_112.jpg', '001.Black_footed_Albatross_1131.jpg'] 10010
['1.jpg', '10.jpg'] 1768


In [158]:
def get_kfold(image_list, k):
    indexs = [int(img.split('.')[0]) - 1 for img in image_list]
    skf = StratifiedKFold(n_splits=k)
    train_kfold = []
    valid_kfold = []
    for fold, (train_idx, val_idx) in enumerate(skf.split(image_list, indexs)):
        train_kfold.append([image_list[i] for i in train_idx])
        valid_kfold.append([image_list[j] for j in val_idx])
    return train_kfold, valid_kfold

tk, vk = get_kfold(train_list, 5)


In [159]:
def cal_data(train_list):
    dct = {}
    for img in train_list:
        label = (img.split('.')[0])
        if label not in dct:
            dct[label] = 1
        else:
            dct[label] += 1
    print(dct)
# 平均每个类别有50张左右的图片

In [160]:
def get_match_dict(classes_path):
    match_dict = {}
    with open(classes_path, 'r') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            no = int(line.split(" ")[0])
            name = line.split(" ")[1].split(".")[1]
            match_dict[no] = name
    return match_dict

match_dict = get_match_dict(classes_path)

In [161]:
class BirdDataSet(Dataset):
    def __init__(self, image_list, image_folder, transform=None):
        super().__init__()
        self.image_list = image_list           # 图像名称的列表
        self.image_foder = image_folder        # 图像所在文件夹
        self.transform = transform             
        
    def __len__(self):
        return len(self.image_list)
    
    def __getitem__(self, idx):
        image_path = os.path.join(self.image_foder, self.image_list[idx])
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = int(self.image_list[idx].split('.')[0]) - 1
        return image, label

In [162]:
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, ratio=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels//ratio, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(in_channels//ratio, in_channels, kernel_size=1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
        

In [165]:
class BasicBlock(nn.Module):
    expansion = 1  #通道升降维倍数
    def __init__(self, in_channels, channels, stride=1, downsample=None):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=3, 
                                stride=stride, padding=1)                    #第一个卷积层，通过stride进行下采样
        self.bn1 = nn.BatchNorm2d(channels)

        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3,            #第二个卷积层，不进行下采样
                                stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(channels)
        
        self.ca = ChannelAttention(channels)                                 #加入卷积注意力模块
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride
        
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out = self.ca(out) * out
        out = self.sa(out) * out


        if self.downsample is not None:
            residual = self.downsample(x)                                       #通道数不变，1x1卷积层仅用于降采样

        out += residual
        return self.relu(out)


class Bottleneck(nn.Module):
    expansion = 4  # 通道升降维背书

    def __init__(self, in_channels, channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, channels,                     #第一个是1x1卷积
                                kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)                                 

        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3,        #第二个是3x3卷积，通过stride进行下采样
                                stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        self.conv3 = nn.Conv2d(channels, channels * self.expansion,      #第三个是1x1卷积
                                kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * channels)

        self.ca = ChannelAttention(channels * self.expansion)           #加入卷积注意力模块
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride
    
    def forward(self, x):
        residual = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out = self.ca(out) * out
        out = self.sa(out) * out

        if self.downsample:
            residual = self.downsample(x)                                       #通道数变化，需要在shortcut中加入1x1卷积升维，同时降采样

        out += residual

        return F.relu(out)


class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        self.in_channels = 64
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(block, 64, layers[0])                #第一个残差层不进行下采样
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.embedding = nn.Linear(num_classes, 512)
        self.classify = nn.Linear(512, 200)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, channels, blocks, stride=1):                          # block：basicblock or bottleneck
        downsample = None
        if stride != 1 or self.in_channels != channels * block.expansion:              # 前一种操作需要下采样，后一种操作需要融合通道
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, channels*block.expansion, kernel_size=1, 
                        stride=stride, bias=False),
                nn.BatchNorm2d(channels*block.expansion)
            )
        layers = []
        layers.append(block(self.in_channels, channels, stride, downsample))         #第一个残差块
        self.in_channels = channels * block.expansion                                
        for i in range(1, blocks):                                                   #后续残差块，需要改变in_channels，使其对应上一个残差块的channels
            layers.append(block(self.in_channels, channels))
        
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.fc(x)
        x = self.embedding(x)
        x = self.relu(x)
        x = self.classify(x)
        return x

In [166]:
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

def resnet34_cbam(pretrained=False, **kwargs):
    
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
        now_state_dict        = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


In [169]:
num_epoch, batch_size, lr = 13, 48, 0.0005

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.4, 0.2, 0.4, 0.2),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])


val_transform = transforms.Compose([
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

device = torch.device('cuda')

models = [resnet34_cbam(True) for i in range(5)]
for model in models:
    model.to(device)

optimizer = optim.Adam(
    [{"params": mlp.parameters()} for mlp in models],
    lr=lr
)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)

criterion = nn.CrossEntropyLoss()

In [170]:
def valid(model, val_set, val_iter, epoch):
    model.eval()
    val_acc = 0
    val_loss = 0
    for step, (X, y) in enumerate(val_iter):
        X = X.to(device)
        y = y.to(device)      
        
        y_hat = model(X)
        loss = criterion(y_hat, y)
        pred = torch.argmax(y_hat, dim=1)
                
        accuracy = torch.sum(pred == y).item()
        val_acc += accuracy
        val_loss += loss.item()

        
    val_acc /= len(val_set)
    val_loss /= (step + 1)
    if epoch % 3 == 0:
        print(f"------valid| acc={val_acc:.6f}, loss={val_loss:.6f}-----")

    return val_acc


In [171]:

def train(models):
    
    for k in range(5):
        acc = 0.0
        train_set = BirdDataSet(tk[k], train_path, transform=train_transform)
        train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=6)

        val_set = BirdDataSet(vk[k], train_path, transform=val_transform)
        val_iter = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=6)
        
        for epoch in range(num_epoch):
            models[k].train()
            epoch_acc = 0
            epoch_loss = 0
            for step, (X, y) in enumerate(train_iter):
                X = X.to(device)
                y = y.to(device)
            
                y_hat = models[k](X)
                loss = criterion(y_hat, y)
                    
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pred = torch.argmax(y_hat, dim=1)
                accuracy = torch.sum(pred == y).item()
                epoch_acc += accuracy
                epoch_loss += loss.item()

              
            epoch_acc /= len(train_set)
            epoch_loss /= (step + 1)
            if epoch % 3 == 0:
                print(f"|model{k} epoch{epoch:>2d}| acc={epoch_acc:.6f}, loss={epoch_loss:.6f}")
            
            scheduler.step()
            val_acc = valid(models[k], val_set, val_iter, epoch)
            
        
            if val_acc > acc:
                torch.save(models[k].state_dict(), f'./model{k}.pth')
                acc = val_acc
       
            

In [172]:
train(models)

|model0 epoch 0| acc=0.032592, loss=4.806515
------valid| acc=0.068432, loss=4.194335-----
|model0 epoch 3| acc=0.388487, loss=2.196995
------valid| acc=0.347153, loss=2.545099-----
|model0 epoch 6| acc=0.665584, loss=1.106947
------valid| acc=0.491009, loss=1.972775-----
|model0 epoch 9| acc=0.810190, loss=0.596662
------valid| acc=0.554446, loss=1.832113-----
|model0 epoch12| acc=0.912338, loss=0.270933
------valid| acc=0.533467, loss=2.267678-----
|model1 epoch 0| acc=0.056319, loss=4.533703
------valid| acc=0.109391, loss=4.135606-----
|model1 epoch 3| acc=0.522478, loss=1.622074
------valid| acc=0.449051, loss=1.975354-----
|model1 epoch 6| acc=0.769730, loss=0.754174
------valid| acc=0.450549, loss=2.233846-----
|model1 epoch 9| acc=0.884241, loss=0.365400
------valid| acc=0.595904, loss=1.801501-----
|model1 epoch12| acc=0.947552, loss=0.171486
------valid| acc=0.593407, loss=1.970936-----
|model2 epoch 0| acc=0.054695, loss=4.538333
------valid| acc=0.129371, loss=3.654449-----

In [182]:
val_transform = transforms.Compose([
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

import numpy as np
from collections import Counter

def predict(models, test_list, image_folder, pred_path, mode='gpu'):
    if mode == 'gpu':
        for model in models:
            model.to(torch.device('cuda'))
    
    with open(pred_path + "pred.csv", "w") as f:
        for i in range(len(test_list)):
            name = str(i + 1) + ".jpg"
            ipath = os.path.join(image_folder, name)
            img = Image.open(ipath).convert('RGB')
            img = val_transform(img)
            img = img.unsqueeze(dim=0)
            if mode == 'gpu':
                img = img.to(torch.device('cuda'))
            vote = []
            for mlp in models:
                pred = mlp(img).argmax(dim=1)
                vote.append(pred.cpu().numpy())
            vote = np.array(vote)
            label = Counter(vote[:, 0]).most_common(1)[0][0] + 1
            f.write("{},{}\n".format(name, label))
            if i % 100 == 0 : 
                print(name, match_dict[label])

            

            


In [None]:
predict(models, test_list, test_path, pred_path)

1.jpg Painted_Bunting
101.jpg European_Goldfinch
201.jpg House_Wren
301.jpg Northern_Fulmar
401.jpg Chipping_Sparrow
501.jpg Frigatebird
601.jpg Forsters_Tern
701.jpg Winter_Wren
801.jpg Green_Kingfisher
901.jpg Sage_Thrasher
1001.jpg Chuck_will_Widow
1101.jpg Western_Meadowlark
1201.jpg Pacific_Loon
1301.jpg Cliff_Swallow
1401.jpg Pine_Grosbeak


In [179]:
clones = [resnet34_cbam() for i in range(5)]
for i, clone in enumerate(clones):
    clone.to(torch.device('cuda'))
    clone.load_state_dict(torch.load(f"model{i}.pth"))
    clone.eval()

In [None]:
predict(clones, test_list, test_path, pred_path)

In [178]:
model = resnet34_cbam(False)
model.to(device)
model.load_state_dict(torch.load('model4.pth'))
model.eval()
print()


