In [10]:
#!pip install ttach
#!pip install resnest --pre

In [11]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import ttach as tta
from resnest.torch import resnest50

import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import transforms
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import KFold
from PIL import Image
import os
import matplotlib.pyplot as plt
import torchvision.models as models
# This is for the progress bar.
from tqdm import tqdm

In [12]:
# Configuration options
seed = 42
k_folds = 5
num_epochs = 35
learning_rate = 5e-5
weight_decay = 1e-3
batch_size = 64
# Validation accuracy per fold, keyed by fold index.
results = {}

# Set fixed random number seeds. torch.manual_seed does not seed numpy or
# scikit-learn, so numpy is seeded too and KFold gets an explicit random_state;
# otherwise the shuffled fold assignment changes on every run.
torch.manual_seed(seed)
np.random.seed(seed)

# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=seed)

# Devices: use up to two GPUs when they exist (the original setup), otherwise
# fall back to the CPU instead of naming CUDA devices that are not there.
if torch.cuda.is_available():
    devices = [torch.device(f'cuda:{i}') for i in range(min(2, torch.cuda.device_count()))]
else:
    devices = [torch.device('cpu')]
# Primary device: inputs and the model are moved here.
device = devices[0]

train_path = '../input/d2lclassifyleaves/train.csv'
test_path = '../input/d2lclassifyleaves/test.csv'
img_path = '../input/d2lclassifyleaves/'

In [13]:
# Label vocabulary: class name <-> integer id, numbered in order of first
# appearance in the training CSV.
train_df = pd.read_csv(train_path)
c_name = train_df['label'].unique()
class_to_num = {label: idx for idx, label in enumerate(c_name)}
num_to_class = {idx: label for idx, label in enumerate(c_name)}


In [14]:
# dataset
class LeavesData(Dataset):
    """Image dataset indexed by a CSV file.

    Column 0 of the CSV holds image file names (relative to ``img_path``);
    column 1 holds the label names and is read for every mode except ``'test'``.

    Args:
        csv_path: Path of the CSV index file.
        img_path: Directory the image file names are relative to.
        mode: ``'test'`` yields images only; any other value (default
            ``'train/valid'``) yields ``(image, class_id)`` pairs.
        transform: Callable applied to each PIL image. Defaults to a
            224x224 resize followed by ``ToTensor``.
    """

    def __init__(self,csv_path,img_path,mode='train/valid',transform=None):
        super().__init__()
        self.mode = mode
        self.csv_path = csv_path
        self.img_path = img_path
        self.data_df = pd.read_csv(csv_path)
        # Split the frame into image names and label names.
        self.img_arr = np.asarray(self.data_df.iloc[:,0])
        if mode != 'test':
            self.lab_arr = np.asarray(self.data_df.iloc[:,1])
        self.data_len = len(self.img_arr)
        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.data_len))
        if transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((224,224)),
                transforms.ToTensor()
            ])
        else:
            self.transform = transform

    def __getitem__(self, index):
        """Return ``(img, class_id)`` for labelled modes, or ``img`` for ``'test'``.

        The class id is looked up in the module-level ``class_to_num`` mapping.
        """
        img_name = self.img_arr[index]
        # Open inside a context manager so the file handle is released, and
        # force three channels so grayscale/RGBA files still match the
        # 3-channel normalisation used by the transform pipelines.
        with Image.open(os.path.join(self.img_path, img_name)) as raw:
            img = raw.convert('RGB')
        img = self.transform(img)
        # Mirror __init__: labels exist for every mode except 'test'.
        if self.mode != 'test':
            return img,class_to_num[self.lab_arr[index]]
        return img

    def __len__(self):
        """Number of rows in the CSV index."""
        return self.data_len
        


In [15]:
# trans
# Per-channel mean / std used to normalise the image tensors
# (the standard ImageNet values).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion and normalisation.
_train_steps = [
    # Randomly crop 8%-100% of the original area with an aspect ratio between
    # 3/4 and 4/3, then rescale the crop to 224 x 224.
    transforms.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
    transforms.RandomHorizontalFlip(),
    # Randomly perturb brightness, contrast and saturation.
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    # Convert the PIL image to a tensor.
    transforms.ToTensor(),
    # Normalise every channel.
    transforms.Normalize(_norm_mean, _norm_std),
]
train_transform = transforms.Compose(_train_steps)

# Validation / test pipeline: deterministic resize and centre crop.
_eval_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
val_test_transform = transforms.Compose(_eval_steps)

In [16]:
# load_pre model
def model_sel(m_name, num_classes=176):
    """Build a pretrained backbone with a freshly initialised classification head.

    Args:
        m_name: One of ``'resnest50'``, ``'resnext'`` or ``'densenet'``.
        num_classes: Output size of the new head (default 176).

    Returns:
        The model with its final linear layer replaced by a Xavier-initialised
        ``nn.Linear(in_features, num_classes)``.

    Raises:
        ValueError: If ``m_name`` is not a supported model name.
    """
    if m_name == 'resnest50':
        model = resnest50(pretrained=True)
    elif m_name == 'resnext':
        model = models.resnext50_32x4d(pretrained=True)
    elif m_name == 'densenet':
        model = models.densenet161(pretrained=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"unknown model name {m_name!r}; expected 'resnest50', 'resnext' or 'densenet'")

    # The ResNet-style models expose the head as `.fc`; DenseNet calls it
    # `.classifier`, so pick whichever attribute this model has.
    head_name = 'fc' if hasattr(model, 'fc') else 'classifier'
    in_features = getattr(model, head_name).in_features
    head = nn.Linear(in_features, num_classes)
    nn.init.xavier_normal_(head.weight)
    setattr(model, head_name, head)
    return model

In [17]:
# train
def _train_one_epoch(model, loader, loss_fn, optimizer):
    """Run one optimisation pass over `loader`; return (mean batch loss, mean batch accuracy)."""
    # Re-enter training mode every epoch: the validation pass switches the
    # model to eval mode, and it must not stay there for the next epoch.
    model.train()
    batch_losses, batch_accs = [], []
    for imgs, labels in tqdm(loader):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())
        batch_accs.append((outputs.argmax(1) == labels).float().mean().item())
    return sum(batch_losses) / len(batch_losses), sum(batch_accs) / len(batch_accs)


def _evaluate(model, loader, loss_fn):
    """Evaluate `model` on `loader` without gradients; return (mean batch loss, mean batch accuracy)."""
    model.eval()
    batch_losses, batch_accs = [], []
    with torch.no_grad():
        for imgs, labels in tqdm(loader):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            batch_losses.append(loss_fn(outputs, labels).item())
            batch_accs.append((outputs.argmax(1) == labels).float().mean().item())
    return sum(batch_losses) / len(batch_losses), sum(batch_accs) / len(batch_accs)


def train(m_name):
    """Fine-tune `m_name` with K-fold cross validation.

    For every fold a fresh pretrained model is trained for `num_epochs` epochs.
    The checkpoint with the best validation accuracy is written to
    ``./model-fold-{fold}.pth`` and the latest validation accuracy of the fold
    is stored in the module-level ``results`` dict. A per-fold summary and the
    average are printed at the end.

    Args:
        m_name: Model name understood by ``model_sel``.
    """
    print('--------------training-------------------')

    # The two datasets differ only in their transform, so build them once and
    # let the per-fold samplers choose the rows.
    train_dataset = LeavesData(train_path,img_path,transform=train_transform)
    valid_dataset = LeavesData(train_path,img_path,transform=val_test_transform)

    # K-fold Cross Validation model evaluation
    for fold,(train_ids,valid_ids) in enumerate(kfold.split(train_dataset)):
        # Build the model and its optimizer before wrapping it in DataParallel:
        # the wrapper no longer exposes the head attribute, so the parameter
        # groups could not be built afterwards.
        model = model_sel(m_name)

        # Random samplers restricted to this fold's indices (they also shuffle).
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_ids)
        train_iter = DataLoader(train_dataset,batch_size=batch_size,sampler=train_subsampler,num_workers=4)
        valid_iter = DataLoader(valid_dataset,batch_size=batch_size,sampler=valid_subsampler,num_workers=4)

        # loss
        train_lossfn = nn.CrossEntropyLoss()
        valid_lossfn = nn.CrossEntropyLoss()

        # Fine-tuning: the new head gets 5x the backbone learning rate.
        # The head is `.fc` on ResNet-style models and `.classifier` otherwise.
        head = model.fc if hasattr(model, 'fc') else model.classifier
        head_param_ids = {id(p) for p in head.parameters()}
        param_pre = [p for p in model.parameters() if id(p) not in head_param_ids]
        optimizer = torch.optim.AdamW([{'params':param_pre},{'params':head.parameters(),'lr':learning_rate*5}],
                                      lr=learning_rate,weight_decay=weight_decay)
        # T_max=10: the cosine schedule goes from max to min LR over 10 epochs.
        scheduler = CosineAnnealingLR(optimizer,T_max=10)

        # model
        model.to(device)
        model = nn.DataParallel(model,devices)

        best_acc = 0
        save_path = f'./model-fold-{fold}.pth'

        for epoch in range(num_epochs):
            print(f'Starting epoch {epoch+1}')
            print('traing--')
            train_loss, train_acc = _train_one_epoch(model, train_iter, train_lossfn, optimizer)
            # Report the learning rate used during this epoch, then advance the schedule.
            print("(train)第%d个epoch的学习率：%f" % (epoch+1,optimizer.param_groups[0]['lr']))
            print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
            scheduler.step()

            # validation
            print('validion--')
            valid_loss, valid_acc = _evaluate(model, valid_iter, valid_lossfn)

            print(f"[ Valid | {epoch + 1:03d}/{num_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
            print('Accuracy for fold %d: %f' % (fold, valid_acc))
            print('--------------------------------------')
            # Stored as a plain float so the summary prints numbers, not tensors.
            results[fold] = valid_acc
            if valid_acc > best_acc:
                best_acc = valid_acc
                # Saved from the DataParallel wrapper, so keys carry the
                # 'module.' prefix; pred() loads into the same wrapper.
                torch.save(model.state_dict(), save_path)
                print('saving model with acc {:.3f}'.format(best_acc))

    # Print fold results
    print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
    print('--------------------------------')
    total_summation = 0.0
    for key, value in results.items():
        print(f'Fold {key}: {value} ')
        total_summation += value
    print(f'Average: {total_summation/len(results)} ')

In [18]:
def pred(m_name):
    """Predict test labels with every fold checkpoint, using test-time augmentation.

    For each fold the weights in ``./model-fold-{fold}.pth`` are loaded, the
    test set is classified through a five-crop TTA wrapper, and the predicted
    class names are written to ``./submission-fold-{fold}.csv``.

    Args:
        m_name: Model name understood by ``model_sel``; must match the
            architecture the checkpoints were trained with.
    """
    test_loader = DataLoader(LeavesData(test_path,img_path,'test',val_test_transform),batch_size=batch_size)
    model = model_sel(m_name)
    model.to(device)
    # Wrap in DataParallel before loading: the checkpoints were saved from a
    # DataParallel model, so their keys carry the 'module.' prefix.
    model = nn.DataParallel(model,devices)

    # The wrapper holds a reference to `model`, so one instance serves every
    # fold; loading new weights into `model` is visible through it.
    tta_model = tta.ClassificationTTAWrapper(model,tta.aliases.five_crop_transform(200,200))
    test_index = pd.read_csv(test_path)

    for test_fold in range(k_folds):
        model_path = f'./model-fold-{test_fold}.pth'
        saveFileName = f'./submission-fold-{test_fold}.csv'
        # map_location keeps loading independent of the device the checkpoint
        # was saved from.
        model.load_state_dict(torch.load(model_path, map_location=device))
        tta_model.eval()

        preds = []
        with torch.no_grad():
            for img in tqdm(test_loader):
                img = img.to(device)
                outputs = tta_model(img)
                preds.extend(outputs.argmax(1).cpu().numpy().tolist())

        # Translate class ids back to label names (KeyError on an unknown id).
        pred_labels = [num_to_class[class_id] for class_id in preds]
        test_df = test_index.assign(label=pred_labels)
        test_df.to_csv(saveFileName, index=False)
        print(f"{m_name} Model Results Done!!!!!!!!!!!!!!!!!!!!!!!!!!!")



In [19]:
#train('resnest50')
#pred('resnest50')

In [21]:
out_path = 'outputs/'
def submission(out_dir=None, test_csv=None, n_folds=None, save_as='nest_5_sub.csv'):
    """Ensemble the per-fold prediction files by majority vote.

    Reads ``submission-fold-{i}.csv`` for every fold, takes the most frequent
    label per row (ties resolve to the first value returned by
    ``Series.mode``), and writes the result next to the test file's columns.

    Args:
        out_dir: Directory holding the per-fold CSVs. Defaults to the
            module-level ``out_path``.
        test_csv: CSV providing the rows of the submission. Defaults to the
            module-level ``test_path``.
        n_folds: Number of fold files to combine. Defaults to the
            module-level ``k_folds``.
        save_as: Path of the CSV to write.
    """
    # Resolve defaults at call time so the module-level settings stay in charge.
    if out_dir is None:
        out_dir = out_path
    if test_csv is None:
        test_csv = test_path
    if n_folds is None:
        n_folds = k_folds

    label_df = pd.DataFrame()
    for i in range(n_folds):
        f_path = f'submission-fold-{i}.csv'
        label_df[f'{i}'] = pd.read_csv(os.path.join(out_dir, f_path)).label
    # Vote directly on the label strings; no id mapping is needed for a mode.
    labels = label_df.apply(lambda row: row.mode().iloc[0], axis=1)
    ensemble_df = pd.read_csv(test_csv)
    ensemble_df['label'] = labels
    ensemble_df.to_csv(save_as, index=False)
    print('Model Results Done!!!!!!!!!!!!!!!!!!!!!!!!!!!')
# Combine the per-fold prediction CSVs into the final submission file.
submission()




Model Results Done!!!!!!!!!!!!!!!!!!!!!!!!!!!
