In [1]:
import os
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision import models as models
from torch.utils.data import DataLoader

In [2]:
train_df=pd.read_csv(r"./train.csv")
test_df=pd.read_csv(r"./test.csv")

preprocesses csv

In [3]:
label_list=sorted(set(train_df['label']))
num_classes=len(label_list)
label_dict=dict(zip(label_list, range(num_classes)))
num_dict={v: k for k, v in label_dict.items()}

{'abies_concolor': 0,
 'abies_nordmanniana': 1,
 'acer_campestre': 2,
 'acer_ginnala': 3,
 'acer_griseum': 4,
 'acer_negundo': 5,
 'acer_palmatum': 6,
 'acer_pensylvanicum': 7,
 'acer_platanoides': 8,
 'acer_pseudoplatanus': 9,
 'acer_rubrum': 10,
 'acer_saccharinum': 11,
 'acer_saccharum': 12,
 'aesculus_flava': 13,
 'aesculus_glabra': 14,
 'aesculus_hippocastamon': 15,
 'aesculus_pavi': 16,
 'ailanthus_altissima': 17,
 'albizia_julibrissin': 18,
 'amelanchier_arborea': 19,
 'amelanchier_canadensis': 20,
 'amelanchier_laevis': 21,
 'asimina_triloba': 22,
 'betula_alleghaniensis': 23,
 'betula_jacqemontii': 24,
 'betula_lenta': 25,
 'betula_nigra': 26,
 'betula_populifolia': 27,
 'broussonettia_papyrifera': 28,
 'carpinus_betulus': 29,
 'carpinus_caroliniana': 30,
 'carya_cordiformis': 31,
 'carya_glabra': 32,
 'carya_ovata': 33,
 'carya_tomentosa': 34,
 'castanea_dentata': 35,
 'catalpa_bignonioides': 36,
 'catalpa_speciosa': 37,
 'cedrus_atlantica': 38,
 'cedrus_deodara': 39,
 'cedru

dataset

In [4]:
# read data and resize picture
class LeavesDataset(Dataset):
    def __init__(self, img_dir, annotation_file, mode='train',ratio=0.9):
        self.img_dir=img_dir
        csv=pd.read_csv(annotation_file)
        length=int(len(csv)*ratio)
        self.mode=mode
        if self.mode == 'train':
            self.img_arr=csv.iloc[1:length,0]
            self.img_labels_arr=csv.iloc[1:length,1]
        elif self.mode == 'val':
            self.img_arr=csv.iloc[length:,0]
            self.img_labels_arr=csv.iloc[length:,1]
        elif self.mode == 'test':
            self.img_arr=csv.iloc[1:,0]
        

    def __getitem__(self, idx:int):
        img_path=os.path.join(self.img_dir + self.img_arr.iloc[idx])
        img=Image.open(img_path)
        if self.mode == 'train'or self.mode == 'val':
            label=self.img_labels_arr.iloc[idx]
            img_trans=transforms.Compose([
                        transforms.Resize((224,224)),
                        transforms.ToTensor()])
            img=img_trans(img)
            label_num=label_dict[label]
        elif self.mode == 'test':
            img_trans=transforms.Compose([
                        transforms.Resize((224,224)),
                        transforms.ToTensor()])            
            img=img_trans(img) 
            return img
        return img, label_num
        
    def __len__(self):
        return len(self.img_arr)

In [5]:
train_dataset=LeavesDataset(r'./',
                            r'./train.csv',
                            mode='train', ratio=0.9)
val_dataset=LeavesDataset(r'./',
                            r'./train.csv',
                            mode='val', ratio=0.9)
test_dataset=LeavesDataset(r'./',
                            r'./test.csv',
                            mode='test', ratio=0.9)

train_dataloader=DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=5)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=5)
test_dataloader = DataLoader(test_dataset, batch_size=128, num_workers=5)
#train_feature, train_label = next(iter(train_dataloader))
#test=next(iter(train_dataloader))
#print(test.size)

In [6]:
# iter a batch each time, which means call 128x __getitem__()
#print(f"Feature batch shape: {train_feature.size()}")
#print(type(train_feature))
#print('------------------------')
#test_feature = next(iter(train_dataloader))


Feature batch shape: torch.Size([128, 3, 224, 224])


128

In [7]:
def res_model(num_classes, feature_extract = False, use_pretrained=True):
    model_ft=models.resnet34(pretrained=use_pretrained)
    num_ftrs=model_ft.fc.in_features
    #?
    model_ft.fc=nn.Sequential(nn.Linear(num_ftrs, num_classes))
    return model_ft

In [8]:
lr=3e-4
weight_decay=1e-3
num_epochs=50
model_path=r'./pre_res_model.ckpt'
def get_device():
    return 'cuda' if torch.cuda.is_available() else 'cpu'
device=get_device()
print(device)
model=res_model(176)
model=model.to(device)
criterion=nn.CrossEntropyLoss()
optimizier=torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
best_acc=0.0

cuda


In [None]:
for epoch in range(num_epochs):
    model.train()
    train_loss=[]
    train_accs=[]
    for batch in tqdm(train_dataloader):
        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)
        logits = model(imgs)
        loss=criterion(logits, labels)
        optimizier.zero_grad()
        loss.backward()
        optimizier.step()
        acc = (logits.argmax(dim=-1) == labels).float().mean()
        train_loss.append(loss.item())
        train_accs.append(acc)
    train_loss=sum(train_loss)/len(train_loss)
    train_acc=sum(train_accs)/len(train_accs)
    print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    
    valid_loss = []
    valid_accs = []
    for batch in tqdm(val_dataloader):
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
            labels=labels.to(device)
        loss = criterion(logits, labels)
        acc = (logits.argmax(dim=-1) == labels).float().mean()
        valid_loss.append(loss.item())
        valid_accs.append(acc)
    valid_loss=sum(valid_loss)/len(valid_loss)    
    valid_accs=sum(valid_accs)/len(valid_accs)
    print(f"[Valid | {epoch + 1:03d}/{num_epochs:03d}] loss = {valid_loss:.5f}, acc = {valid_loss:.5f}")
    if valid_accs>best_acc:
        best_acc=valid_accs
        torch.save(model.state_dict(), model_path)
        print("saving model with acc {:.3f}".format(best_acc))

100%|█████████████████████████████████████████████████████| 130/130 [02:51<00:00,  1.32s/it]


[ Train | 001/050 ] loss = 1.93956, acc = 0.58570


100%|███████████████████████████████████████████████████████| 15/15 [00:08<00:00,  1.81it/s]


[Valid | 001/050] loss = 0.83605, acc = 0.83605
saving model with acc 0.796


100%|█████████████████████████████████████████████████████| 130/130 [04:35<00:00,  2.12s/it]


[ Train | 002/050 ] loss = 0.52670, acc = 0.87248


100%|███████████████████████████████████████████████████████| 15/15 [00:11<00:00,  1.25it/s]


[Valid | 002/050] loss = 0.53467, acc = 0.53467
saving model with acc 0.856


100%|█████████████████████████████████████████████████████| 130/130 [06:44<00:00,  3.11s/it]


[ Train | 003/050 ] loss = 0.31967, acc = 0.91669


100%|███████████████████████████████████████████████████████| 15/15 [00:18<00:00,  1.23s/it]


[Valid | 003/050] loss = 0.38333, acc = 0.38333
saving model with acc 0.889


100%|█████████████████████████████████████████████████████| 130/130 [07:03<00:00,  3.25s/it]


[ Train | 004/050 ] loss = 0.26131, acc = 0.93369


100%|███████████████████████████████████████████████████████| 15/15 [00:19<00:00,  1.32s/it]


[Valid | 004/050] loss = 0.38953, acc = 0.38953
saving model with acc 0.894


100%|█████████████████████████████████████████████████████| 130/130 [11:40<00:00,  5.38s/it]


[ Train | 005/050 ] loss = 0.25963, acc = 0.93117


100%|███████████████████████████████████████████████████████| 15/15 [00:24<00:00,  1.62s/it]


[Valid | 005/050] loss = 0.31790, acc = 0.31790
saving model with acc 0.909


100%|█████████████████████████████████████████████████████| 130/130 [13:12<00:00,  6.09s/it]


[ Train | 006/050 ] loss = 0.23635, acc = 0.93938


100%|███████████████████████████████████████████████████████| 15/15 [00:24<00:00,  1.62s/it]


[Valid | 006/050] loss = 0.30619, acc = 0.30619


 30%|████████████████▏                                     | 39/130 [03:59<09:14,  6.09s/it]

In [None]:
saveFileName = './submission.csv'

model = res_model(176)
model = model.to(device)
model.load_state_dict(torch.load(model_path))
model.eval()
predictions = []
for batch in tqdm(test_dataloader):
    imgs = batch
    with torch.no_grad():
        logits=model(imgs.to(device))
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())
pred=[]
for i in predictions:
    pred.append(num_dict[i])
test_path="./test.csv"
test_data = pd.read_csv(test_path)
test_data['label'] = pd.Series(pred)
submission=pd.concat([test_data['image'], test_data['label']], axis=1)
submission.to_csv(saveFileName, index=False)
print("Done")
