Best score: 98.654%


In [None]:
from fastai.vision.all import *
from fastai.callback.wandb import *
import torchvision.models as models
import albumentations as Alb 
# import wandb
import pandas as pd

In [None]:
# Hyperparameters read by the cells below: number of fine-tuning epochs,
# dataloader batch size, validation fraction and the RNG seed.
HP = dict(
    epochs=22,
    batch_size=32,
    test_size=0.05,
    seed=42,
)

# Optional Weights & Biases run tracking; left disabled.
# wandb.init(
#     entity='konradszafer',
#     project='paddy-disease-classification',
#     name='fastai run 11',
#     notes='''
#     resnet50, fine tuning
#     ''',
#     config=HP
# )

# Seed the random number generators before any data is split or augmented.
set_seed(HP['seed'])

In [None]:
# Kaggle input locations for the competition data.
dataset_dir = '/kaggle/input/paddy-disease-classification/train_images'
dataset_file = '/kaggle/input/paddy-disease-classification/train.csv'
submission_dir = '/kaggle/input/paddy-disease-classification/test_images/'

# Where the final predictions are written.
submission_output = '/kaggle/working/submission.csv'

# Names of the sub-directories directly under the training image folder.
labels = [
    entry
    for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
]

In [None]:
class AlbumentationsTransform(Transform):
    """Apply an Albumentations pipeline to PIL images of the training split.

    `aug` is any Albumentations callable that accepts `image=<ndarray>` and
    returns a dict with the augmented array under the `'image'` key.
    """
    # split_idx=0 restricts the transform to the training subset, so the
    # validation metrics (and the checkpoints chosen from them) are computed
    # on un-augmented images. `Learner.tta` temporarily switches the test
    # dataloader to split 0, so test-time augmentation still applies it.
    # order=2 makes it run after `Resize` (order 1), i.e. on the already
    # resized image, as in the fastai Albumentations tutorial.
    split_idx, order = 0, 2

    def __init__(self, aug):
        self.aug = aug

    def encodes(self, img: PILImage):
        # Albumentations works on numpy arrays; convert, augment, convert back.
        aug_img = self.aug(image=np.array(img))['image']
        return PILImage.create(aug_img)

def get_train_aug():
    """Return the Albumentations pipeline used to augment images.

    The pipeline chains geometric transforms (shift/scale/rotate, transpose,
    flip, 90-degree rotation) followed by colour transforms
    (brightness/contrast, hue/saturation/value).
    """
    geometric = [
        # Empty border pixels created by the warp are filled with black.
        Alb.ShiftScaleRotate(rotate_limit=15, border_mode=0, value=(0,0,0)),
        Alb.Transpose(),
        Alb.Flip(),
        Alb.RandomRotate90(),
    ]
    # NOTE(review): the 0.5 limits look like fractions; Albumentations
    # presumably interprets them on an integer scale for uint8 images, which
    # would make this step close to a no-op -- confirm the intended strength.
    hsv_jitter = Alb.HueSaturationValue(
        hue_shift_limit=0.5,
        sat_shift_limit=0.5,
        val_shift_limit=0.5,
        p=0.5,
    )
    colour = [Alb.RandomBrightnessContrast(), hsv_jitter]
    return Alb.Compose(geometric + colour)

# Per-item transforms: squish every image to 224 px and apply the
# Albumentations augmentation pipeline.
_resize = Resize(224, method='squish')
_augment = AlbumentationsTransform(get_train_aug())
item_tfms = [_resize, _augment]

# Per-batch transform: normalise with the ImageNet channel statistics.
_mean, _std = imagenet_stats
batch_tfms = Normalize.from_stats(_mean, _std)

In [None]:
# Build the train/validation dataloaders from the class-per-folder layout
# under `dataset_dir`, holding out `HP['test_size']` of the images for
# validation.
dls = ImageDataLoaders.from_folder(
    dataset_dir,
    train='.',
    valid_pct=HP['test_size'],
    # Pin the random split explicitly: without it the split depends on the
    # global RNG state, so re-running this cell (e.g. before reloading a
    # checkpoint) would move validation images into the training set.
    seed=HP['seed'],
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    bs=HP['batch_size'],
    shuffle=True
)
# Visual sanity check of a training batch.
dls.train.show_batch(max_n=16)

In [None]:
# Report how many items ended up in each split.
n_train = len(dls.train.items)
n_valid = len(dls.valid.items)
print(f'train len: {n_train} valid len: {n_valid}')

In [None]:
# ResNet-50 learner trained with focal loss in mixed precision.
# `FocalLossFlat` is fastai's loss-function wrapper around the same focal
# loss: unlike the bare `FocalLoss` module it also provides the softmax
# activation and argmax decoding, so `get_preds`/`tta` return class
# probabilities instead of raw logits.
learn = vision_learner(
    dls,
    resnet50,
    path='.',
    loss_func=FocalLossFlat(),
    metrics=[accuracy, error_rate],
#     cbs=WandbCallback()
).to_fp16()

In [None]:
%%time
learn.fine_tune(
    HP['epochs'], 
    freeze_epochs=1,
    cbs=[
        ShowGraphCallback(),
        SaveModelCallback(monitor='error_rate')
    ]
)
learn.save('model')

In [None]:
# Run the learning-rate finder on the current weights; presumably used to pick
# the `base_lr` of the next fine-tuning stage.
learn.lr_find()

In [None]:
# Stage 2: three more epochs at a low learning rate, with no frozen warm-up.
# NOTE(review): SaveModelCallback presumably writes to its default checkpoint
# name again here -- confirm overwriting the earlier checkpoint is intended.
stage2_cbs = [ShowGraphCallback(), SaveModelCallback(monitor='error_rate')]
learn.fine_tune(3, freeze_epochs=0, base_lr=2e-5, cbs=stage2_cbs)

In [None]:
# Re-run the learning-rate finder after the previous stage; presumably used to
# pick the `base_lr` of the final fine-tuning stage.
learn.lr_find()

In [None]:
# Stage 3: four final epochs at a slightly lower learning rate, again without
# a frozen warm-up epoch.
# NOTE(review): SaveModelCallback presumably writes to its default checkpoint
# name again here -- confirm overwriting the earlier checkpoint is intended.
stage3_cbs = [ShowGraphCallback(), SaveModelCallback(monitor='error_rate')]
learn.fine_tune(4, freeze_epochs=0, base_lr=1.7e-5, cbs=stage3_cbs)

In [None]:
# finetuning on nearly full dataset
# dls = ImageDataLoaders.from_folder(
#     dataset_dir,
#     train='.',
#     valid_pct=0.01,
#     item_tfms=item_tfms,
#     batch_tfms=batch_tfms,
#     bs=HP['batch_size'],
#     shuffle=True
# )
# learn.dls = dls
# learn.fine_tune(5, freeze_epochs=1, cbs=[ShowGraphCallback()])

In [None]:
# Restore the weights stored under the checkpoint name 'model' before
# running inference on the test images.
learn.load('model')

In [None]:
# Collect the competition test images and wrap them in an unlabelled,
# unshuffled dataloader derived from the training dataloaders.
test_files = get_image_files(submission_dir)
dfs_test = dls.test_dl(
    test_items=test_files,
    with_labels=False,
    shuffle=False,
)
# Visual sanity check of a few test items.
dfs_test.show_batch(max_n=8)

In [None]:
# Test-time augmentation: average the predictions over several augmented
# passes. `n` is the number of augmentation passes, not a batch size, so it
# gets its own setting instead of reusing HP['batch_size']; the default of 32
# keeps the number of passes this notebook was run with.
tta_rounds = HP.get('tta_rounds', 32)
predictions = learn.tta(dl=dfs_test, n=tta_rounds, use_max=False)

In [None]:
# Turn the TTA output into the submission table: one row per test image with
# its file name and the vocabulary entry of the highest-scoring class.
test_preds = predictions[0]
class_idx = np.argmax(test_preds, axis=1)

df = pd.DataFrame()
df['image_id'] = [path.name for path in dfs_test.items]
df['label'] = learn.dls.vocab[class_idx]

df.to_csv(submission_output, header=True, index=False)
print('done')

In [None]:
# Show how many test images were assigned to each predicted label.
df['label'].value_counts()