# Plant Species Identification

## Imports

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.conv_learner import *

## Data Setup

### Paths, hyperparameters and validation split

In [3]:
# Working directory at the time this cell runs.
CWD = os.getcwd()
# Dataset root, given as a relative path.
PATH = 'data/plant_seedlings_classification'
# CSV file with the training labels, located directly under PATH.
label_csv = os.path.join(PATH, 'labels.csv')

In [4]:
# Image size passed to get_data for the first training stage.
sz = 224
# Architecture used both for the transforms and for the pretrained learner.
arch = resnet50
# Batch size passed to get_data.
bs = 64

In [None]:
# Number of lines in the label CSV minus one (presumably the header row — confirm).
# Reading inside a `with` block closes the file handle as soon as the count
# is done instead of leaving it open until garbage collection.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Validation indices chosen by get_cv_idxs for n rows; get_data hands them to
# ImageClassifierData.from_csv.
val_idxs = get_cv_idxs(n)

### Data Object

In [5]:
def get_data(sz, bs, arch):
    """Build the image data object for one image size and batch size.

    Reads the notebook-level names PATH, label_csv and val_idxs.

    sz:   image size passed to tfms_from_model; also selects the resize branch.
    bs:   batch size passed to ImageClassifierData.from_csv.
    arch: architecture passed to tfms_from_model.

    Returns the result of calling resize on the data object: with
    int(sz*1.3) when sz > 300, otherwise with 340 and 'tmp'.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_top_down, max_zoom=1.1)
    dataset = ImageClassifierData.from_csv(
        PATH, 'train', label_csv,
        test_name='test', val_idxs=val_idxs, tfms=transforms, bs=bs,
    )
    if sz > 300:
        return dataset.resize(int(sz*1.3))
    return dataset.resize(340, 'tmp')

In [6]:
# Data object for the initial sz / bs / arch settings.
data = get_data(sz, bs, arch)




## Image Classifier

### Create Learner

In [19]:
def initial_training(lr, learn):
    """Run the first training pass on `learn` and hand the learner back.

    lr:    learning rate forwarded to learn.fit.
    learn: learner object; its fit method is called once with
           n_cycle=4, cycle_len=1 and cycle_mult=2.

    Returns the same `learn` object that was passed in.
    """
    schedule = dict(n_cycle=4, cycle_len=1, cycle_mult=2)
    learn.fit(lr, **schedule)
    return learn

In [49]:
def train_new_size(size, learn, arch):
    """Retrain `learn` on data built for one size configuration, then save it.

    size:  dict with the keys 'lr', 'sz', 'bs' and 'save_name'.
    learn: learner to update; it is modified in place and returned.
    arch:  architecture forwarded to get_data.

    Prints `size` first and, after unfreezing, the value of learn.precompute.
    """
    print(size)
    lr, sz, bs, save_name = (size[key] for key in ('lr', 'sz', 'bs', 'save_name'))

    resized_data = get_data(sz, bs, arch)
    learn.set_data(resized_data)  # set_data freezes the learner, hence the unfreeze below
    learn.precompute = False
    learn.unfreeze()
    print(learn.precompute)

    lrs = [lr/9, lr/3, lr]
    learn.fit(lrs, n_cycle=3, cycle_len=1, cycle_mult=2)
    learn.save(save_name)
    return learn

In [63]:
# Pretrained learner created with precompute=True; a later cell sets
# learn.precompute = False before unfreezing.
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [22]:
# First training pass at lr=3e-2; the saved log below lists epochs 0 through 14.
learn = initial_training(3e-2, learn)

[ 0.       1.17421  0.72959  0.77751]                     
[ 1.       0.81134  0.58232  0.81142]                      
[ 2.       0.58194  0.47529  0.837  ]                      
[ 3.       0.55831  0.56589  0.8353 ]                      
[ 4.       0.46525  0.42028  0.86011]                      
[ 5.       0.36066  0.39332  0.86115]                      
[ 6.       0.2863   0.47452  0.85822]                      
[ 7.       0.3136   0.58323  0.82365]                      
[ 8.       0.33292  0.49794  0.85015]                      
[ 9.       0.31051  0.44314  0.85054]                      
[ 10.        0.25074   0.39902   0.86362]                  
[ 11.        0.19769   0.49155   0.8603 ]                  
[ 12.        0.18041   0.4465    0.86551]                  
[ 13.        0.17061   0.50019   0.86154]                  
[ 14.        0.16427   0.35269   0.88113]                  



In [64]:
# The save call is commented out, so this cell only loads the
# 'initial_training' checkpoint; that checkpoint must already have been saved.
# learn.save('initial_training')
learn.load('initial_training')

In [65]:
# Same preparation steps that train_new_size performs: rebuild the data,
# turn precompute off, then unfreeze.
learn.set_data(get_data(sz,bs,arch))
learn.precompute = False
learn.unfreeze()




In [66]:
# Largest learning rate in the list used by the fit cells below; the other two
# entries are lr/9 and lr/3.
lr = 3e-2

In [None]:
# NOTE: the saved output of this cell ends with the progress bar at 27% after
# epoch 3; the next cell issues the identical call.
learn.fit([lr/9, lr/3, lr], n_cycle=3, cycle_len=1, cycle_mult=2)

[ 0.       0.70094  0.41884  0.86115]                      
[ 1.       0.43418  0.36696  0.88908]                      
[ 2.       0.26808  0.1815   0.94128]                      
[ 3.       0.24319  0.20534  0.93152]                      
 27%|██▋       | 16/60 [00:21<00:58,  1.33s/it, loss=0.229]

In [27]:
# Fit with three learning rates (lr/9, lr/3, lr); the saved log below lists
# epochs 0 through 6.
learn.fit([lr/9, lr/3, lr], n_cycle=3, cycle_len=1, cycle_mult=2)

[ 0.       0.66828  0.34069  0.88009]                      
[ 1.       0.39919  0.22215  0.91381]                      
[ 2.       0.24967  0.17397  0.93171]                      
[ 3.       0.23531  0.29597  0.90737]                      
[ 4.       0.18539  0.22285  0.91694]                      
[ 5.       0.13884  0.15373  0.94063]                      
[ 6.       0.09984  0.15368  0.94356]                       



In [28]:
# Save the learner's current state under the name 'resnet50'.
learn.save('resnet50')

In [52]:
# One configuration per image size; each entry carries exactly the keys that
# train_new_size reads ('lr', 'sz', 'bs', 'save_name').
sizes = {
    '224': dict(lr=3e-2, sz=224, bs=64, save_name='resnet50_224'),
    '299': dict(lr=1e-2, sz=299, bs=64, save_name='resnet50_299'),
}
# Order in which the configurations are trained.
order = ['224', '299']

In [53]:
# Train through each configuration in `order`, carrying the learner from one
# size to the next.
# NOTE: the saved output below ends in a KeyboardInterrupt during the first
# ('224') configuration.
for size in order:
    learn = train_new_size(sizes[size], learn, arch)

{'lr': 0.03, 'sz': 224, 'bs': 64, 'save_name': 'resnet50_224'}



False


[ 0.       0.50572  0.38828  0.87052]                      
  8%|▊         | 5/60 [00:02<00:28,  1.96it/s, loss=0.518]

KeyboardInterrupt: 

## Test predictions

In [None]:
def test_predictions(learn):
    log_preds,y = learn.TTA(is_test=True, n_aug=8)
    probs = np.mean(np.exp(log_preds), axis=0)
    preds = np.argmax(probs, axis=1)
    return preds

In [None]:
# Predicted class indices for the test set, from the current learner.
preds = test_predictions(learn)

## Submit to Kaggle

In [None]:
def kaggle_submit(PATH, preds, data, sub_num):
    """Write predictions to a gzip-compressed CSV submission and return its path.

    PATH:    directory under which the 'submit' folder is created if missing.
    preds:   iterable of indices into data.classes, one per entry of
             data.test_ds.fnames and in the same order.
    data:    object exposing `classes` and `test_ds.fnames`.
    sub_num: suffix interpolated into the output file name.

    Returns the path of the written file:
    <PATH>/submit/plant_submission<sub_num>.gz

    Raises AssertionError when the number of predictions differs from the
    number of test files.
    """
    # Underscores in the class names are replaced with spaces for the submission.
    class_preds = [data.classes[idx].replace('_', ' ') for idx in preds]

    assert len(class_preds) == len(data.test_ds.fnames), (
        f'{len(class_preds)} predictions for {len(data.test_ds.fnames)} test files'
    )

    # basename strips the directory part using the platform's path separators
    # instead of assuming '/'.
    files = [os.path.basename(fname) for fname in data.test_ds.fnames]

    # Naming the columns at construction keeps an empty prediction list from
    # failing: assigning two column names to a zero-column frame raises.
    df = pd.DataFrame(list(zip(files, class_preds)), columns=['file', 'species'])

    submit_folder = os.path.join(PATH, 'submit')
    submit_filepath = os.path.join(submit_folder, f'plant_submission{sub_num}.gz')
    os.makedirs(submit_folder, exist_ok=True)
    df.to_csv(submit_filepath, compression='gzip', index=False)
    return submit_filepath

In [None]:
# Write submission '4' and wrap the returned file path in FileLink as the cell's output.
FileLink(kaggle_submit(PATH, preds, data, '4'))