In [None]:
# default_exp models.binaryClassification
# default_cls_lvl 2

# Binary Horse Poo Model

> Simple model to detect HorsePoo vs noHorsePoo

## export data

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#!rm -R data/tmp/horse_poo/ && rm -R data/tmp/no_horse_poo/ 

In [None]:
#!prodigy db-out binary_horse_poo ./data/tmp

## Description
With this model we will start off with a very simple binary classification. We will try to use most of the default settings from fastai. This will also be our benchmark model for further investigations. 

In [None]:
#export
from fastai.vision import * 
from fastai.callbacks import EarlyStoppingCallback
from prodigy.util import read_jsonl, write_jsonl
from prodigy.components.db import connect
from PooDetector.dataset_operations import extract_jsonl_to_binary_folders
import os
import shutil
from fastscript import *

In [None]:
#export
def prepare_data(fld_input:str='data/tmp', bs=256, valid_pct:float=0.2, size:int=224, seed:int=None):
    """Build a fastai databunch for training from a folder of labelled images.

    The images are read from `fld_input` and labelled by the folder they are
    stored in (`label_from_folder`).

    fld_input: root folder that contains the class folders
    bs:        batch size of the resulting databunch
    valid_pct: fraction of the images that is randomly held out for validation
    size:      size the images are resized to by the transforms
    seed:      optional seed for the random train/validation split; pass an int
               to get a reproducible split, `None` keeps the unseeded behaviour

    Returns the databunch, normalized with `imagenet_stats`.
    """
    # fastai's default augmentation set
    tfms = get_transforms()
    return (ImageList.from_folder(fld_input)
            .split_by_rand_pct(valid_pct, seed=seed)
            .label_from_folder()
            .transform(tfms, size=size)
            .databunch(bs=bs)
            .normalize(imagenet_stats))


In [None]:
#no_testing
# Build a databunch from the local `test_data/` folder with a small batch size of 16.
data = prepare_data(fld_input='test_data/', bs=16)

In [None]:
#no_testing
# Show a sample batch of the databunch to check images and labels by eye.
data.show_batch()

In [None]:
#export 
def get_learner(data:ImageDataBunch=None, model:Module=None):
    """Create a `cnn_learner` that is ready for training.

    data:  databunch to train on; when omitted it is built with `prepare_data()`
           and its default arguments
    model: architecture passed to `cnn_learner` as `base_arch`; when omitted
           `models.resnet50` is used

    The learner is created with an `EarlyStoppingCallback`
    (`min_delta=0.005`, `patience=8`) registered through `callback_fns`.
    """
    data = prepare_data() if data is None else data
    arch = models.resnet50 if model is None else model

    # partial, because fastai instantiates the entries of `callback_fns` itself
    stopper = partial(EarlyStoppingCallback, min_delta=0.005, patience=8)
    return cnn_learner(data, base_arch=arch, callback_fns=[stopper])

In [None]:
#no_testing
# No `model` is passed, so `get_learner` falls back to `models.resnet50`.
learn = get_learner(data=data)

In [None]:
#no_testing
# Short one-cycle run (2 cycles, learning rate 5e-2), then save the weights
# under the name 'stage1'.
learn.fit_one_cycle(2, 5e-2)
#learn.fit_one_cycle(2, 5e-2)
learn.save('stage1')

In [None]:
#no_testing
# Export the trained learner so it can be loaded for inference later.
learn.export()

In [None]:
#export 
@call_parse
def train_model(path_jsonl:Param("path to jsonl file", str)='test_data/binary_horse_poo.jsonl',
                cycles_to_fit:Param("number of cycles to fit", int)=10, 
                bs:Param("batch size", int)=128,
                label:Param("positive label for binary classification", str)="horse_poo",
                dataset:Param("name of the prodigy dataset to export", str)="binary_horse_poo"
               ):
    """start training a new model with early stopping and export it

    Steps:
      1. dump the prodigy dataset `dataset` to `path_jsonl` (an existing file is replaced)
      2. remove every subfolder of the folder that holds `path_jsonl`
      3. call `extract_jsonl_to_binary_folders` on the jsonl file with `label`
      4. build a databunch from that folder, train with `fit_one_cycle` and export

    WARNING: step 2 is destructive - everything in the subfolders next to
    `path_jsonl` is deleted.

    path_jsonl:    jsonl file the dataset is written to; its folder is the data root
    cycles_to_fit: number of cycles passed to `fit_one_cycle`
    bs:            batch size
    label:         positive label handed to `extract_jsonl_to_binary_folders`
    dataset:       name of the prodigy dataset to read

    Returns the trained learner.
    Raises `ValueError` when the prodigy dataset cannot be found.
    """
    path_jsonl = Path(path_jsonl)
    if path_jsonl.exists():
        path_jsonl.unlink()
    # the jsonl file is written below, so its folder has to exist
    path_jsonl.parent.mkdir(parents=True, exist_ok=True)

    db = connect()   # uses settings from your prodigy.json
    images = db.get_dataset(dataset)
    if images is None:
        # fail early with a clear message instead of an obscure error further down
        raise ValueError(f"prodigy dataset '{dataset}' not found")
    write_jsonl(path_jsonl, images)

    # start from a clean folder so stale class folders cannot leak into training
    remove_subfolders(str(path_jsonl.parent))

    # NOTE(review): presumably creates the class folders next to the jsonl file - confirm
    extract_jsonl_to_binary_folders(str(path_jsonl), label)

    data = prepare_data(path_jsonl.parent, bs=bs)

    learn = get_learner(data)
    learn.fit_one_cycle(cycles_to_fit, 5e-2)
    learn.export()
    return learn
    
    
    
 


In [None]:
#export 
def remove_subfolders(path_parent:[Path, str]):
    """Remove all direct subfolders of `path_parent` including their content.

    Files that live directly in `path_parent` and `path_parent` itself are kept.
    A path that does not exist or is not a directory is silently ignored.

    A subfolder that is a symbolic link is only unlinked; the folder it points
    to is left untouched (`shutil.rmtree` refuses to work on symbolic links).

    path_parent: folder whose subfolders are removed
    """
    path_parent = Path(path_parent)
    if not path_parent.is_dir():
        # nothing to clean up
        return

    # rmtree already deletes nested folders, so only direct children are visited;
    # sorted() materialises the listing before anything is deleted
    for child in sorted(path_parent.iterdir()):
        if not child.is_dir():
            continue
        print(f"remove {str(child)}")
        if child.is_symlink():
            child.unlink()
        else:
            shutil.rmtree(str(child))
    

In [None]:
#no_testing
path = Path('test_data/tmp/')

# fixture: two class folders below the scratch folder
class_folders = [path / 'horse', path / 'no_horse']
for folder in class_folders:
    # parents=True also creates the scratch folder itself when it is missing
    folder.mkdir(parents=True, exist_ok=True)

assert path.exists()
assert all(folder.exists() for folder in class_folders)

remove_subfolders(str(path))

# the subfolders are gone, the scratch folder itself is kept
assert path.exists()
assert not any(folder.exists() for folder in class_folders)



In [None]:
# prepare test: work on a copy of the jsonl fixture inside a scratch folder,
# because `train_model` deletes and rewrites the file it is given
path_jsonl = Path('test_data/binary_horse_poo.jsonl')

path_fld_target = path_jsonl.parent / 'tmp'
path_fld_target.mkdir(parents=True, exist_ok=True)

shutil.copy(str(path_jsonl), str(path_fld_target))
path_jsonl = path_fld_target / path_jsonl.name
assert path_jsonl.exists()

#test
#learn = train_model(path_jsonl=path_jsonl, cycles_to_fit=2, bs=4)


#assert os.path.exists(str(path_jsonl.parent / 'export.pkl'))


In [None]:
#no_testing
#!prodigy db-out binary_horse_poo > data/tmp/binary_horse_poo.jsonl
# Full training run. `train_model` replaces an existing file at this path with a
# fresh dump from the prodigy database and removes every subfolder of `data/tmp`
# before training.
path_jsonl = 'data/tmp/binary_horse_poo.jsonl'
learn = train_model(path_jsonl=path_jsonl, cycles_to_fit=15, bs=128)

In [None]:
#no_testing
# Unfreeze the learner before the fine-tuning runs below.
learn.unfreeze()

In [None]:
#no_testing
# First fine-tuning run: 8 cycles with the default learning rate.
learn.fit_one_cycle(8)

In [None]:
#no_testing
# Second fine-tuning run with the same settings as the previous cell.
learn.fit_one_cycle(8)

In [None]:
#no_testing
# Export again so the exported learner contains the fine-tuned weights.
learn.export() 