# Prodigy Recipe


In [None]:
# default_exp custom_recipe
# default_cls_lvl 2

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import numpy as np
import copy
import io
from PIL import Image
import PIL
from time import time
import json
from pathlib import Path

from prodigy.components.loaders import get_stream
from prodigy.components.preprocess import fetch_images
from prodigy.core import recipe, recipe_args
from prodigy.util import log, b64_uri_to_bytes, split_string
from prodigy.components.loaders import Images
from prodigy.components.sorters import prefer_uncertain, prefer_high_scores, prefer_low_scores
import prodigy


from fastai.vision import *
import fastai
from pathlib import Path
import torch

In [None]:
#export
@recipe(
    "fastaimodel",
    dataset=("The dataset to use", "positional", None, str),
    source=("Path to a directory of images", "option", "source", str),
    model_path=("Path to the fastai model", "option", "model", str),
    target_folder=("Path to the target folder where the pictures are " +
                   "in the labeled folders",
                   "option",
                   "target_folder",
                   str),
    sort_by_score_type=("choose which order you want to receive the predictions. " +
                        "The available orders are prefer_uncertain, prefer_high_scores, prefer_low_scores.",
                        "option",
                        "sort",
                        str),
    label=("Label name; accepted images are saved to <target_folder>/<label>, " +
           "rejected ones to <target_folder>/no_<label>",
           "option",
           "label",
           str)
)
def fastai_recipe(dataset, source, model_path, target_folder, sort_by_score_type, label='horse_poo'):
    """Prodigy recipe that streams model-scored images and files the answers into folders.

    Every image found in `source` is scored by the fastai model at `model_path`
    (see `score_stream`), optionally re-ordered by one of the Prodigy sorters,
    and shown in the `image_manual` interface. Accepted images are written to
    `<target_folder>/<label>`, rejected images to `<target_folder>/no_<label>`.

    Args:
        dataset: Name of the Prodigy dataset the annotations are stored in.
        source: Directory containing the images to annotate.
        model_path: Path to the exported fastai learner file.
        target_folder: Root folder in which the two label folders are created.
        sort_by_score_type: One of 'prefer_uncertain', 'prefer_high_scores',
            'prefer_low_scores', or None to keep the loader's order.
        label: Label name; it is also the name of the folder for accepted images.

    Returns:
        The recipe components dict (dataset, view_id, stream, update, config).

    Raises:
        ValueError: If `sort_by_score_type` is set to an unknown value.
    """
    # Validate the sort option up front so a typo fails immediately and clearly
    sorters = {
        'prefer_high_scores': prefer_high_scores,
        'prefer_low_scores': prefer_low_scores,
        'prefer_uncertain': prefer_uncertain,
    }
    if sort_by_score_type is not None and sort_by_score_type not in sorters:
        raise ValueError(
            f"Unknown sort order '{sort_by_score_type}', expected one of: "
            + ", ".join(sorted(sorters))
        )

    # create folders
    create_folders(target_folder, label)
    target_folder = Path(target_folder)
    target_folder_pos = target_folder / label
    target_folder_neg = target_folder / ('no_' + label)
    # Answers that are not listed here (e.g. 'ignore') are not written to disk
    answer_folders = {'accept': target_folder_pos, 'reject': target_folder_neg}

    def update(examples):
        # This function is triggered when Prodigy receives annotations
        print(f"type of examples = {type(examples)}")
        for example in examples:
            folder = answer_folders.get(example['answer'])
            if folder is not None:
                save_base64_image(str(folder), example['text'] + '.jpg', example['image'])

    # score_stream loads the model itself, so it is not loaded a second time here
    scored = score_stream(Images(source), model_path)
    if sort_by_score_type is None:
        # The sorters turn (score, example) tuples into plain examples; without
        # a sorter the scores have to be stripped here to keep the stream valid
        stream = (example for _, example in scored)
    else:
        stream = sorters[sort_by_score_type](scored)

    return {
        "dataset": dataset,
        "view_id": "image_manual",
        "stream": stream,
        "update": update,
        "config": {  # Additional config settings, mostly for app UI
            "label": label
        }

    }


In [None]:
#export 
def create_folders(path:str, label:str) -> None:
    """Make sure the target folder and its two label sub-folders exist.

    Creates `path`, `path/<label>` and `path/no_<label>`, including missing
    parents. Folders that already exist are left untouched.
    """
    root = Path(path)
    for folder in (root, root / label, root / ('no_' + label)):
        folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Start from a clean slate: remove the label folders if an earlier run left
# them behind, then check that create_folders creates both of them.
for folder_name in ('horse_poo', 'no_horse_poo'):
    leftover = Path(folder_name)
    if leftover.is_dir():
        leftover.rmdir()  # rmdir only removes empty directories
create_folders('.', 'horse_poo')
assert Path('horse_poo').is_dir()
assert Path('no_horse_poo').is_dir()

In [None]:
#export
def load_fastai_model(path):
    """Load an exported fastai learner from the file at `path`.

    `load_learner` takes the containing folder and the file name separately,
    so the path is split into these two parts before the call.
    """
    model_file = Path(path)
    return load_learner(str(model_file.parent), str(model_file.name))
    

In [None]:
# Smoke test: the exported model loads as a fastai Learner and can score an image
model = load_fastai_model('data/binary_horse_poo/binary_horse_poo_model.pkl')
# Check the type before the model is used; isinstance also accepts Learner subclasses
assert isinstance(model, fastai.basic_train.Learner)
prediction = model.predict(fastai.vision.open_image('data/binary_horse_poo/no_poo/20181215135116-poo_day.jpg'))
# The third element of the prediction is indexed per class; print the first entry
print(prediction[2][0].numpy())

0.9797697


In [None]:
#export 
def save_base64_image(path, filename, uri):
    """Decode a base64 data URI and save the image as `path/filename`.

    The output format is chosen by PIL from the extension of `filename`.

    Args:
        path: Folder the image is written to; it must already exist.
        filename: Name of the file to create, including the extension.
        uri: Base64-encoded data URI holding the image.
    """
    tgt_path = Path(path) / filename
    # Modes the JPEG writer accepts; anything else (e.g. RGBA, P) must be converted
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')
    # The context manager releases the decoder resources once the file is written
    with PIL.Image.open(io.BytesIO(b64_uri_to_bytes(uri))) as pil_image:
        to_save = pil_image
        if tgt_path.suffix.lower() in ('.jpg', '.jpeg') and pil_image.mode not in jpeg_modes:
            to_save = pil_image.convert('RGB')
        to_save.save(str(tgt_path))

In [None]:
# Read the first record of the sample JSONL file; it serves as input for a
# manual test of save_base64_image.
from prodigy.components.loaders import JSONL
jsonl_stream = JSONL("data/binary_horse_poo.jsonl")
line = next(jsonl_stream)
# Manual check (disabled): would write the record's image to ./test.jpg
#save_base64_image('.', 'test.jpg', line['image'])

In [None]:
!rm test.jpg

rm: cannot remove 'test.jpg': No such file or directory


In [None]:
#export
def score_stream(stream, model_path):
    """Score every image of a stream with the fastai model at `model_path`.

    Args:
        stream: Iterable of task dicts. Each one needs an "image" entry holding
            a base64-encoded data URI and a "text" entry used in the log line.
        model_path: Path to the exported fastai learner file.

    Yields:
        `(score, example)` tuples, the input format of the Prodigy sorters.
        The score is the first entry of the third element returned by
        `learn.predict`.

    Raises:
        ValueError: If an "image" entry does not start with "data".
    """
    learn = load_fastai_model(model_path)
    for example in stream:
        if not example["image"].startswith("data"):
            msg = "Expected base64-encoded data URI, but got: '{}'."
            raise ValueError(msg.format(example["image"][:100]))

        with PIL.Image.open(io.BytesIO(b64_uri_to_bytes(example["image"]))) as pil_image:
            # Force three channels so grayscale, palette and RGBA images yield
            # a (height, width, 3) array as well
            pixels = np.asarray(pil_image.convert("RGB"))
        # (height, width, channel) -> (channel, height, width)
        pixels = np.transpose(pixels, (2, 0, 1))
        x = torch.from_numpy(pixels.astype(np.float32, copy=False))
        x = x.div_(255)  # scale the 0..255 pixel values down to 0..1
        # NOTE(review): `Image` is presumably the fastai class brought in by
        # `from fastai.vision import *`, not the PIL module - confirm
        score = learn.predict(Image(x))[2][0].numpy().item()
        print(f"socre={score}, id={example['text']}")
        yield (score, example)

In [None]:
# Smoke test for score_stream: score the first record of the sample JSONL file
from prodigy.components.loaders import JSONL
jsonl_stream = JSONL("data/binary_horse_poo.jsonl")
stream = score_stream(jsonl_stream, 'data/binary_horse_poo/binary_horse_poo_model.pkl')
# score_stream yields (score, example) tuples
score, image = next(stream)
score  # last expression of the cell, shown as its output




  if __name__ == '__main__':


0.6369343996047974

In [None]:
# Convert this notebook to its Python module with nbdev
# (presumably only the cells marked #export are written - confirm)
from nbdev.export import *
notebook2script('01_custom_receipe.ipynb')

Converted 01_custom_receipe.ipynb.


In [None]:
# Start the "fastaimodel" recipe on dataset "test3"; -F points Prodigy at the exported recipe module
!prodigy fastaimodel test3 \
-model /home/wilhelm/PooDetector/data/binary_horse_poo/binary_horse_poo_model.pkl \
-source /mnt/Data/to_label/20200229 \
-target_folder /home/wilhelm \
-sort prefer_high_scores \
-F /home/wilhelm/PooDetector/PooDetector/custom_recipe.py


✨  Starting the web server at http://0.0.0.0:9010 ...
Open the app in your browser and start annotating!

socre=0.9801797866821289, id=20200229103118_768790
socre=0.9622217416763306, id=20200229162900_426466
socre=0.9633802771568298, id=20200229084533_201695
socre=0.9808942675590515, id=20200229153105_969238
socre=0.9720112681388855, id=20200229103446_476237
socre=0.9864588379859924, id=20200229093442_271649
socre=0.9516823887825012, id=20200229093121_885559
socre=0.9761852622032166, id=20200229152606_403341
socre=0.8757503032684326, id=20200229132722_488232
socre=0.9560772180557251, id=20200229161906_463297
socre=0.9678339958190918, id=20200229163432_114649
socre=0.9382492899894714, id=20200229135056_828101
socre=0.9368922114372253, id=20200229153858_261027
socre=0.9332748055458069, id=20200229154200_634088
socre=0.9736192226409912, id=20200229085533_518430
socre=0.9519771933555603, id=20200229085054_083134
socre=0.9715439677238464, id=20200229150310_186305
socre=0.9753612279891968, 

socre=0.9679936766624451, id=20200229121701_079317
socre=0.9779331684112549, id=20200229115056_411737
socre=0.960695743560791, id=20200229112050_941207
socre=0.9705675840377808, id=20200229135711_216808
socre=0.9676828980445862, id=20200229150549_961377
socre=0.9595694541931152, id=20200229100445_679872
socre=0.9552376866340637, id=20200229122058_900269
socre=0.8382460474967957, id=20200229152711_218550
socre=0.9224334359169006, id=20200229123454_404256
socre=0.9712335467338562, id=20200229104826_329804
socre=0.9721836447715759, id=20200229135710_674187
socre=0.97917240858078, id=20200229115048_574814
type of examples = <class 'list'>
type of examples = <class 'list'>
type of examples = <class 'list'>
socre=0.9735410809516907, id=20200229120934_630834
socre=0.986242413520813, id=20200229120623_350528
socre=0.9689278602600098, id=20200229170034_785291
socre=0.93310546875, id=20200229121959_093073
socre=0.8791077733039856, id=20200229125204_287574
socre=0.9599208235740662, id=20200229163

socre=0.971504271030426, id=20200229092248_369039
socre=0.9407975077629089, id=20200229131439_230397
socre=0.9621395468711853, id=20200229164331_725991
socre=0.9681323170661926, id=20200229153315_977640
socre=0.9793322086334229, id=20200229101330_691011
socre=0.9619938135147095, id=20200229151441_393881
socre=0.9511755108833313, id=20200229101801_241720
socre=0.9726722836494446, id=20200229103631_373305
socre=0.9792314767837524, id=20200229115327_346552
socre=0.9001967906951904, id=20200229132334_735951
socre=0.9567673802375793, id=20200229090553_094346
socre=0.9861694574356079, id=20200229125136_675051
socre=0.98776775598526, id=20200229105325_050920
socre=0.9799866080284119, id=20200229115722_992343
socre=0.9362930655479431, id=20200229094416_667911
socre=0.9733160138130188, id=20200229164916_643076
socre=0.8475401401519775, id=20200229112826_277047
socre=0.983633816242218, id=20200229110822_484356
socre=0.9817492961883545, id=20200229134520_344910
socre=0.9565860033035278, id=202002

socre=0.9624961018562317, id=20200229090637_815192
socre=0.968696653842926, id=20200229141433_973324
socre=0.9573642015457153, id=20200229154414_402352
socre=0.9654117226600647, id=20200229100555_090029
socre=0.9146071672439575, id=20200229130830_342554
socre=0.9684139490127563, id=20200229144411_718986
socre=0.9649611711502075, id=20200229132251_563745
socre=0.9570723176002502, id=20200229151426_639753
socre=0.9690658450126648, id=20200229163650_779034
socre=0.9865790009498596, id=20200229153533_875866
socre=0.9572031497955322, id=20200229135849_601995
socre=0.9666953086853027, id=20200229100812_571492
socre=0.8725063800811768, id=20200229111927_187853
socre=0.9188053011894226, id=20200229125740_712227
socre=0.9728517532348633, id=20200229091729_849052
socre=0.9877995252609253, id=20200229121308_076461
socre=0.9677887558937073, id=20200229151815_098839
socre=0.976691722869873, id=20200229142327_711099
socre=0.9714949131011963, id=20200229084432_191168
socre=0.9664345979690552, id=2020

socre=0.9357277154922485, id=20200229145454_365838
socre=0.9456393718719482, id=20200229151234_942490
socre=0.9587103128433228, id=20200229132247_385427
socre=0.9468573331832886, id=20200229134729_926744
socre=0.9476934671401978, id=20200229141535_073557
socre=0.9704319834709167, id=20200229103458_880003
socre=0.9501766562461853, id=20200229164356_804349
socre=0.9597241282463074, id=20200229150153_993253
socre=0.9510564804077148, id=20200229152502_349067
socre=0.9026398658752441, id=20200229093830_534944
socre=0.9879905581474304, id=20200229111412_665745
socre=0.8179656267166138, id=20200229131936_575824
socre=0.9790058732032776, id=20200229111437_019320
socre=0.9944974780082703, id=20200229152946_000987
socre=0.974880039691925, id=20200229100522_140700
socre=0.970553994178772, id=20200229145649_955609
socre=0.9462359547615051, id=20200229161311_661475
socre=0.8311284780502319, id=20200229130632_067944
socre=0.9403167366981506, id=20200229123443_415243
socre=0.9716070890426636, id=2020

socre=0.9516381025314331, id=20200229140306_891553
socre=0.937810480594635, id=20200229094352_996183
socre=0.992188572883606, id=20200229101709_618879
socre=0.9408798217773438, id=20200229123329_220217
socre=0.9446083903312683, id=20200229154928_960873
socre=0.9587826728820801, id=20200229145342_131249
socre=0.9732767343521118, id=20200229143821_651005
socre=0.9665815830230713, id=20200229093854_323150
socre=0.9776086807250977, id=20200229104534_305064
socre=0.9619717001914978, id=20200229090120_371795
socre=0.9556128978729248, id=20200229155543_866191
socre=0.9729673266410828, id=20200229095439_051465
socre=0.9383370876312256, id=20200229092835_457638
socre=0.9649348258972168, id=20200229152018_146209
socre=0.9645670056343079, id=20200229151048_751927
socre=0.9734627604484558, id=20200229100754_654163
socre=0.9735638499259949, id=20200229103841_308218
socre=0.931776225566864, id=20200229094430_307083
socre=0.9714905619621277, id=20200229095400_283404
socre=0.8974096179008484, id=20200

socre=0.9635976552963257, id=20200229090829_380284
socre=0.9736664891242981, id=20200229090914_048157
type of examples = <class 'list'>
type of examples = <class 'list'>
socre=0.9791284799575806, id=20200229143122_346558
socre=0.9375361800193787, id=20200229142100_792541
socre=0.9599020481109619, id=20200229153241_262488
socre=0.9331477284431458, id=20200229122112_408746
socre=0.967954695224762, id=20200229114139_552693
socre=0.9485962390899658, id=20200229150231_610053
socre=0.9587916135787964, id=20200229084943_966141
socre=0.9724544286727905, id=20200229131138_008500
socre=0.947309136390686, id=20200229142643_714822
socre=0.9855617880821228, id=20200229103011_404765
socre=0.9673556089401245, id=20200229084349_340828
socre=0.7517139911651611, id=20200229131906_117282
socre=0.9712821841239929, id=20200229091415_070947
socre=0.9247856140136719, id=20200229132056_633724
socre=0.8983824849128723, id=20200229112805_132816
socre=0.9268825650215149, id=20200229134731_718511
socre=0.98262792

socre=0.9791530966758728, id=20200229153830_118021
socre=0.9534602165222168, id=20200229140658_836619
socre=0.850648045539856, id=20200229131814_777145
socre=0.964337170124054, id=20200229160848_747184
socre=0.9030807018280029, id=20200229121521_570977
socre=0.970093846321106, id=20200229162049_950375
socre=0.9186730980873108, id=20200229152550_579175
socre=0.9708506464958191, id=20200229141024_099733
type of examples = <class 'list'>
type of examples = <class 'list'>
socre=0.9855042099952698, id=20200229115102_412710
socre=0.9785924553871155, id=20200229093322_932840
socre=0.9705591797828674, id=20200229101012_302480
socre=0.9664965271949768, id=20200229122108_942043
socre=0.9552467465400696, id=20200229143009_182270
socre=0.9759506583213806, id=20200229153340_271311
socre=0.9691079258918762, id=20200229141047_897124
socre=0.965154230594635, id=20200229164744_198069
socre=0.9636027216911316, id=20200229111132_802846
socre=0.9747629165649414, id=20200229114634_541874
socre=0.9368208646

socre=0.9480276107788086, id=20200229125313_982983
socre=0.9873737692832947, id=20200229104733_069674
socre=0.9732812643051147, id=20200229160440_572395
socre=0.9740748405456543, id=20200229103658_913432
socre=0.9785309433937073, id=20200229150256_044564
socre=0.9559406042098999, id=20200229153828_972721
socre=0.9733176231384277, id=20200229111842_596414
socre=0.9372414350509644, id=20200229123629_996926
socre=0.9522245526313782, id=20200229131051_017065
socre=0.9638245105743408, id=20200229152903_045678
socre=0.9539071321487427, id=20200229164736_293425
socre=0.9669057130813599, id=20200229152155_966567
socre=0.9785969257354736, id=20200229142534_969774
socre=0.965280294418335, id=20200229111628_451033
socre=0.982677161693573, id=20200229122347_547502
type of examples = <class 'list'>
socre=0.9804486632347107, id=20200229104051_899663
socre=0.9124205112457275, id=20200229113007_725441
socre=0.9619249701499939, id=20200229084141_419892
socre=0.9561883807182312, id=20200229123701_160772

socre=0.9395653009414673, id=20200229111020_686441
socre=0.9548386335372925, id=20200229134834_642676
socre=0.9790883660316467, id=20200229105733_534391
socre=0.9442565441131592, id=20200229154436_532517
socre=0.9795594215393066, id=20200229101342_785034
socre=0.9488092660903931, id=20200229124327_265961
socre=0.96260666847229, id=20200229094653_807051
socre=0.9565230011940002, id=20200229143627_261391
socre=0.9732357263565063, id=20200229124404_789148
socre=0.9711076021194458, id=20200229113658_317622
socre=0.9826036095619202, id=20200229093422_426004
socre=0.9453548192977905, id=20200229155340_931293
socre=0.9129841327667236, id=20200229113004_584139
socre=0.962236762046814, id=20200229101005_184697
socre=0.97003573179245, id=20200229111315_042599
socre=0.9723712801933289, id=20200229165154_016458
socre=0.9578550457954407, id=20200229115805_591861
socre=0.9550241231918335, id=20200229143554_186576
socre=0.9663733839988708, id=20200229162225_769286
socre=0.9739355444908142, id=2020022

socre=0.9778159856796265, id=20200229114808_578131
socre=0.9825823307037354, id=20200229113342_598142
socre=0.995951771736145, id=20200229152925_605576
socre=0.9684727787971497, id=20200229113629_751684
socre=0.9719085693359375, id=20200229151620_965355
socre=0.9759047627449036, id=20200229162246_755349
socre=0.9432732462882996, id=20200229124555_718473
socre=0.955945611000061, id=20200229161459_199929
socre=0.9738973379135132, id=20200229131140_409755
socre=0.9677600264549255, id=20200229152514_457704
socre=0.9911790490150452, id=20200229144932_001085
type of examples = <class 'list'>
type of examples = <class 'list'>
socre=0.9629544019699097, id=20200229143915_824607
socre=0.9760288596153259, id=20200229120106_041761
socre=0.9530149102210999, id=20200229122016_177451
socre=0.9657423496246338, id=20200229142758_717839
socre=0.9606462717056274, id=20200229123630_393677
socre=0.9771639108657837, id=20200229142754_406240
socre=0.96000736951828, id=20200229100923_181478
socre=0.9521870613

socre=0.9768548011779785, id=20200229114414_530392
socre=0.9679295420646667, id=20200229100827_455641
socre=0.9493977427482605, id=20200229165816_782890
socre=0.971076250076294, id=20200229151143_000406
socre=0.9658194184303284, id=20200229085135_417406
socre=0.9404493570327759, id=20200229131016_406654
socre=0.9603928327560425, id=20200229130402_575050
socre=0.957023561000824, id=20200229124214_053787
socre=0.9598789811134338, id=20200229092615_376073
socre=0.9766850471496582, id=20200229124457_650801
socre=0.9156416654586792, id=20200229112738_351286
socre=0.9501320123672485, id=20200229154808_921997
socre=0.9724516272544861, id=20200229112416_937029
socre=0.9771504402160645, id=20200229140225_994251
socre=0.9789784550666809, id=20200229114241_688793
socre=0.9740474820137024, id=20200229151419_030062
socre=0.9570714831352234, id=20200229154825_515794
socre=0.9431037306785583, id=20200229101803_771858
socre=0.9542625546455383, id=20200229143603_019591
type of examples = <class 'list'>