In [5]:
import papermill as pm
import numpy as np
import pandas as pd
from os import mkdir, listdir
from os.path import exists, isfile, join
import json

In [6]:
def load_config(config_path = 'config.json'):
    file = open(config_path,'r')
    return json.load(file)

In [7]:
config = load_config('config.json')

In [13]:
config

{'img_height': 128,
 'img_width': 192,
 'epochs': 3,
 'batch_size': 128,
 'remove_background': False,
 'need_training': True,
 'prepare_datasets': True,
 'datasets_paths': [['/usr/data/shared_directory/team_1/data/plant_pathology_2020',
   'data_transforms/plant_pathology_2020_transform.ipynb']],
 'do_augmentation': True,
 'model_name': 'datagen-test',
 'model_json_path': None,
 'model_weights_path': None}

In [14]:
output_notebooks_dir = 'output_notebooks'

if not exists(output_notebooks_dir):
    mkdir(output_notebooks_dir) 
    
def craft_output_notebook_name(notebook_name):
    name = notebook_name.split('/')[-1]
    return join(output_notebooks_dir,name)

In [15]:
# запускаем трансформ датасетов

if config['prepare_datasets']:
    
    datasets_paths = config['datasets_paths']
    
    for (dataset_path,notebook_path) in datasets_paths:
        pm.execute_notebook(
            input_path = notebook_path,
            output_path = craft_output_notebook_name(notebook_path),
            parameters=dict(dataset_path = dataset_path,
                           output_path = 'data_transforms'),
            nest_asyncio=True
        )

HBox(children=(FloatProgress(value=0.0, description='Executing', max=7.0, style=ProgressStyle(description_widt…




In [16]:
# запускаем file rearrange

if config['prepare_datasets']:
    
    if not exists('data'):
        mkdir('data')
        
    pm.execute_notebook(
        input_path = 'file_rearrange.ipynb',
        output_path = craft_output_notebook_name('file_rearrange.ipynb'),
        parameters = dict(csv_folder = 'data_transforms',
                         output_path_pathologies = 'data',
                         output_path_plants = 'data'),
        nest_asyncio=True
    )
    
#train_pathology.csv
#test_pathology.csv
#train_plants.csv
#test_plants.csv

HBox(children=(FloatProgress(value=0.0, description='Executing', max=15.0, style=ProgressStyle(description_wid…




In [17]:
# запускаем copy_resize
if config['prepare_datasets']:
    
    pm.execute_notebook(
        input_path = 'copy_resize.ipynb',
        output_path = craft_output_notebook_name('copy_resize.ipynb'),
        parameters = dict(root = 'data'),
        nest_asyncio=True
    )

HBox(children=(FloatProgress(value=0.0, description='Executing', max=12.0, style=ProgressStyle(description_wid…




In [18]:
# запусксем тренировку модели

pm.execute_notebook(
    input_path = 'train_model.ipynb',
    output_path = craft_output_notebook_name('train_model.ipynb'),
    parameters = dict(
        train_pathology_path = 'data/train_pathology.csv',
        test_pathology_path = 'data/test_pathology.csv',
        train_plants_path = 'data/train_plants.csv',
        test_plants_path = 'data/test_plants.csv'
    ),
    nest_asyncio=True
)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=23.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'code',
   'execution_count': 1,
   'metadata': {'tags': ['parameters'],
    'papermill': {'exception': False,
     'start_time': '2020-06-17T13:55:08.339035',
     'end_time': '2020-06-17T13:55:08.356063',
     'duration': 0.017028,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2020-06-17T13:55:08.353925Z',
     'iopub.execute_input': '2020-06-17T13:55:08.354191Z',
     'iopub.status.idle': '2020-06-17T13:55:08.355594Z',
     'shell.execute_reply': '2020-06-17T13:55:08.355966Z'}},
   'outputs': [],
   'source': "train_pathology_path = 'data/train_pathology.csv'\ntest_pathology_path = 'data/test_pathology.csv'\ntrain_plants_path = 'data/train_plants.csv'\ntest_plants_path = 'data/test_plants.csv'"},
  {'cell_type': 'code',
   'metadata': {'tags': ['injected-parameters'],
    'papermill': {'exception': False,
     'start_time': '2020-06-17T13:55:08.366620',
     'end_time': '2020-06-17T13:55:08.377651',
     'duration': 0.011031,
     'sta

In [19]:
#запускаем репорт

model_name = config['model_name']

pm.execute_notebook(
    input_path = 'report.ipynb',
    output_path = craft_output_notebook_name('report.ipynb'),
    parameters = dict(
        model_name = config['model_name'],
        model_path = join('output',model_name,'model.json'),
        weights_path = join('output',model_name,'model_weights.h5'),
        history_path = join('output',model_name,'history'),
        csv_path = 'data/test_pathology.csv'
    ),
    nest_asyncio=True
)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=10.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-06-17T13:59:49.404159',
     'end_time': '2020-06-17T13:59:49.411030',
     'duration': 0.006871,
     'status': 'completed'}},
   'source': 'txt file, в котором в первой строке написано название модели\n(его можно взять из конфиг файла)\nво второй строке написан acc\nв третьей строке написан loss\nво четвертой строке написан val acc\nв пятой строке написан val loss\nсохранить png картинку с графиком (сделать все на один график и лосс и вал лосс и акк и вал акк)\nнемесил в пакетике'},
  {'cell_type': 'code',
   'execution_count': 1,
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-06-17T13:59:49.417555',
     'end_time': '2020-06-17T13:59:50.729032',
     'duration': 1.311477,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2020-06-17T13:59:49.425502Z',
     'iopub.execute_input': '2020-06-17T13:59

In [20]:
#сохраняем конфиг
import shutil
shutil.copy('config.json',join('output',model_name,'config.json'))

'output/datagen-test/config.json'

In [None]:
#запускаем блокнот с визуализацией

viz_data = 'data/test_pathology.csv'

pm.execute_notebook(
    input_path = 'visualization.ipynb',
    output_path = join('output',model_name,'visualization.ipynb'),
    parameters = dict(
        model_name = config['model_name'],
        model_path = join('output',model_name,'model.json'),
        weights_path = join('output',model_name,'model_weights.h5'),
        csv_path = viz_data,
        augment_on_predict = False
    ),
    nest_asyncio=True
)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=20.0, style=ProgressStyle(description_wid…