In [1]:
import papermill as pm
import numpy as np
import pandas as pd
from os import mkdir, listdir
from os.path import exists, isfile, join
import json

In [2]:
def load_config(config_path = 'config.json'):
    file = open(config_path,'r')
    return json.load(file)

In [3]:
config = load_config('config.json')

In [4]:
config

{'img_height': 128,
 'img_width': 192,
 'epochs': 1,
 'batch_size': 16,
 'remove_background': False,
 'need_training': True,
 'prepare_datasets': True,
 'datasets_paths': [['/usr/data/shared_directory/team_1/data/plant_pathology_2020',
   'data_transforms/plant_pathology_2020_transform.ipynb']],
 'do_augmentation': True,
 'model_name': 'PSU_model',
 'model_json_path': None,
 'model_weights_path': None}

In [5]:
output_notebooks_dir = 'output_notebooks'

if not exists(output_notebooks_dir):
    mkdir(output_notebooks_dir) 
    
def craft_output_notebook_name(notebook_name):
    name = notebook_name.split('/')[-1]
    return join(output_notebooks_dir,name)

In [6]:
# запускаем трансформ датасетов

if config['prepare_datasets']:
    
    datasets_paths = config['datasets_paths']
    
    for (dataset_path,notebook_path) in datasets_paths:
        pm.execute_notebook(
            input_path = notebook_path,
            output_path = craft_output_notebook_name(notebook_path),
            parameters=dict(dataset_path = dataset_path,
                           output_path = 'data_transforms'),
            nest_asyncio=True
        )

HBox(children=(FloatProgress(value=0.0, description='Executing', max=7.0, style=ProgressStyle(description_widt…




In [7]:
# запускаем file rearrange

if config['prepare_datasets']:
    
    if not exists('data'):
        mkdir('data')
        
    pm.execute_notebook(
        input_path = 'file_rearrange.ipynb',
        output_path = craft_output_notebook_name('file_rearrange.ipynb'),
        parameters = dict(csv_folder = 'data_transforms',
                         output_path_pathologies = 'data',
                         output_path_plants = 'data'),
        nest_asyncio=True
    )
    
#train_pathology.csv
#test_pathology.csv
#train_plants.csv
#test_plants.csv

HBox(children=(FloatProgress(value=0.0, description='Executing', max=15.0, style=ProgressStyle(description_wid…




In [8]:
# запускаем copy_resize
if config['prepare_datasets']:
    
    pm.execute_notebook(
        input_path = 'copy_resize.ipynb',
        output_path = craft_output_notebook_name('copy_resize.ipynb'),
        parameters = dict(root = 'data'),
        nest_asyncio=True
    )

HBox(children=(FloatProgress(value=0.0, description='Executing', max=12.0, style=ProgressStyle(description_wid…




In [9]:
# запусксем тренировку модели

pm.execute_notebook(
    input_path = 'train_model.ipynb',
    output_path = craft_output_notebook_name('train_model.ipynb'),
    parameters = dict(
        train_pathology_path = 'data/train_pathology.csv',
        test_pathology_path = 'data/test_pathology.csv',
        train_plants_path = 'data/train_plants.csv',
        test_plants_path = 'data/test_plants.csv'
    ),
    nest_asyncio=True
)

HBox(children=(FloatProgress(value=0.0, description='Executing', max=22.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'code',
   'execution_count': 1,
   'metadata': {'tags': ['parameters'],
    'papermill': {'exception': False,
     'start_time': '2020-06-11T10:54:50.325937',
     'end_time': '2020-06-11T10:54:50.343814',
     'duration': 0.017877,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2020-06-11T10:54:50.342163Z',
     'iopub.execute_input': '2020-06-11T10:54:50.342432Z',
     'shell.execute_reply': '2020-06-11T10:54:50.343473Z',
     'iopub.status.idle': '2020-06-11T10:54:50.343767Z'}},
   'outputs': [],
   'source': "train_pathology_path = 'data/train_pathology.csv'\ntest_pathology_path = 'data/test_pathology.csv'\ntrain_plants_path = 'data/train_plants.csv'\ntest_plants_path = 'data/test_plants.csv'"},
  {'cell_type': 'code',
   'metadata': {'tags': ['injected-parameters'],
    'papermill': {'exception': False,
     'start_time': '2020-06-11T10:54:50.351845',
     'end_time': '2020-06-11T10:54:50.362730',
     'duration': 0.010885,
     'sta

In [10]:
#запускаем файл репорт

model_name = config['model_name']

pm.execute_notebook(
    input_path = 'report.ipynb',
    output_path = craft_output_notebook_name('report.ipynb'),
    parameters = dict(
        model_name = config['model_name'],
        model_path = join('output',model_name,'model.json'),
        weights_path = join('output',model_name,'model_weights.h5'),
        history_path = join('output',model_name,'history'),
        csv_path = 'data/test_pathology.csv'
    ),
    nest_asyncio=True
)


HBox(children=(FloatProgress(value=0.0, description='Executing', max=10.0, style=ProgressStyle(description_wid…




{'cells': [{'cell_type': 'markdown',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-06-11T10:56:38.625592',
     'end_time': '2020-06-11T10:56:38.632475',
     'duration': 0.006883,
     'status': 'completed'}},
   'source': 'txt file, в котором в первой строке написано название модели\n(его можно взять из конфиг файла)\nво второй строке написан acc\nв третьей строке написан loss\nво четвертой строке написан val acc\nв пятой строке написан val loss\nсохранить png картинку с графиком (сделать все на один график и лосс и вал лосс и акк и вал акк)\nнемесил в пакетике'},
  {'cell_type': 'code',
   'execution_count': 1,
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2020-06-11T10:56:38.639350',
     'end_time': '2020-06-11T10:56:39.913521',
     'duration': 1.274171,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2020-06-11T10:56:38.646694Z',
     'iopub.execute_input': '2020-06-11T10:56