diff --git a/configs/default/components/publishers/stream_file_exporter.yml b/configs/default/components/publishers/stream_file_exporter.yml new file mode 100644 index 0000000..1a5546f --- /dev/null +++ b/configs/default/components/publishers/stream_file_exporter.yml @@ -0,0 +1,34 @@ +# This file defines the default values for the Stream File Exporter. + +#################################################################### +# 1. CONFIGURATION PARAMETERS that will be LOADED by the component. +#################################################################### + +# List of names of streams that will be exported (LOADED) +# Can be a string with a single name or a comma-separated string with a list of names +input_streams: '' + +# Separator that will be placed between values (LOADED) +separator: ',' + +# Name of the file containing output values (LOADED) +filename: 'outputs.txt' + +streams: + #################################################################### + # 2. Keymappings associated with INPUT and OUTPUT streams. + #################################################################### + +globals: + #################################################################### + # 3. Keymappings of variables that will be RETRIEVED from GLOBALS. + #################################################################### + + #################################################################### + # 4. Keymappings associated with GLOBAL variables that will be SET. + #################################################################### + + #################################################################### + # 5. Keymappings associated with statistics that will be ADDED. 
+ #################################################################### + diff --git a/configs/vqa_med_2019/default_extend_answers.yml b/configs/vqa_med_2019/default_extend_answers.yml new file mode 100644 index 0000000..270d5d1 --- /dev/null +++ b/configs/vqa_med_2019/default_extend_answers.yml @@ -0,0 +1,62 @@ +# This config is not a standalone config! +# It adds new sections (sets) without samplers and components for saving answers that we can use for getting final answers. + +training_answers: + problem: + type: &p_type VQAMED2019 + data_folder: &data_folder ~/data/vqa-med + split: training + categories: all + resize_image: &resize_image [224, 224] + batch_size: 64 + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + +validation_answers: + problem: + type: *p_type + data_folder: *data_folder + split: validation + resize_image: *resize_image + batch_size: 64 + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + + +# Testing parameters: +test_answers: + problem: + type: *p_type + data_folder: *data_folder + split: test + resize_image: *resize_image + batch_size: 64 + dataloader: + # No sampler, process samples in the same order. + shuffle: false + # Use 1 worker, so batches will follow the samples order. + num_workers: 1 + +# Add component for exporting answers to files. +pipeline: + disable: viewer +# # Viewers. 
+ viewer_extended: + priority: 100.4 + type: StreamViewer + sample_number: 0 + input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + + exporter: + priority: 100.5 + type: StreamFileExporter + separator: '|' + input_streams: indices,image_ids,questions,category_names,predicted_categories,answers,tokenized_answers,predicted_answers + +#: pipeline diff --git a/configs/vqa_med_2019/default_vqa_med_2019.yml b/configs/vqa_med_2019/default_vqa_med_2019.yml index adf587e..11d7222 100644 --- a/configs/vqa_med_2019/default_vqa_med_2019.yml +++ b/configs/vqa_med_2019/default_vqa_med_2019.yml @@ -42,14 +42,3 @@ validation: # Use four workers for loading images. dataloader: num_workers: 4 - - -# Testing parameters: -testing: - problem: - type: *p_type - data_folder: *data_folder - split: test - resize_image: *resize_image - batch_size: 32 - diff --git a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml index 204ea33..79c56ea 100644 --- a/configs/vqa_med_2019/question_categorization/default_question_categorization.yml +++ b/configs/vqa_med_2019/question_categorization/default_question_categorization.yml @@ -2,15 +2,19 @@ default_configs: vqa_med_2019/default_vqa_med_2019.yml training: + problem: categories: all export_sample_weights: ~/data/vqa-med/answers.all.weights.csv sampler: weights: ~/data/vqa-med/answers.all.weights.csv - - # settings parameters terminal_conditions: loss_stop: 1.0e-3 +validation: + problem: + categories: all + + pipeline: # Predictions decoder. 
diff --git a/ptp/components/problems/image_text_to_class/vqa_med_2019.py b/ptp/components/problems/image_text_to_class/vqa_med_2019.py index 9a58296..e156eba 100644 --- a/ptp/components/problems/image_text_to_class/vqa_med_2019.py +++ b/ptp/components/problems/image_text_to_class/vqa_med_2019.py @@ -33,7 +33,7 @@ from ptp.data_types.data_definition import DataDefinition from ptp.components.utils.io import save_nparray_to_csv_file -from ptp.configuration.config_parsing import get_value_list_from_dictionary +from ptp.configuration.config_parsing import get_value_list_from_dictionary, get_value_from_dictionary class VQAMED2019(Problem): @@ -109,11 +109,49 @@ def __init__(self, name, config): self.globals["category_word_mappings"] = {'C1': 0, 'C2': 1, 'C3': 2, 'C4': 3, 'BINARY': 4, '': 5} self.category_idx_to_word = {0: 'C1', 1: 'C2', 2: 'C3', 3: 'C4', 4: 'BINARY', 5: ''} + # Get image preprocessing. + self.image_preprocessing = get_value_list_from_dictionary( + "image_preprocessing", self.config, + 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") + ) + if 'none' in self.image_preprocessing: + self.image_preprocessing = [] + if 'all' in self.image_preprocessing: + self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") + self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) + + + # Get question preprocessing. 
+ self.question_preprocessing = get_value_list_from_dictionary( + "question_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") + ) + if 'none' in self.question_preprocessing: + self.question_preprocessing = [] + if 'all' in self.question_preprocessing: + self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") + self.logger.info("Applied question preprocessing: {}".format(self.question_preprocessing)) + + # Get answer preprocessing. + self.answer_preprocessing = get_value_list_from_dictionary( + "answer_preprocessing", self.config, + 'none | lowercase | remove_punctuation | tokenize | all'.split(" | ") + ) + if 'none' in self.answer_preprocessing: + self.answer_preprocessing = [] + if 'all' in self.answer_preprocessing: + self.answer_preprocessing = 'lowercase | remove_punctuation | tokenize '.split(" | ") + self.logger.info("Applied answer preprocessing: {}".format(self.answer_preprocessing)) + + # Get the absolute path. self.data_folder = os.path.expanduser(self.config['data_folder']) + # Get split. + split = get_value_from_dictionary('split', self.config, "training,validation,training_validation,test".split(",")) + # Set split-dependent data. - if self.config['split'] == 'training': + if split == 'training': # Training split folder. split_folder = os.path.join(self.data_folder, "ImageClef-2019-VQA-Med-Training") # Set source files. @@ -131,8 +169,10 @@ def __init__(self, name, config): # Filter lists taking into account configuration. source_files, source_image_folders, source_categories = self.filter_sources(source_files, source_image_folders, source_categories) + # Load dataset. + self.dataset = self.load_dataset(source_files, source_image_folders, source_categories) - elif self.config['split'] == 'validation': + elif split == 'validation': # Validation split folder. 
split_folder = os.path.join(self.data_folder, "ImageClef-2019-VQA-Med-Validation") @@ -152,8 +192,10 @@ def __init__(self, name, config): # Filter lists taking into account configuration. source_files, source_image_folders, source_categories = self.filter_sources(source_files, source_image_folders, source_categories) + # Load dataset. + self.dataset = self.load_dataset(source_files, source_image_folders, source_categories) - elif self.config['split'] == 'training_validation': + elif split == 'training_validation': # This split takes both training and validation and assumes utilization of kFoldWeightedRandomSampler. # 1. Training split folder. @@ -198,47 +240,17 @@ def __init__(self, name, config): source_files = [*training_source_files, *valid_source_files] source_image_folders = [*training_source_image_folders, *valid_source_image_folders] source_categories = [*training_source_categories, *valid_source_categories] - # else: # Test set. # TODO - - # Get image preprocessing. - self.image_preprocessing = get_value_list_from_dictionary( - "image_preprocessing", self.config, - 'none | random_affine | random_horizontal_flip | normalize | all'.split(" | ") - ) - if 'none' in self.image_preprocessing: - self.image_preprocessing = [] - if 'all' in self.image_preprocessing: - self.image_preprocessing = 'random_affine | random_horizontal_flip | normalize'.split(" | ") - self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing)) - - - # Get question preprocessing. 
- self.question_preprocessing = get_value_list_from_dictionary( - "question_preprocessing", self.config, - 'none | lowercase | remove_punctuation | tokenize | random_remove_stop_words | random_shuffle_words | all'.split(" | ") - ) - if 'none' in self.question_preprocessing: - self.question_preprocessing = [] - if 'all' in self.question_preprocessing: - self.question_preprocessing = 'lowercase | remove_punctuation | tokenize | remove_stop_words | shuffle_words'.split(" | ") - self.logger.info("Applied question preprocessing: {}".format(self.question_preprocessing)) + # Load dataset. + self.dataset = self.load_dataset(source_files, source_image_folders, source_categories) - # Get answer preprocessing. - self.answer_preprocessing = get_value_list_from_dictionary( - "answer_preprocessing", self.config, - 'none | lowercase | remove_punctuation | tokenize | all'.split(" | ") - ) - if 'none' in self.answer_preprocessing: - self.answer_preprocessing = [] - if 'all' in self.answer_preprocessing: - self.answer_preprocessing = 'lowercase | remove_punctuation | tokenize '.split(" | ") - self.logger.info("Applied answer preprocessing: {}".format(self.answer_preprocessing)) - - - # Load dataset. - self.logger.info("Loading dataset from files:\n {}".format(source_files)) - self.dataset = self.load_dataset(source_files, source_image_folders, source_categories) - self.logger.info("Loaded dataset consisting of {} samples".format(len(self.dataset))) + else: + # Test set. + split_folder = os.path.join(self.data_folder, "ImageClef-2019-VQA-Med-Test") + # Set source file. + source_file = os.path.join(split_folder,"VQAMed2019_Test_Questions.txt") + # Set image folder. + source_image_folder = os.path.join(split_folder, 'VQAMed2019_Test_Images') + self.dataset = self.load_testset(source_file, source_image_folder) # Display exemplary sample. 
def load_testset(self, data_file, image_folder):
    """
    Loads the test set, i.e. a file with image ids and questions only.

    Every returned sample gets an empty answer and category id 5, as the file that is read
    here has just two ``|``-separated columns (image id and question).

    :param data_file: Source file with ``image_id|question`` rows (no header row).

    :param image_folder: Folder containing image files (stored in every sample as-is).

    :return: List of dictionaries, one per row of the source file.
    """
    self.logger.info('Loading test set from {}...'.format(data_file))

    # Placeholders used for every test sample (no ground truth is read from the file).
    placeholder_category_id = 5
    placeholder_answer = ''

    # Read the whole file at once, naming the two columns after the stream keys.
    frame = pd.read_csv(
        filepath_or_buffer=data_file,
        sep='|',
        header=None,
        names=[self.key_image_ids, self.key_questions])

    samples = []

    # Progress bar advancing by one step per processed row.
    progress = tqdm.tqdm(total=len(frame.index))
    for _, record in frame.iterrows():
        # Process question - if required.
        question_steps = self.question_preprocessing
        processed_question = self.preprocess_text(
            record[self.key_questions],
            'lowercase' in question_steps,
            'remove_punctuation' in question_steps,
            'tokenize' in question_steps,
            'remove_stop_words' in question_steps
            )

        # A "tokenized" answer is a list of tokens, so wrap the placeholder (fresh list per sample).
        if 'tokenize' not in self.answer_preprocessing:
            processed_answer = placeholder_answer
        else:
            processed_answer = [placeholder_answer]

        # Assemble the sample in the same layout as the other splits.
        sample = {
            # Image name and path leading to it.
            self.key_image_ids: record[self.key_image_ids],
            "image_folder": image_folder,
            self.key_questions: processed_question,
            self.key_answers: processed_answer,
            # Add category.
            self.key_category_ids: placeholder_category_id
            }
        samples.append(sample)

        progress.update()
    progress.close()

    self.logger.info("Loaded dataset consisting of {} samples".format(len(samples)))
    # Return the created list.
    return samples
class StreamFileExporter(Component):
    """
    Utility for exporting contents of streams of a given batch to file.

    Each sample of a processed batch is written as a single line, in which the values of the
    requested (and present) streams are joined with the configured separator.
    """

    def __init__(self, name, config):
        """
        Initializes the object, retrieves names of input streams and creates the output file \
        in the directory indicated by ``app_state.log_dir``.

        :param name: Name of the component.
        :type name: str

        :param config: Dictionary of parameters (read from the configuration ``.yaml`` file).
        :type config: :py:class:`ptp.configuration.ConfigInterface`

        """
        # Call constructors of parent classes.
        Component.__init__(self, name, StreamFileExporter, config)

        # Get key mappings for indices.
        self.key_indices = self.stream_keys["indices"]

        # Load list of streams names (keys).
        self.input_stream_keys = get_value_list_from_dictionary("input_streams", self.config)

        # Get separator (may consist of more than one character).
        self.separator = self.config["separator"]

        # Create file where we will write the results.
        # The handle stays open for the whole lifetime of the component, as rows are appended
        # with every processed batch (see __call__, which flushes after each batch).
        filename = self.config["filename"]
        abs_filename = path.join(self.app_state.log_dir, filename)
        self.file = open(abs_filename, 'w')
        self.logger.info("Writing values from {} streams to {}".format(self.input_stream_keys, abs_filename))


    def input_data_definitions(self):
        """
        Function returns a dictionary with definitions of input data that are required by the component.

        :return: dictionary containing input data definitions (each of type :py:class:`ptp.data_types.DataDefinition`).
        """
        return {
            self.key_indices: DataDefinition([-1, 1], [list, int], "Batch of sample indices [BATCH_SIZE] x [1]"),
            }

    def output_data_definitions(self):
        """
        Function returns a dictionary with definitions of output data produced by the component.

        :return: empty dictionary (the component does not produce any streams).
        """
        return {
            }

    def __call__(self, data_dict):
        """
        Exports values from the indicated streams to file.

        Streams that are not present in ``data_dict`` are skipped and reported with a warning.

        :param data_dict: :py:class:`ptp.utils.DataDict` object containing "indices" and other streams that will be exported to file.
        """
        # Get batch size.
        indices = data_dict[self.key_indices]
        batch_size = len(indices)

        # Check present streams (order of the requested streams is preserved).
        absent_streams = []
        present_streams = []
        for stream_key in self.input_stream_keys:
            if stream_key in data_dict.keys():
                present_streams.append(stream_key)
            else:
                absent_streams.append(stream_key)

        # Export values to file - one line per sample.
        for i in range(batch_size):
            values = ['{}'.format(data_dict[stream_key][i]) for stream_key in present_streams]
            # Joining puts the separator only BETWEEN values, so it works for separators of any
            # length (cutting off a single trailing character did not).
            self.file.write(self.separator.join(values) + '\n')

        # The file is never explicitly closed, so push the rows to disk after every batch.
        self.file.flush()

        # Inform about missing streams.
        if len(absent_streams) > 0:
            self.logger.warning("Could not export the following (absent) streams: {}".format(absent_streams))
- self.input_stream_keys = self.config["input_streams"] - if type(self.input_stream_keys) == str: - self.input_stream_keys = self.input_stream_keys.replace(" ", "").split(",") + self.input_stream_keys = get_value_list_from_dictionary("input_streams", self.config) # Get sample number. self.sample_number = self.config["sample_number"] @@ -95,9 +94,15 @@ def __call__(self, data_dict): sample_number = self.sample_number # Generate displayed string. + absent_streams = [] disp_str = "Showing selected streams for sample {}:\n".format(sample_number) for stream_key in self.input_stream_keys: if stream_key in data_dict.keys(): disp_str += " '{}': {}\n".format(stream_key, data_dict[stream_key][sample_number]) + else: + absent_streams.append(stream_key) + # Log values and inform about missing streams. self.logger.info(disp_str) + if len(absent_streams) > 0: + self.logger.warning("Could not display the following (absent) streams: {}".format(absent_streams)) diff --git a/ptp/configuration/config_parsing.py b/ptp/configuration/config_parsing.py index c30aa10..7d20f4c 100644 --- a/ptp/configuration/config_parsing.py +++ b/ptp/configuration/config_parsing.py @@ -122,7 +122,7 @@ def load_class_default_config_file(class_type): module = class_type.__module__.replace(".","/") rel_path = module[module.find("ptp")+4:] # Build the abs path to the default config file of a given component. - abs_default_config = AppState().absolute_config_path + "default/" + rel_path + ".yml" + abs_default_config = os.path.join(AppState().absolute_config_path, "default", rel_path) + ".yml" # Check if file exists. if not os.path.isfile(abs_default_config): @@ -146,14 +146,14 @@ def load_class_default_config_file(class_type): exit(-2) -def recurrent_config_parse(configs: str, configs_parsed: list, abs_config_path: str): +def recurrent_config_parse(configs_to_parse: list, configs_parsed: list, abs_config_path: str): """ Parses names of configuration files in a recursive manner, i.e. 
\ by looking for ``default_config`` sections and trying to load and parse those \ files one by one. - :param configs: String containing names of configuration files (with paths), separated by comas. - :type configs: str + :param configs_to_parse: List containing names of configuration files (with paths). + :type configs_to_parse: list :param configs_parsed: Configurations that were already parsed (so we won't parse them many times). :type configs_parsed: list @@ -163,37 +163,33 @@ def recurrent_config_parse(configs: str, configs_parsed: list, abs_config_path: :return: list of parsed configuration files. """ - # Split and remove spaces. - configs_to_parse = configs.replace(" ", "").split(',') - # Terminal condition. while len(configs_to_parse) > 0: # Get config. config = configs_to_parse.pop(0) - abs_config = abs_config_path + config # Skip empty names (after lose comas). if config == '': continue - print("Info: Parsing the {} configuration file".format(abs_config)) + print("Info: Parsing the {} configuration file".format(config)) # Check if it was already loaded. if config in configs_parsed: - print('Warning: Configuration file {} already parsed - skipping'.format(abs_config)) + print('Warning: Configuration file {} already parsed - skipping'.format(config)) continue # Check if file exists. - if not os.path.isfile(abs_config): - print('Error: Configuration file {} does not exist'.format(abs_config)) + if not os.path.isfile(config): + print('Error: Configuration file {} does not exist'.format(config)) exit(-1) try: # Open file and get parameter dictionary. 
- with open(abs_config, 'r') as stream: + with open(config, 'r') as stream: param_dict = yaml.safe_load(stream) except yaml.YAMLError as e: - print("Error: Couldn't properly parse the {} configuration file".format(abs_config)) + print("Error: Couldn't properly parse the {} configuration file".format(config)) print('yaml.YAMLERROR:', e) exit(-1) @@ -202,29 +198,28 @@ def recurrent_config_parse(configs: str, configs_parsed: list, abs_config_path: # Check if there are any default configs to load. if 'default_configs' in param_dict: - # If there are - recursion! - configs_parsed = recurrent_config_parse( - param_dict['default_configs'], configs_parsed, abs_config_path) + default_configs_to_parse = param_dict['default_configs'].replace(" ", "").split(',') + # If there are - expand them to absolute paths. + abs_default_configs_to_parse = [os.path.join(abs_config_path,config) for config in default_configs_to_parse] + # Recursion! + configs_parsed = recurrent_config_parse(abs_default_configs_to_parse, configs_parsed, abs_config_path) # Done, return list of loaded configs. return configs_parsed -def reverse_order_config_load(config_interface_obj, configs_to_load, abs_config_path): +def reverse_order_config_load(config_interface_obj, configs_to_load): """ Loads configuration files in reversed order. :param config_interface_obj: Configuration interface object. - :param configs_to_load: list of configuration files to load (relative to config directory) - - :param abs_config_path: Absolute path to config directory. - + :param configs_to_load: list of configuration files to load (with absolute paths) """ for config in reversed(configs_to_load): # Load config from YAML file. 
- config_interface_obj.add_config_params_from_yaml(abs_config_path + config) - print('Info: Loaded configuration from file {}'.format(abs_config_path + config)) + config_interface_obj.add_config_params_from_yaml(config) + print('Info: Loaded configuration from file {}'.format(config)) def get_value_list_from_dictionary(key, parameter_dict, accepted_values = []): diff --git a/ptp/utils/app_state.py b/ptp/utils/app_state.py index 987f475..e102d8c 100644 --- a/ptp/utils/app_state.py +++ b/ptp/utils/app_state.py @@ -19,6 +19,8 @@ import torch +from os import path + from ptp.utils.singleton import SingletonMetaClass @@ -53,9 +55,16 @@ def __init__(self): # Field storing global variables. self.__globals = dict() + # Get absolute path to configs from "~/./ptp/configs". + ptp_path = path.expanduser("~/.ptp/") + with open(path.join(ptp_path, "config.txt")) as file: + self.absolute_config_path = file.readline() + # Initialize logger logfile (as empty for now). self.log_file = None self.logger = None + # Set default path to current dir. + self.log_dir = path.expanduser(".") # Set CPU types as default. self.set_cpu_types() diff --git a/ptp/workers/__init__.py b/ptp/workers/__init__.py index bd73f49..990dffc 100644 --- a/ptp/workers/__init__.py +++ b/ptp/workers/__init__.py @@ -2,12 +2,12 @@ from .trainer import Trainer #from .offline_trainer import OfflineTrainer from .online_trainer import OnlineTrainer -#from .tester import Tester +from .processor import Processor __all__ = [ 'Worker', 'Trainer', #'OfflineTrainer', 'OnlineTrainer', - #'Tester' + 'Processor' ] diff --git a/ptp/workers/online_trainer.py b/ptp/workers/online_trainer.py index 3423e02..77e42fc 100644 --- a/ptp/workers/online_trainer.py +++ b/ptp/workers/online_trainer.py @@ -34,9 +34,8 @@ class OnlineTrainer(Trainer): it makes less sense for problems which have a very large, almost infinite, dataset (like algorithmic \ tasks, which generate random data on-the-fly). 
\ - This is why this OnlineTrainer was implemented. Instead of looping on epochs, it iterates directly on \ - episodes (we call an iteration on a single batch an episode). - + This is why this OnlineTrainer was implemented. Despite the fact it has the notion of epoch, it is more \ + flexible and operates on episodes (we call an iteration on a single batch an episode). \ """ @@ -108,7 +107,7 @@ def setup_experiment(self): # Export and log configuration, optionally asking the user for confirmation. config_parsing.display_parsing_results(self.logger, self.app_state.args, self.unparsed) config_parsing.display_globals(self.logger, self.app_state.globalitems()) - config_parsing.export_experiment_configuration_to_yml(self.logger, self.log_dir, "training_configuration.yaml", self.config, self.app_state.args.confirm) + config_parsing.export_experiment_configuration_to_yml(self.logger, self.app_state.log_dir, "training_configuration.yml", self.config, self.app_state.args.confirm) def run_experiment(self): """ @@ -346,7 +345,7 @@ def run_experiment(self): # Finalize statistics collection. self.finalize_statistics_collection() self.finalize_tensorboard() - self.logger.info("Experiment logged to: {}".format(self.log_dir)) + self.logger.info("Experiment logged to: {}".format(self.app_state.log_dir)) def main(): diff --git a/ptp/workers/tester.py b/ptp/workers/processor.py similarity index 62% rename from ptp/workers/tester.py rename to ptp/workers/processor.py index c012431..bfc29dc 100644 --- a/ptp/workers/tester.py +++ b/ptp/workers/processor.py @@ -22,7 +22,7 @@ from time import sleep from datetime import datetime -import ptp.configuration.config_parsing as config_parse +import ptp.configuration.config_parsing as config_parsing import ptp.utils.logger as logging from ptp.workers.worker import Worker @@ -34,97 +34,88 @@ from ptp.utils.statistics_aggregator import StatisticsAggregator -class Tester(Worker): +class Processor(Worker): """ - Defines the basic ``Tester``. 
+ Defines the basic ``Processor``. - If defining another type of tester, it should subclass it. + If defining another type of Processor, it should subclass it. """ - def __init__(self, name="Tester"): + def __init__(self, name="Processor"): """ Calls the ``Worker`` constructor, adds some additional arguments to parser. - :param name: Name of the worker (DEFAULT: "Tester"). + :param name: Name of the worker (DEFAULT: "Processor"). :type name: str """ # Call base constructor to set up app state, registry and add default params. - super(Tester, self).__init__(name) + super(Processor, self).__init__(name) + self.parser.add_argument( + '--set', + dest='set', + type=str, + default="testing", + help='Name of the specific set (section containing problem) to be processed (DEFAULT: testing)') def setup_global_experiment(self): """ - Sets up the global test experiment for the ``Tester``: + Sets up the global test experiment for the ``Processor``: - - Checks that the model to use exists on file: + - Checks that the model to use exists - >>> if not os.path.isfile(flags.model) + - Checks that the configuration file exists - - Checks that the configuration file exists: - - >>> if not os.path.isfile(config_file) - - - Create the configuration: - - >>> self.config.add_config_params_from_yaml(config) + - Creates the configuration The rest of the experiment setup is done in :py:func:`setup_individual_experiment()` \ to allow for multiple tests suppport. """ # Call base method to parse all command line arguments and add default sections. - super(Tester, self).setup_experiment() - + super(Processor, self).setup_experiment() + + # Retrieve checkpoint file and section chkpt_file = self.app_state.args.load_checkpoint + self.set = self.app_state.args.set + + # Check the presence of the CUDA-compatible devices. 
+ if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0): + self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!") + exit(-1) + # Check if checkpoint file was indicated. if chkpt_file == "": print('Please pass path to and name of the file containing pipeline to be loaded as --load parameter') - exit(-1) - + exit(-2) # Check if file with model exists. if not os.path.isfile(chkpt_file): print('Checkpoint file {} does not exist'.format(chkpt_file)) - exit(-2) + exit(-3) # Extract path. - abs_config_path, _ = os.path.split(os.path.dirname(os.path.expanduser(chkpt_file))) + self.abs_path, _ = os.path.split(os.path.dirname(os.path.expanduser(chkpt_file))) + print(self.abs_path) # Check if config file was indicated by the user. if self.app_state.args.config != '': - root_config = self.app_state.args.config + # Split and make them absolute. + root_configs = self.app_state.args.config.replace(" ", "").split(',') + # If there are - expand them to absolute paths. + abs_root_configs = [os.path.expanduser(config) for config in root_configs] else: # Use the "default one". - root_config = os.path.join(abs_config_path, 'training_configuration.yaml') - - # Check if configuration file exists. - if not os.path.isfile(root_config): - print('Config file {} does not exist'.format(root_config)) - exit(-3) - - # Check the presence of the CUDA-compatible devices. - if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0): - self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!") - exit(-4) - - # Extract absolute path to main ptp 'config' directory. - # Save it in app_state! - self.app_state.absolute_config_path = abs_config_path[:abs_config_path.find("configs")+8] - # Get relative path. 
- rel_config_path = abs_config_path[abs_config_path.find("configs")+8:] - - print("TODO: different root config extraction path!!") - print(self.app_state.absolute_config_path) - exit(1) + abs_root_configs = [os.path.join(self.abs_path, 'training_configuration.yml')] # Get the list of configurations which need to be loaded. - configs_to_load = config_parse.recurrent_config_parse(rel_config_path, [], self.app_state.absolute_config_path) + configs_to_load = config_parsing.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s) - config_parse.reverse_order_config_load(self.config, configs_to_load, self.app_state.absolute_config_path) + config_parsing.reverse_order_config_load(self.config, configs_to_load) # -> At this point, the Config Registry contains the configuration loaded (and overwritten) from several files. @@ -133,31 +124,23 @@ def setup_individual_experiment(self): Setup individual test experiment in the case of multiple tests, or the main experiment in the case of \ one test experiment. - - Set up the log directory path: - - >>> os.makedirs(self.log_dir, exist_ok=False) - - - Add a FileHandler to the logger (defined in BaseWorker): - - >>> self.logger.addHandler(fh) - - - Set random seeds: + - Set up the log directory path - >>> self.set_random_seeds('testing', self.config['testing']) + - Set random seeds - Creates the pipeline consisting of many components - Creates testing problem manager - - Performs testing of compatibility of testing pipeline. + - Performs testing of compatibility of testing pipeline """ # Get testing problem type. 
try: - _ = self.config['testing']['problem']['type'] + _ = self.config[self.set]['problem']['type'] except KeyError: - print("Error: Couldn't retrieve the problem 'type' from the 'testing' section in the loaded configuration") + print("Error: Couldn't retrieve the problem 'type' from the '{}' section in the loaded configuration".format(self.set)) exit(-5) # Get pipeline name. @@ -171,59 +154,59 @@ def setup_individual_experiment(self): while True: # Dirty fix: if log_dir already exists, wait for 1 second and try again try: - time_str = 'test_{0:%Y%m%d_%H%M%S}'.format(datetime.now()) + time_str = self.set+'_{0:%Y%m%d_%H%M%S}'.format(datetime.now()) if self.app_state.args.savetag != '': time_str = time_str + "_" + self.app_state.args.savetag - self.log_dir = self.abs_path + '/' + time_str + '/' + self.app_state.log_dir = self.abs_path + '/' + time_str + '/' # Lowercase dir. - self.log_dir = self.log_dir.lower() - os.makedirs(self.log_dir, exist_ok=False) + self.app_state.log_dir = self.app_state.log_dir.lower() + os.makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: break # Set log dir. - self.app_state.log_file = self.log_dir + 'tester.log' + self.app_state.log_file = self.app_state.log_dir + 'processor.log' # Initialize logger in app state. self.app_state.logger = logging.initialize_logger("AppState") # Add handlers for the logfile to worker logger. logging.add_file_handler_to_logger(self.logger) - self.logger.info("Logger directory set to: {}".format(self.log_dir )) + self.logger.info("Logger directory set to: {}".format(self.app_state.log_dir )) # Set cpu/gpu types. self.app_state.set_types() # Set random seeds in the testing section. - self.set_random_seeds('testing', self.config['testing']) + self.set_random_seeds(self.set, self.config[self.set]) # Total number of detected errors. errors =0 ################# TESTING PROBLEM ################# - # Build training problem manager. 
- self.testing = ProblemManager('testing', self.config['testing']) - errors += self.testing.build() + # Build the used problem manager. + self.pm = ProblemManager(self.set, self.config[self.set]) + errors += self.pm.build() # check if the maximum number of episodes is specified, if not put a # default equal to the size of the dataset (divided by the batch size) # So that by default, we loop over the test set once. - max_test_episodes = len(self.testing) + max_test_episodes = len(self.pm) - self.config['testing']['problem'].add_default_params({'max_test_episodes': max_test_episodes}) - if self.config["testing"]["problem"]["max_test_episodes"] == -1: + self.config[self.set]['problem'].add_default_params({'max_test_episodes': max_test_episodes}) + if self.config[self.set]["problem"]["max_test_episodes"] == -1: # Overwrite the config value! - self.config['testing']['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + self.config[self.set]['problem'].add_config_params({'max_test_episodes': max_test_episodes}) # Warn if indicated number of episodes is larger than an epoch size: - if self.config["testing"]["problem"]["max_test_episodes"] > max_test_episodes: + if self.config[self.set]["problem"]["max_test_episodes"] > max_test_episodes: self.logger.warning('Indicated maximum number of episodes is larger than one epoch, reducing it.') - self.config['testing']['problem'].add_config_params({'max_test_episodes': max_test_episodes}) + self.config[self.set]['problem'].add_config_params({'max_test_episodes': max_test_episodes}) self.logger.info("Setting the max number of episodes to: {}".format( - self.config["testing"]["problem"]["max_test_episodes"])) + self.config[self.set]["problem"]["max_test_episodes"])) ###################### PIPELINE ###################### @@ -233,7 +216,7 @@ def setup_individual_experiment(self): # Show pipeline. 
summary_str = self.pipeline.summarize_all_components_header() - summary_str += self.testing.problem.summarize_io("testing") + summary_str += self.pm.problem.summarize_io(self.set) summary_str += self.pipeline.summarize_all_components() self.logger.info(summary_str) @@ -244,7 +227,7 @@ def setup_individual_experiment(self): # Handshake definitions. self.logger.info("Handshaking testing pipeline") - defs_testing = self.testing.problem.output_data_definitions() + defs_testing = self.pm.problem.output_data_definitions() errors += self.pipeline.handshake(defs_testing) # Check errors. @@ -302,43 +285,45 @@ def setup_individual_experiment(self): self.pipeline.eval() # Export and log configuration, optionally asking the user for confirmation. - self.export_experiment_configuration(self.log_dir, "testing_configuration.yaml",self.app_state.args.confirm) + config_parsing.display_parsing_results(self.logger, self.app_state.args, self.unparsed) + config_parsing.display_globals(self.logger, self.app_state.globalitems()) + config_parsing.export_experiment_configuration_to_yml(self.logger, self.app_state.log_dir, "training_configuration.yml", self.config, self.app_state.args.confirm) def initialize_statistics_collection(self): """ Function initializes all statistics collectors and aggregators used by a given worker, creates output files etc. """ - # Create statistics collector for testing. - self.testing_stat_col = StatisticsCollector() - self.add_statistics(self.testing_stat_col) - self.testing.problem.add_statistics(self.testing_stat_col) - self.pipeline.add_statistics(self.testing_stat_col) - # Create the csv file to store the testing statistics. - self.testing_batch_stats_file = self.testing_stat_col.initialize_csv_file(self.log_dir, 'testing_statistics.csv') - - # Create statistics aggregator for testing. 
- self.testing_stat_agg = StatisticsAggregator() - self.add_aggregators(self.testing_stat_agg) - self.testing.problem.add_aggregators(self.testing_stat_agg) - self.pipeline.add_aggregators(self.testing_stat_agg) - # Create the csv file to store the testing statistic aggregations. + # Create statistics collector. + self.stat_col = StatisticsCollector() + self.add_statistics(self.stat_col) + self.pm.problem.add_statistics(self.stat_col) + self.pipeline.add_statistics(self.stat_col) + # Create the csv file to store the statistics. + self.pm_batch_stats_file = self.stat_col.initialize_csv_file(self.app_state.log_dir, self.set+'_statistics.csv') + + # Create statistics aggregator. + self.stat_agg = StatisticsAggregator() + self.add_aggregators(self.stat_agg) + self.pm.problem.add_aggregators(self.stat_agg) + self.pipeline.add_aggregators(self.stat_agg) + # Create the csv file to store the statistic aggregations. # Will contain a single row with aggregated statistics. - self.testing_set_stats_file = self.testing_stat_agg.initialize_csv_file(self.log_dir, 'testing_set_agg_statistics.csv') + self.pm_set_stats_file = self.stat_agg.initialize_csv_file(self.app_state.log_dir, self.set+'_set_agg_statistics.csv') def finalize_statistics_collection(self): """ Finalizes statistics collection, closes all files etc. """ # Close all files. - self.testing_batch_stats_file.close() - self.testing_set_stats_file.close() + self.pm_batch_stats_file.close() + self.pm_set_stats_file.close() def run_experiment(self): """ - Main function of the ``Tester``: Test the loaded model over the test set. + Main function of the ``Processor``: Test the loaded model over the set. - Iterates over the ``DataLoader`` for a maximum number of episodes equal to the test set size. + Iterates over the ``DataLoader`` for a maximum number of episodes equal to the set size. 
The function does the following for each episode: @@ -351,49 +336,54 @@ def run_experiment(self): # Initialize tensorboard and statistics collection. self.initialize_statistics_collection() - num_samples = len(self.testing) + num_samples = len(self.pm) - self.logger.info('Testing over the entire test set ({} samples in {} episodes)'.format( - num_samples, len(self.testing.dataloader))) + self.logger.info('Processing the entire set ({} samples in {} episodes)'.format( + num_samples, len(self.pm.dataloader))) try: - # Run test + # Run in no_grad mode. with torch.no_grad(): + # Reset the counter. + self.app_state.episode = -1 - episode = 0 - for test_dict in self.testing.dataloader: + # Inform the problem manager that epoch has started. + self.pm.initialize_epoch() + for batch in self.pm.dataloader: + # Increment counter. + self.app_state.episode += 1 # Terminal condition 0: max test episodes reached. - if episode == self.config["testing"]["problem"]["max_test_episodes"]: + if self.app_state.episode == self.config[self.set]["problem"]["max_test_episodes"]: break # Forward pass. - self.pipeline.forward(test_dict) + self.pipeline.forward(batch) # Collect the statistics. - self.collect_all_statistics(self.testing, self.pipeline, test_dict, - self.testing_stat_col, episode) + self.collect_all_statistics(self.pm, self.pipeline, batch, self.stat_col) # Export to csv - at every step. - self.testing_stat_col.export_to_csv() + self.stat_col.export_to_csv() # Log to logger - at logging frequency. - if episode % self.app_state.args.logging_interval == 0: - self.logger.info(self.testing_stat_col.export_to_string('[Partial Test]')) + if self.app_state.episode % self.app_state.args.logging_interval == 0: + self.logger.info(self.stat_col.export_to_string('[Partial]')) # move to next episode. - episode += 1 + self.app_state.episode += 1 # End for. + # Inform the problem managers that the epoch has ended. 
+ self.pm.finalize_epoch() self.logger.info('\n' + '='*80) - self.logger.info('Test finished') + self.logger.info('Processing finished') # Aggregate statistics for the whole set. - self.aggregate_all_statistics(self.testing, self.pipeline, - self.testing_stat_col, self.testing_stat_agg, episode) + self.aggregate_all_statistics(self.pm, self.pipeline, self.stat_col, self.stat_agg) # Export aggregated statistics. - self.export_all_statistics(self.testing_stat_agg, '[Full Test]') + self.export_all_statistics(self.stat_agg, '[Full Set]') except SystemExit as e: @@ -405,22 +395,23 @@ def run_experiment(self): finally: # Finalize statistics collection. self.finalize_statistics_collection() + self.logger.info("Experiment logged to: {}".format(self.app_state.log_dir)) def main(): """ - Entry point function for the ``Tester``. + Entry point function for the ``Processor``. """ - tester = Tester() + processor = Processor() # parse args, load configuration and create all required objects. - tester.setup_global_experiment() + processor.setup_global_experiment() # finalize the experiment setup - tester.setup_individual_experiment() + processor.setup_individual_experiment() # run the experiment - tester.run_experiment() + processor.run_experiment() if __name__ == '__main__': diff --git a/ptp/workers/trainer.py b/ptp/workers/trainer.py index 85dc0ee..c3a583c 100644 --- a/ptp/workers/trainer.py +++ b/ptp/workers/trainer.py @@ -90,7 +90,7 @@ def setup_experiment(self): - Set up the log directory path: - >>> os.makedirs(self.log_dir, exist_ok=False) + >>> os.makedirs(self.app_state.log_dir, exist_ok=False) - Add a ``FileHandler`` to the logger: @@ -121,34 +121,26 @@ def setup_experiment(self): # Call base method to parse all command line arguments and add default sections. super(Trainer, self).setup_experiment() - # Check if config file was selected. 
- if self.app_state.args.config == '': - print('Please pass configuration file(s) as --c parameter') - exit(-1) - # Check the presence of the CUDA-compatible devices. if self.app_state.args.use_gpu and (torch.cuda.device_count() == 0): self.logger.error("Cannot use GPU as there are no CUDA-compatible devices present in the system!") + exit(-1) + + # Check if config file was selected. + if self.app_state.args.config == '': + print('Please pass configuration file(s) as --c parameter') exit(-2) - # Check if config file exists. - root_config = self.app_state.args.config - if not os.path.isfile(root_config): - print('Error: Configuration file {} does not exist'.format(root_config)) - exit(-3) + # Split and make them absolute. + root_configs = self.app_state.args.config.replace(" ", "").split(',') + # If there are - expand them to absolute paths. + abs_root_configs = [os.path.expanduser(config) for config in root_configs] - # Extract absolute path to main ptp 'config' directory. - abs_config_path = os.path.abspath(root_config) - # Save it in app_state! - self.app_state.absolute_config_path = abs_config_path[:abs_config_path.find("configs")+8] - # Get relative path. - rel_config_path = abs_config_path[abs_config_path.find("configs")+8:] - # Get the list of configurations which need to be loaded. - configs_to_load = config_parse.recurrent_config_parse(rel_config_path, [], self.app_state.absolute_config_path) + configs_to_load = config_parse.recurrent_config_parse(abs_root_configs, [], self.app_state.absolute_config_path) # Read the YAML files one by one - but in reverse order -> overwrite the first indicated config(s) - config_parse.reverse_order_config_load(self.config, configs_to_load, self.app_state.absolute_config_path) + config_parse.reverse_order_config_load(self.config, configs_to_load) # -> At this point, the Param Registry contains the configuration loaded (and overwritten) from several files. # Log the resulting training configuration. 
@@ -185,28 +177,28 @@ def setup_experiment(self): time_str = '{0:%Y%m%d_%H%M%S}'.format(datetime.now()) if self.app_state.args.savetag != '': time_str = time_str + "_" + self.app_state.args.savetag - self.log_dir = os.path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' + self.app_state.log_dir = os.path.expanduser(self.app_state.args.expdir) + '/' + training_problem_type + '/' + pipeline_name + '/' + time_str + '/' # Lowercase dir. - self.log_dir = self.log_dir.lower() - os.makedirs(self.log_dir, exist_ok=False) + self.app_state.log_dir = self.app_state.log_dir.lower() + os.makedirs(self.app_state.log_dir, exist_ok=False) except FileExistsError: sleep(1) else: break # Set log dir. - self.app_state.log_file = self.log_dir + 'trainer.log' + self.app_state.log_file = self.app_state.log_dir + 'trainer.log' # Initialize logger in app state. self.app_state.logger = logging.initialize_logger("AppState") # Add handlers for the logfile to worker logger. logging.add_file_handler_to_logger(self.logger) - self.logger.info("Logger directory set to: {}".format(self.log_dir )) + self.logger.info("Logger directory set to: {}".format(self.app_state.log_dir)) # Set cpu/gpu types. self.app_state.set_types() # Models dir. - self.checkpoint_dir = self.log_dir + 'checkpoints/' + self.checkpoint_dir = self.app_state.log_dir + 'checkpoints/' os.makedirs(self.checkpoint_dir, exist_ok=False) # Set random seeds in the training section. @@ -387,7 +379,7 @@ def initialize_statistics_collection(self): self.training.problem.add_statistics(self.training_stat_col) self.pipeline.add_statistics(self.training_stat_col) # Create the csv file to store the training statistics. 
- self.training_batch_stats_file = self.training_stat_col.initialize_csv_file(self.log_dir, 'training_statistics.csv') + self.training_batch_stats_file = self.training_stat_col.initialize_csv_file(self.app_state.log_dir, 'training_statistics.csv') # Create statistics aggregator for training. self.training_stat_agg = StatisticsAggregator() @@ -395,7 +387,7 @@ def initialize_statistics_collection(self): self.training.problem.add_aggregators(self.training_stat_agg) self.pipeline.add_aggregators(self.training_stat_agg) # Create the csv file to store the training statistic aggregations. - self.training_set_stats_file = self.training_stat_agg.initialize_csv_file(self.log_dir, 'training_set_agg_statistics.csv') + self.training_set_stats_file = self.training_stat_agg.initialize_csv_file(self.app_state.log_dir, 'training_set_agg_statistics.csv') # VALIDATION. # Create statistics collector for validation. @@ -404,7 +396,7 @@ def initialize_statistics_collection(self): self.validation.problem.add_statistics(self.validation_stat_col) self.pipeline.add_statistics(self.validation_stat_col) # Create the csv file to store the validation statistics. - self.validation_batch_stats_file = self.validation_stat_col.initialize_csv_file(self.log_dir, 'validation_statistics.csv') + self.validation_batch_stats_file = self.validation_stat_col.initialize_csv_file(self.app_state.log_dir, 'validation_statistics.csv') # Create statistics aggregator for validation. self.validation_stat_agg = StatisticsAggregator() @@ -412,7 +404,7 @@ def initialize_statistics_collection(self): self.validation.problem.add_aggregators(self.validation_stat_agg) self.pipeline.add_aggregators(self.validation_stat_agg) # Create the csv file to store the validation statistic aggregations. 
- self.validation_set_stats_file = self.validation_stat_agg.initialize_csv_file(self.log_dir, 'validation_set_agg_statistics.csv') + self.validation_set_stats_file = self.validation_stat_agg.initialize_csv_file(self.app_state.log_dir, 'validation_set_agg_statistics.csv') def finalize_statistics_collection(self): @@ -435,16 +427,16 @@ def initialize_tensorboard(self): # Create TensorBoard outputs - if TensorBoard is supposed to be used. if self.app_state.args.tensorboard is not None: from tensorboardX import SummaryWriter - self.training_batch_writer = SummaryWriter(self.log_dir + '/training') + self.training_batch_writer = SummaryWriter(self.app_state.log_dir + '/training') self.training_stat_col.initialize_tensorboard(self.training_batch_writer) - self.training_set_writer = SummaryWriter(self.log_dir + '/training_set_agg') + self.training_set_writer = SummaryWriter(self.app_state.log_dir + '/training_set_agg') self.training_stat_agg.initialize_tensorboard(self.training_set_writer) - self.validation_batch_writer = SummaryWriter(self.log_dir + '/validation') + self.validation_batch_writer = SummaryWriter(self.app_state.log_dir + '/validation') self.validation_stat_col.initialize_tensorboard(self.validation_batch_writer) - self.validation_set_writer = SummaryWriter(self.log_dir + '/validation_set_agg') + self.validation_set_writer = SummaryWriter(self.app_state.log_dir + '/validation_set_agg') self.validation_stat_agg.initialize_tensorboard(self.validation_set_writer) else: self.training_batch_writer = None diff --git a/setup.py b/setup.py index 17c668e..c3dba09 100644 --- a/setup.py +++ b/setup.py @@ -11,16 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""A setuptools based setup module. 
- -See: -https://packaging.python.org/en/latest/distributing.html -https://github.com/pypa/sampleproject -""" # Always prefer setuptools over distutils from setuptools import setup, find_packages -from os import path +from os import path,makedirs # io.open is needed for projects that support Python 2.7 # It ensures open() defaults to text mode with universal newlines, # and accepts an argument to specify the text encoding @@ -29,6 +23,16 @@ here = path.abspath(path.dirname(__file__)) +# Get path to configs. +configs_path = path.join(here,"configs/") +# Export path to config file in ~/.ptp/ folder. +ptp_path = path.expanduser("~/.ptp/") +# Make dir. +makedirs(path.dirname(ptp_path), exist_ok=True) +# Write path to configs. +with open(path.join(ptp_path, "config.txt"),"w") as file: + file.write(configs_path) + # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() @@ -61,7 +65,7 @@ # This is a one-line description or tagline of what your project does. This # corresponds to the "Summary" metadata field: # https://packaging.python.org/specifications/core-metadata/#summary - description='PyTorchPipe: framework for building PyTorch pipelines', + description='PyTorchPipe: framework for building multi-modal PyTorch pipelines', # This is an optional longer description of your project that represents # the body of text which users will see when they visit PyPI. 
@@ -89,7 +93,7 @@ # # This field corresponds to the "Home-Page" metadata field: # https://packaging.python.org/specifications/core-metadata/#home-page-optional - url='https://github.com/tkornut/pytorchpipe/', # Optional + url='https://github.com/IBM/pytorchpipe/', # Optional license='Apache 2.0', # This should be your name or the name of the organization which owns the @@ -116,20 +120,12 @@ # Indicate who your project is intended for 'Intended Audience :: Science/Research', 'Intended Audience :: Developers', - # 'Topic :: Software Development :: Build Tools', # Pick your license as you wish - # 'License :: OSI Approved :: MIT License', - # 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'License :: OSI Approved :: Apache Software License', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. - # 'Programming Language :: Python :: 2', - # 'Programming Language :: Python :: 2.7', - # 'Programming Language :: Python :: 3', - # 'Programming Language :: Python :: 3.4', - # 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Operating System :: Linux', @@ -140,7 +136,7 @@ # project page. What does your project relate to? # # Note that this is a string of words separated by whitespace, not a list. - keywords='pytorch pipeline component', # Optional + keywords='machine learning neural nets pytorch pipeline component problem model', # Optional # You can just specify package directories manually here if your project is # simple. Or you can use find_packages(). 
@@ -216,7 +212,7 @@ entry_points={ # Optional 'console_scripts': [ 'ptp-online-trainer=ptp.workers.online_trainer:main', - 'ptp-tester=ptp.workers.tester:main', + 'ptp-processor=ptp.workers.processor:main', ] }, diff --git a/tests/pipeline_tests.py b/tests/pipeline_tests.py index c453ccb..2748894 100644 --- a/tests/pipeline_tests.py +++ b/tests/pipeline_tests.py @@ -31,10 +31,6 @@ def __init__(self, *args, **kwargs): # Set required globals. app_state = AppState() app_state.__setitem__("bow_size", 10, override=True) - # Extract absolute path to config. - abs_config_path = os.path.realpath(__file__) - # Save it in app_state! - app_state.absolute_config_path = abs_config_path[:abs_config_path.find("tests")]+"configs/" def test_create_component_full_type(self): """ Tests whether component can be created when using full module name with 'path'. """ @@ -123,5 +119,5 @@ def test_priorities(self): self.assertEqual(pipe[1].name, 'bow_encoder2') -#if __name__ == "__main__": -# unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() \ No newline at end of file