Added property test setup and fixed additional flake8 issues

OpenSourceEconomics · Jul 17, 2018 · 7472153 · 7472153
1 parent 03b6eac
commit 7472153
Show file tree

Hide file tree

Showing 16 changed files with 335 additions and 76 deletions.
diff --git a/development/tests/property/property_auxiliary.py b/development/tests/property/property_auxiliary.py
@@ -0,0 +1,190 @@
+"""This module contains some auxiliary functions for the property testing."""
+from datetime import datetime
+import importlib
+import argparse
+import shutil
+import string
+import glob
+import os
+
+import numpy as np
+
+from grmpy.grmpy_config import PACKAGE_DIR
+
+
+def collect_tests():
+    """Collect the names of all tests shipped in the package's test directory.
+
+    Returns a dictionary that maps every ``test_*.py`` file name found in ``PACKAGE_DIR/test``
+    to the sorted list of attribute names of the imported module that contain ``test`` but
+    not ``py``.
+    """
+    # The lookup is done relative to the test directory; afterwards we return to where we were.
+    origin = os.getcwd()
+    os.chdir(PACKAGE_DIR + '/test')
+    fnames = glob.glob('test_*.py')
+    os.chdir(origin)
+
+    test_dict = dict()
+    for fname in sorted(fnames):
+        mod = importlib.import_module('grmpy.test.' + fname.replace('.py', ''))
+        # Names containing 'py' are skipped, which filters out e.g. an imported pytest.
+        test_dict[fname] = [name for name in sorted(dir(mod))
+                            if 'test' in name and 'py' not in name]
+    return test_dict
+
+
+def run_property_test(module, test, dirname=None):
+    """Run a single property test, optionally inside a fresh scratch directory.
+
+    ``module`` is the file name of a module in ``grmpy.test`` and ``test`` the name of the
+    function to call. If ``dirname`` is given, any existing directory of that name is removed,
+    the directory is created anew, and the working directory is changed into it before the
+    test runs. The function does not change back afterwards.
+    """
+    mod_name = 'grmpy.test.' + module.replace('.py', '')
+    test_fun = getattr(importlib.import_module(mod_name), test)
+
+    # When a failed test case is investigated no directory is passed and we stay where we are.
+    if dirname is not None:
+        if os.path.exists(dirname):
+            shutil.rmtree(dirname)
+        os.mkdir(dirname)
+        os.chdir(dirname)
+
+    test_fun()
+
+
+def print_rslt_ext(start, timeout, rslt, err_msg):
+    """Write the current state of the property tests to ``property.grmpy.info``.
+
+    The file is overwritten on every call. It holds the start time, the planned finish time
+    (``start + timeout``), the time of this update, one table of success and failure counts
+    per module, and one entry per collected error consisting of module, test, seed, and
+    message.
+    """
+    time_fmt = "%Y-%m-%d %H:%M:%S"
+    stamps = [
+        ['START', start.strftime(time_fmt)],
+        ['FINISH', (start + timeout).strftime(time_fmt)],
+        ['UPDATE', datetime.now().strftime(time_fmt)],
+    ]
+    separator = '\n' + '-' * 79 + '\n\n'
+
+    with open('property.grmpy.info', 'w') as outfile:
+
+        # Header information about the timing of the run.
+        outfile.write('\n\n')
+        for stamp in stamps:
+            outfile.write('\t{0[0]:<15}{0[1]:<20}\n\n'.format(stamp))
+
+        # One table per module with a row for each of its tests.
+        for module in sorted(rslt.keys()):
+            outfile.write('\n ' + module.replace('.py', '') + '\n\n')
+            outfile.write('{:>18}{:>15}{:>15}\n\n'.format('Test', 'Success', 'Failure'))
+
+            for test in sorted(rslt[module].keys()):
+                row = [test] + rslt[module][test]
+                outfile.write('{:>18}{:>15}{:>15}\n'.format(*row))
+
+            outfile.write('\n')
+        outfile.write(separator)
+
+        # Details for every failure recorded so far.
+        for module, test, seed, msg in err_msg:
+            header = 'MODULE {:<25} TEST {:<15} SEED {:<15}\n\n'
+            outfile.write(header.format(module, test, seed))
+            outfile.write(msg)
+            outfile.write(separator)
+
+
+def finish(rslt):
+    """Append the overall tally to ``property.grmpy.info`` and mark the run as terminated.
+
+    ``rslt`` maps module names to dictionaries that map test names to a list of the form
+    ``[number of successes, number of failures]``.
+    """
+    # We want to record the overall performance.
+    num_tests_total, num_success = 0, 0
+    for module in sorted(rslt.keys()):
+        for test in sorted(rslt[module].keys()):
+            num_tests_total += sum(rslt[module][test])
+            num_success += rslt[module][test][0]
+
+    with open('property.grmpy.info', 'a') as outfile:
+        row = '{:>18}{:>15}\n'
+        # Each label has to be paired with its own counter.
+        outfile.write(row.format('Success', num_success))
+        outfile.write(row.format('Total', num_tests_total))
+
+        outfile.write('\n TERMINATED')
+
+
+def distribute_command_line_arguments(args):
+    """Copy the parsed command line arguments into a dictionary.
+
+    Only the options that are present on ``args`` are copied, as the test batteries define
+    different subsets of them. The derived key ``is_check`` is always set and is true if the
+    request is either 'check' or 'investigate'.
+    """
+    rslt = dict()
+    for key in ['num_tests', 'request', 'hours', 'seed', 'is_update']:
+        if hasattr(args, key):
+            rslt[key] = getattr(args, key)
+
+    # The request is treated as optional like all other arguments. A missing request simply
+    # means that no check was asked for instead of raising a KeyError.
+    rslt['is_check'] = rslt.get('request') in ['check', 'investigate']
+
+    return rslt
+
+
+def process_command_line_arguments(which):
+    """Build the command line parser for the requested test battery and parse the arguments.
+
+    ``which`` is one of 'robustness', 'regression', or 'property'; any other value raises a
+    NotImplementedError. The returned namespace only holds the options that the selected
+    battery defines.
+    """
+    is_request, is_hours, is_seed, is_test, is_update = False, False, False, False, False
+
+    if which == 'robustness':
+        msg = 'Test robustness of package'
+        is_request, is_hours, is_seed = True, True, True
+    elif which == 'regression':
+        msg = 'Test package for regressions'
+        is_request, is_test, is_update = True, True, True
+    elif which == 'property':
+        msg = 'Property testing of package'
+        is_request, is_seed, is_hours = True, True, True
+    else:
+        raise NotImplementedError
+
+    # The first positional parameter of ArgumentParser is the program name. The message is
+    # meant to be the description in the help text and thus needs to be passed by keyword.
+    parser = argparse.ArgumentParser(description=msg)
+
+    if is_request:
+        # The regression battery offers different tasks than the other two.
+        choices = ['check', 'create'] if which == 'regression' else ['run', 'investigate']
+        parser.add_argument('--request', action='store', dest='request', help='task to perform',
+                            required=True, choices=choices)
+
+    if is_hours:
+        parser.add_argument('--hours', action='store', dest='hours', type=float, help='hours')
+
+    if is_seed:
+        parser.add_argument('--seed', action='store', dest='seed', type=int, help='seed')
+
+    if is_test:
+        parser.add_argument('--tests', action='store', dest='num_tests', required=True, type=int,
+                            help='number of tests')
+
+    if is_update:
+        parser.add_argument('--update', action='store_true', dest='is_update', required=False,
+                            help='update regression vault')
+
+    return parser.parse_args()
+
+
+def get_random_string(size=6):
+    """Sample a random string of ``size`` lowercase ASCII letters using numpy's generator."""
+    alphabet = list(string.ascii_lowercase)
+    letters = [np.random.choice(alphabet) for _ in range(size)]
+    return ''.join(letters)
diff --git a/development/tests/property/run.py b/development/tests/property/run.py
@@ -1,35 +1,91 @@
-#!/usr/bin/env python
 """The test provides the basic capabilities to run numerous property tests."""
-import datetime
+from datetime import timedelta
+from datetime import datetime
+import functools
+import traceback
+import shutil
+import random
+import os
 
-import subprocess
+import numpy as np
 
-from grmpy.test.random_init import generate_random_dict
-from grmpy.test.random_init import print_dict
-import grmpy
+from grmpy.test.auxiliary import cleanup
+from property_auxiliary import distribute_command_line_arguments
+from property_auxiliary import process_command_line_arguments
+from property_auxiliary import get_random_string
+from property_auxiliary import run_property_test
+from property_auxiliary import print_rslt_ext
+from property_auxiliary import collect_tests
+from property_auxiliary import finish
 
-# We simply specify a minimum number of minutes for our package to run with different requests.
-MINUTES = 1
 
-end_time = datetime.datetime.now() + datetime.timedelta(minutes=MINUTES)
-counter = 1
-while True:
-    if datetime.datetime.now() >= end_time:
-        break
+def choose_module(inp_dict):
+    """Draw a module name with probability proportional to its number of stored tests."""
+    names = list(inp_dict.keys())
+    counts = np.array([len(inp_dict[name]) for name in names], dtype=float)
+    weights = counts / np.sum(counts)
+    return np.random.choice(names, p=weights)
 
-    print('\n Iteration ', counter)
 
-    constr = dict()
+def run(args):
+    """Run the property test battery.
+
+    The parsed command line arguments are first converted to a dictionary. If ``is_check`` is
+    set, a single test is drawn with the given seed and run in the current directory.
+    Otherwise randomly drawn tests are run in scratch directories until the requested number
+    of hours has passed, and the results are written out after every iteration.
+    """
+    args = distribute_command_line_arguments(args)
 
-    constr['DETERMINISTIC'] = False
-    dict_ = generate_random_dict(constr)
-    print_dict(dict_)
+    test_dict = collect_tests()
 
-    grmpy.simulate('test.grmpy.ini')
+    # Counters per test in the form [number of successes, number of failures].
+    rslt = dict()
+    for module in test_dict.keys():
+        rslt[module] = dict()
+        for test in test_dict[module]:
+            rslt[module][test] = [0, 0]
 
-    # This is a temporary fix so that the determination of starting values by PROBIT does
-    # not work if we have a perfect separation.
-    grmpy.estimate('test.grmpy.ini')
-    subprocess.check_call(['git', 'clean', '-d', '-f'])
+    # NOTE(review): presumably removes leftovers of earlier runs -- confirm in grmpy.test.
+    cleanup()
 
-    counter += 1
+    if args['is_check']:
+        # Seeding and drawing in the same order as the loop below reproduces the module and
+        # test of a recorded seed. No directory is passed, so the test runs in place.
+        np.random.seed(args['seed'])
+        module = choose_module(test_dict)
+        test = np.random.choice(test_dict[module])
+        run_property_test(module, test)
+
+    else:
+        err_msg = []
+
+        # NOTE(review): args['hours'] is presumably None if --hours is omitted, which
+        # timedelta does not accept -- confirm and consider making the option required.
+        start, timeout = datetime.now(), timedelta(hours=args['hours'])
+
+        # Start and timeout are fixed for the whole run, only results and errors change.
+        print_rslt = functools.partial(print_rslt_ext, start, timeout)
+        print_rslt(rslt, err_msg)
+
+        while True:
+
+            # The seed is recorded with any failure so that the case can be replayed later.
+            seed = random.randrange(1, 100000)
+            dirname = get_random_string()
+            np.random.seed(seed)
+            module = choose_module(test_dict)
+            test = np.random.choice(test_dict[module])
+
+            try:
+                run_property_test(module, test, dirname)
+                rslt[module][test][0] += 1
+            except Exception:
+                # A failing test must not stop the battery, we keep the traceback instead.
+                rslt[module][test][1] += 1
+                msg = traceback.format_exc()
+                err_msg += [(module, test, seed, msg)]
+
+            # run_property_test changes into the scratch directory, so we step back out
+            # before removing it.
+            # NOTE(review): if it raises before changing directories, this moves one level
+            # too far up and the removal below fails -- confirm whether that can happen.
+            os.chdir('../')
+
+            shutil.rmtree(dirname)
+
+            print_rslt(rslt, err_msg)
+
+            # The limit is only checked after a test finished, so at least one test is run.
+            if timeout < datetime.now() - start:
+                break
+
+        finish(rslt)
+
+
+if __name__ == '__main__':
+
+    # Script entry point: parse the options of the property battery and start it.
+    args = process_command_line_arguments('property')
+
+    run(args)
diff --git a/development/tests/reliability/reliability.py b/development/tests/reliability/reliability.py
@@ -3,19 +3,15 @@
 used. Additionally the module creates two different figures for the reliability section of the
 documentation.
 """
-import warnings
 import os
 
-warnings.simplefilter(action='ignore', category=FutureWarning)
-
 import matplotlib.pyplot as plt
 import statsmodels.api as sm
 from os.path import join
 from shutil import move
 import pandas as pd
 import numpy as np
 
-
 from grmpy.simulate.simulate_auxiliary import simulate_unobservables
 from grmpy.test.random_init import print_dict
 from grmpy.estimate.estimate import estimate
@@ -31,8 +27,8 @@ def create_data():
 
     # Distribute information
     indicator, dep = init_dict['ESTIMATION']['indicator'], init_dict['ESTIMATION']['dependent']
-    label_out = [init_dict['varnames'][j-1] for j in init_dict['TREATED']['order']]
-    label_choice = [init_dict['varnames'][j-1] for j in init_dict['CHOICE']['order']]
+    label_out = [init_dict['varnames'][j - 1] for j in init_dict['TREATED']['order']]
+    label_choice = [init_dict['varnames'][j - 1] for j in init_dict['CHOICE']['order']]
     seed = init_dict['SIMULATION']['seed']
 
     # Set random seed to ensure recomputability
@@ -70,7 +66,7 @@ def update_correlation_structure(model_dict, rho):
 
     # We print out the specification to an initialization file with the name mc_init.grmpy.ini.
     for key_ in ['TREATED', 'UNTREATED', 'CHOICE']:
-        x = [model_dict['varnames'][j-1] for j in model_dict[key_]['order']]
+        x = [model_dict['varnames'][j - 1] for j in model_dict[key_]['order']]
         model_dict[key_]['order'] = x
     print_dict(model_dict, 'reliability')
 
@@ -80,7 +76,7 @@ def get_effect_grmpy(file):
     df = pd.read_pickle(file)
     dict_ = read('reliability.grmpy.ini')
     beta_diff = dict_['TREATED']['all'] - dict_['UNTREATED']['all']
-    covars = [dict_['varnames'][j-1] for j in dict_['TREATED']['order']]
+    covars = [dict_['varnames'][j - 1] for j in dict_['TREATED']['order']]
     ATE = np.mean(np.dot(df[covars], beta_diff))
 
     return ATE

diff --git a/development/tests/robustness/replication.ipynb b/development/tests/robustness/replication.ipynb
@@ -21,7 +21,6 @@
    "source": [
     "from run import plot_est_mte\n",
     "import pandas as pd\n",
-    "import numpy as np\n",
     "import grmpy"
    ]
   },

diff --git a/development/tests/robustness/run.py b/development/tests/robustness/run.py
@@ -23,7 +23,7 @@ def plot_est_mte(rslt, data_frame):
 
     # Calculate the MTE
     mte = calculate_mte(rslt, data_frame, quantiles)
-    mte = [i/4 for i in mte]
+    mte = [i / 4 for i in mte]
 
     # Plot both curves
     ax = plt.figure().add_subplot(111)

diff --git a/grmpy/check/auxiliary.py b/grmpy/check/auxiliary.py
@@ -45,7 +45,7 @@ def check_special_conf(dict_):
                     if any(i >= 0.9 for i in x[2]):
                         msg = str_.format(x[0], 'a specific category')
                         invalid = True
-                    elif not np.isclose(sum(x[2]), 1., 0.01):
+                    elif not np.isclose(sum(x[2]), 1., 0.1):
                         msg = 'The specified probability for all possible categories of a ' \
                               'categorical variable have to sum up to 1.'
                         invalid = True