Skip to content

Commit

Permalink
Merge pull request #81 from HDI-Project/bcyphers/python3
Browse files: browse the repository at this point in the history
Upgrade to Python 3
  • Loading branch information
Bennett Cyphers committed Feb 13, 2018
2 parents 9b201ec + 4bf64fb commit 959bdc2
Show file tree
Hide file tree
Showing 17 changed files with 81 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ jobs:
- checkout
- run: apt-get -qq update
- run: apt-get -qq -y install git mysql-client libmysqlclient-dev
- run: pyenv local 2.7.13 # 3.5.2 3.6.0
- run: pyenv local 2.7.13 3.5.2 3.6.0
- run: make installdeps
- run: make lint && tox && codecov
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ This section describes the quickest way to get started with ATM on a modern mach
```

3. **Install python dependencies**
- python=2.7
ATM is tested with Python 2.7+ and Python 3.5+.
```
$ virtualenv venv
$ . venv/bin/activate
Expand Down
1 change: 1 addition & 0 deletions atm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
A multi-user, multi-data AutoML framework.
"""
from __future__ import absolute_import
from __future__ import unicode_literals
import logging
import os

Expand Down
15 changes: 8 additions & 7 deletions atm/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals

import logging
import os
import re
import socket
import sys
from argparse import ArgumentError, ArgumentTypeError, RawTextHelpFormatter
from builtins import map, object, str

import yaml

Expand Down Expand Up @@ -233,7 +234,7 @@ def add_arguments_logging(parser):
help='If set, compute full ROC and PR curves and '
'per-label metrics for each classifier')

log_levels = map(str.lower, LOG_LEVELS.keys())
log_levels = list(map(str.lower, list(LOG_LEVELS.keys())))
parser.add_argument('--log-level-file', choices=log_levels,
help='minimum log level to write to the log file')
# if this is being called from the command line, print more information to
Expand Down Expand Up @@ -516,7 +517,7 @@ def load_config(sql_path=None, run_path=None, aws_path=None, log_path=None, **kw
# kwargs are most likely generated by argparse.
# Any unspecified argparse arguments will be None, so ignore those. We only
# care about arguments explicitly specified by the user.
kwargs = {k: v for k, v in kwargs.items() if v is not None}
kwargs = {k: v for k, v in list(kwargs.items()) if v is not None}

# check the keyword args for config paths
sql_path = sql_path or kwargs.get('sql_config')
Expand All @@ -541,13 +542,13 @@ def load_config(sql_path=None, run_path=None, aws_path=None, log_path=None, **kw
log_args = yaml.load(f)

# Use keyword args to override yaml config values
sql_args.update({k.replace('sql_', ''): v for k, v in kwargs.items()
sql_args.update({k.replace('sql_', ''): v for k, v in list(kwargs.items())
if 'sql_' in k})
aws_args.update({k.replace('aws_', ''): v for k, v in kwargs.items()
aws_args.update({k.replace('aws_', ''): v for k, v in list(kwargs.items())
if 'aws_' in k})
run_args.update({k: v for k, v in kwargs.items() if k in
run_args.update({k: v for k, v in list(kwargs.items()) if k in
RunConfig.PARAMETERS})
log_args.update({k: v for k, v in kwargs.items() if k in
log_args.update({k: v for k, v in list(kwargs.items()) if k in
LogConfig.PARAMETERS})

# It's ok if there are some extra arguments that get passed in here; only
Expand Down
13 changes: 7 additions & 6 deletions atm/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals

import logging
import os
from builtins import object

from . import PROJECT_ROOT

Expand Down Expand Up @@ -84,32 +85,32 @@
}


class ClassifierStatus:
class ClassifierStatus(object):
RUNNING = 'running'
ERRORED = 'errored'
COMPLETE = 'complete'


class RunStatus:
class RunStatus(object):
PENDING = 'pending'
RUNNING = 'running'
COMPLETE = 'complete'


class PartitionStatus:
class PartitionStatus(object):
INCOMPLETE = 'incomplete'
GRIDDING_DONE = 'gridding_done'
ERRORED = 'errored'


class FileType:
class FileType(object):
LOCAL = 'local'
S3 = 's3'
HTTP = 'http'


# these are the strings that are used to index into results dictionaries
class Metrics:
class Metrics(object):
ACCURACY = 'accuracy'
RANK_ACCURACY = 'rank_accuracy'
COHEN_KAPPA = 'cohen_kappa'
Expand Down
7 changes: 4 additions & 3 deletions atm/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals

import json
import pickle
from builtins import object
from datetime import datetime
from operator import attrgetter

Expand Down Expand Up @@ -278,7 +279,7 @@ def mu_sigma_judgment_metric(self):

def __repr__(self):
params = ', '.join(['%s: %s' % i for i in
self.hyperparameter_values.items()])
list(self.hyperparameter_values.items())])
return "<id=%d, params=(%s)>" % (self.id, params)

Datarun.classifiers = relationship('Classifier',
Expand Down Expand Up @@ -334,7 +335,7 @@ def from_csv(self, path):

for _, r in df.iterrows():
# replace NaN and NaT with None
for k, v in r.items():
for k, v in list(r.items()):
if pd.isnull(v):
r[k] = None

Expand Down
9 changes: 7 additions & 2 deletions atm/encoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import division, unicode_literals

from builtins import object

import numpy as np
import pandas as pd
from past.utils import old_div
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


Expand All @@ -21,7 +26,7 @@ def __init__(self, class_column, train_path, test_path=None):
for c in data.columns:
if data[c].dtype == 'object':
total_features += len(np.unique(data[c])) - 1
majority_percentage = float(max(counts)) / float(sum(counts))
majority_percentage = old_div(float(max(counts)), float(sum(counts)))

self.n_examples = data.shape[0]
self.d_features = total_features
Expand Down Expand Up @@ -97,7 +102,7 @@ def transform(self, data):
features = data[self.feature_columns]

# encode each categorical feature as an integer
for column, encoder in self.column_encoders.items():
for column, encoder in list(self.column_encoders.items()):
features[column] = encoder.transform(features[column])

# one-hot encode the categorical features
Expand Down
9 changes: 6 additions & 3 deletions atm/enter_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals

import logging
import os
from builtins import map
from datetime import datetime, timedelta

from past.utils import old_div

from .config import *
from .constants import *
from .database import Database
Expand Down Expand Up @@ -45,7 +48,7 @@ def create_dataset(db, run_config, aws_config=None):
k_classes=meta.k_classes,
d_features=meta.d_features,
majority=meta.majority,
size_kb=meta.size / 1000)
size_kb=old_div(meta.size, 1000))
return dataset


Expand Down Expand Up @@ -128,7 +131,7 @@ def enter_data(sql_config, run_config, aws_config=None,
datarun = create_datarun(db, dataset, run_config)

logger.debug('saving hyperpartions...')
for method, parts in method_parts.items():
for method, parts in list(method_parts.items()):
for part in parts:
# if necessary, create a new datarun for each hyperpartition.
# This setting is useful for debugging.
Expand Down
12 changes: 6 additions & 6 deletions atm/method.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals

import json
from builtins import str as newstr
from builtins import object
from builtins import object, range
from os.path import join

from .constants import METHOD_PATH, METHODS_MAP
Expand Down Expand Up @@ -158,14 +158,14 @@ def __init__(self, method):

# create hyperparameters from the parameter config
self.parameters = {}
for k, v in config['hyperparameters'].items():
for k, v in list(config['hyperparameters'].items()):
param_type = HYPERPARAMETER_TYPES[v['type']]
self.parameters[k] = param_type(name=k, **v)

# List hyperparameters are special. These are replaced in the
# CPT with a size hyperparameter and sets of element hyperparameters
# conditioned on the size.
for name, param in self.parameters.items():
for name, param in list(self.parameters.items()):
if type(param) == List:
elements, conditions = param.get_elements()
for e in elements:
Expand All @@ -182,8 +182,8 @@ def __init__(self, method):
self.root_params.remove(param.name)

# if this is a conditional param, replace it there instead
for var, cond in self.conditions.items():
for val, deps in cond.items():
for var, cond in list(self.conditions.items()):
for val, deps in list(cond.items()):
if param.name in deps:
deps.append(param.length.name)
deps.remove(param.name)
Expand Down
9 changes: 6 additions & 3 deletions atm/metrics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals

from builtins import range

import numpy as np
import pandas as pd
from past.utils import old_div
from sklearn.metrics import (accuracy_score, average_precision_score,
cohen_kappa_score, f1_score, matthews_corrcoef,
precision_recall_curve, roc_auc_score, roc_curve)
Expand Down Expand Up @@ -34,7 +37,7 @@ def rank_n_accuracy(y_true, y_prob_mat, n=0.33):
if y_true[i] in rankings[i, :]:
correct_sample_count += 1

return correct_sample_count / num_samples
return old_div(correct_sample_count, num_samples)


def get_per_class_matrix(y, classes=None):
Expand Down Expand Up @@ -94,7 +97,7 @@ def get_metrics_binary(y_true, y_pred, y_pred_probs, include_curves=False):
any_probs_nan = np.any(np.isnan(y_pred_probs))
if not any_probs_nan:
# AP can be computed even if all labels are the same
y_true_bin = get_per_class_matrix(y_true, range(2))
y_true_bin = get_per_class_matrix(y_true, list(range(2)))
results[Metrics.AP] = average_precision_score(y_true_bin, y_pred_probs)

if not all_labels_same:
Expand Down
14 changes: 8 additions & 6 deletions atm/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
:synopsis: Model around classification method.
"""
from __future__ import absolute_import
from __future__ import absolute_import, division, unicode_literals

import logging
import re
import time
from builtins import object
from collections import defaultdict
from importlib import import_module

import numpy as np
import pandas as pd
from past.utils import old_div
from sklearn import decomposition
from sklearn.gaussian_process.kernels import (RBF, ConstantKernel,
ExpSineSquared, Matern,
Expand Down Expand Up @@ -98,9 +100,9 @@ def make_pipeline(self):
steps = []

# create a classifier with specified parameters
hyperparameters = {k: v for k, v in self.params.iteritems()
hyperparameters = {k: v for k, v in list(self.params.items())
if k not in Model.ATM_KEYS}
atm_params = {k: v for k, v in self.params.iteritems()
atm_params = {k: v for k, v in list(self.params.items())
if k in Model.ATM_KEYS}

# do special conversions
Expand Down Expand Up @@ -157,7 +159,7 @@ def test_final_model(self, X, y):
# time the prediction
start_time = time.time()
total = time.time() - start_time
self.avg_predict_time = total / float(len(y))
self.avg_predict_time = old_div(total, float(len(y)))

# TODO: this is hacky. See https://github.com/HDI-Project/ATM/issues/48
binary = self.num_classes == 2
Expand Down Expand Up @@ -246,7 +248,7 @@ def special_conversions(self, params):
# create list parameters
lists = defaultdict(list)
element_regex = re.compile('(.*)\[(\d)\]')
for name, param in params.items():
for name, param in list(params.items()):
# look for variables of the form "param_name[1]"
match = element_regex.match(name)
if match:
Expand All @@ -259,7 +261,7 @@ def special_conversions(self, params):
# drop the element parameter from our list
del params[name]

for lname, items in lists.items():
for lname, items in list(lists.items()):
# drop the list size parameter
del params['len(%s)' % lname]

Expand Down
4 changes: 2 additions & 2 deletions atm/tests/unit_tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_enumerate():
hps = Method(config_path).get_hyperpartitions()

assert len(hps) == 12
assert all('a' in zip(*hp.categoricals)[0] for hp in hps)
assert all('a' in list(zip(*hp.categoricals))[0] for hp in hps)
assert all(('f', 0.5) in hp.constants for hp in hps)
assert len([hp for hp in hps if hp.tunables
and 'b' in zip(*hp.tunables)[0]]) == 1
and 'b' in list(zip(*hp.tunables))[0]]) == 1

0 comments on commit 959bdc2

Please sign in to comment.