Python 3 compatibility
joshblum committed Oct 17, 2017
1 parent ddba9c3 commit 2d327e5
Showing 44 changed files with 258 additions and 175 deletions.
7 changes: 4 additions & 3 deletions .circleci/config.yml
@@ -3,8 +3,9 @@ jobs:
   build:
     working_directory: ~/featuretools
     docker:
-      - image: python:2.7.14
+      - image: themattrix/tox
     steps:
       - checkout
-      - run: make installdeps
-      - run: make coverage && codecov
+      - run: pyenv local 3.6.0
+      - run: pip install --upgrade pip && pip install -r test-requirements.txt
+      - run: tox && codecov
4 changes: 4 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,4 @@
+include requirements.txt
+include setup-requirements.txt
+include test-requirements.txt
+
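MANIFEST.in controls which extra files ship in the source distribution; including the requirements files matters when setup.py reads them at install time. A hypothetical sketch of that pattern (the project's actual setup.py is not part of this diff, so the details here are illustrative):

# Hypothetical setup.py sketch: reads pinned dependencies from requirements.txt,
# which only works from an sdist if MANIFEST.in ships the file alongside the code.
from setuptools import find_packages, setup

with open('requirements.txt') as f:
    install_requires = [line.strip() for line in f if line.strip()]

setup(name='featuretools',
      packages=find_packages(),
      install_requires=install_requires)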
5 changes: 0 additions & 5 deletions dev-requirements.txt
@@ -2,10 +2,5 @@
 Sphinx==1.6.4
 sphinx_rtd_theme==0.2.4
 nbsphinx==0.2.14
-pytest-xdist>=1.20.1
 jupyter==1.0.0
 rst2pdf==0.93
-pytest-cov===2.5.1
-codecov==2.0.9
-flake8==3.4.1
-isort==4.2.15
9 changes: 6 additions & 3 deletions docs/source/upload.py
@@ -1,3 +1,6 @@
+from __future__ import print_function
+from builtins import input
+from builtins import str
 import sys
 from subprocess import call

@@ -30,7 +33,7 @@ def query_yes_no(question, default="yes"):

     while True:
         sys.stdout.write(question + prompt)
-        choice = raw_input().lower()
+        choice = input().lower()
         if default is not None and choice == '':
             return valid[default]
         elif choice in valid:
@@ -43,11 +46,11 @@ def query_yes_no(question, default="yes"):
 def upload(root=False):
     # build html
     if not query_yes_no("Upload Release: %s" % str(release)):
-        print "Not uploading"
+        print("Not uploading")
         return

     if root and not query_yes_no("Upload to root?"):
-        print "Not uploading"
+        print("Not uploading")
         return

     call(["make", "clean", "html"])
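The two changes above are the classic python-future fixes for interactive scripts: Python 3 removed raw_input() (its input() always returns a string) and turned print into a function. A minimal sketch of code that behaves identically on both interpreters:

# Sketch of the portable pattern used above; runs the same on Python 2 and 3.
from __future__ import print_function
from builtins import input  # Py2: maps to raw_input(); Py3: the built-in input()

choice = input("Upload? [y/n] ").lower()  # always returns a plain string
print("You answered:", choice)            # print as a function on both versions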
5 changes: 3 additions & 2 deletions featuretools/__init__.py
@@ -1,7 +1,8 @@
+from __future__ import absolute_import
 # flake8: noqa
-import config
+from . import config
 from .core import *
-import variable_types
+from . import variable_types
 from .entityset.api import *
 from . import primitives
 from .synthesis.api import *
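Python 2 resolved a bare `import config` inside a package to the sibling module (an implicit relative import); Python 3 treats every bare import as absolute. A sketch of the distinction:

# Inside a package module such as featuretools/__init__.py:
from __future__ import absolute_import  # gives Python 2 the Python 3 behavior

# import config           # Python 2 only: silently imports featuretools/config.py
from . import config      # explicit relative import, valid on Python 2 and 3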
3 changes: 3 additions & 0 deletions featuretools/computational_backends/base_backend.py
@@ -1,2 +1,5 @@
+from builtins import object
+
+
 class ComputationalBackend(object):
     pass
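`from builtins import object` only matters on Python 2, where a class with no base is an "old-style" class; inheriting from the backported object guarantees new-style semantics on both interpreters. A sketch:

from builtins import object  # no-op on Python 3; new-style `object` on Python 2

class Classic:         # Python 2: old-style class; Python 3: already new-style
    pass

class Modern(object):  # new-style on both, so super(), properties, and the
    pass               # method resolution order behave identically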
7 changes: 6 additions & 1 deletion featuretools/computational_backends/calculate_feature_matrix.py
@@ -1,11 +1,16 @@
+from __future__ import division
+
 import gc
 import logging
 import os
 import shutil
+from builtins import zip
 from collections import defaultdict
 from datetime import datetime
 from functools import wraps

+from past.utils import old_div
+
 import numpy as np
 import pandas as pd
 from pandas.tseries.frequencies import to_offset
@@ -435,7 +440,7 @@ def datetime_round(dt, freq, round_up=False):
     round_f = np.floor
     dt = pd.DatetimeIndex(dt)
     freq = to_offset(freq).delta.value
-    return pd.DatetimeIndex(((round_f(dt.asi8 / (float(freq))) * freq).astype(np.int64)))
+    return pd.DatetimeIndex(((round_f(old_div(dt.asi8, (float(freq)))) * freq).astype(np.int64)))


 def gather_approximate_features(features):
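With `from __future__ import division`, `/` is true division even on Python 2, and futurize mechanically wraps existing divisions in old_div to preserve the original semantics. Since dt.asi8 is divided by float(freq) here, old_div reduces to ordinary division; a sketch of the semantics:

from __future__ import division  # `/` is true division on Python 2 as well
from past.utils import old_div

print(7 / 2)            # 3.5 on both interpreters, due to the __future__ import
print(old_div(7, 2))    # 3    -- keeps Python 2 floor division for two ints
print(old_div(7.0, 2))  # 3.5  -- with any float operand it is plain division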
9 changes: 5 additions & 4 deletions featuretools/computational_backends/feature_tree.py
@@ -1,5 +1,6 @@
 import itertools
 import logging
+from builtins import object
 from collections import defaultdict

 from ..utils import gen_utils as utils
@@ -32,7 +33,7 @@ def __init__(self, entityset, features, ignored=None):
         for dep in deps:
             all_features[dep.hash()] = dep
             feature_deps[dep.hash()] = dep.get_deep_dependencies(ignored=ignored)
-        self.all_features = all_features.values()
+        self.all_features = list(all_features.values())
         self.feature_deps = feature_deps

         self._generate_feature_tree(features)
@@ -41,7 +42,7 @@ def __init__(self, entityset, features, ignored=None):

     def get_all_features(self):
         all_features = []
-        for e, groups in self.ordered_feature_groups.iteritems():
+        for e, groups in self.ordered_feature_groups.items():
             for g in groups:
                 for f in g:
                     all_features.append(f)
@@ -71,7 +72,7 @@ def _order_entities(self):
        dependencies.
        """
        entity_deps = defaultdict(set)
-       for e, features in self.top_level_features.iteritems():
+       for e, features in self.top_level_features.items():
            # iterate over all dependency features of the top-level features on
            # this entity. If any of these are themselves top-level features, add
            # their entities as dependencies of the current entity.
@@ -150,7 +151,7 @@ def _get_feature_depths(self, entity_id):
                 order[dep.hash()] = min(order[f.hash()] - 1, order[dep.hash()])
                 queue.append(dep)

-        return features.values(), out
+        return list(features.values()), out


 # These functions are used for sorting and grouping features
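dict.iteritems() is gone in Python 3, and items()/values() return lazy views rather than lists there; the list() wrapping above is needed only where the result is indexed, stored, or must survive later mutation of the dict. A sketch:

d = {'a': 1, 'b': 2}

for k, v in d.items():   # portable iteration; a list on Py2, a view on Py3
    pass

vals = list(d.values())  # materialize when you need indexing or a snapshot
print(vals[0])           # a bare d.values() has no [0] on Python 3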
11 changes: 7 additions & 4 deletions featuretools/computational_backends/pandas_backend.py
@@ -1,5 +1,5 @@
 import cProfile
-import cStringIO
+import io
 import logging
 import os
 import pstats
@@ -8,6 +8,8 @@
 import warnings
 from datetime import datetime

+from future import standard_library
+
 import numpy as np
 import pandas as pd

@@ -27,6 +29,7 @@
 # progress bar
 from featuretools.utils.gen_utils import make_tqdm_iterator

+standard_library.install_aliases()
 warnings.simplefilter('ignore', np.RankWarning)
 warnings.simplefilter("ignore", category=RuntimeWarning)
 logger = logging.getLogger('featuretools.computational_backend')
@@ -174,7 +177,7 @@ def calculate_all_features(self, instance_ids, time_last,
         # debugging
         if profile:
             pr.disable()
-            s = cStringIO.StringIO()
+            s = io.StringIO()
             ps = pstats.Stats(pr, stream=s).sort_stats("cumulative", "tottime")
             ps.print_stats()
             prof_folder_path = os.path.join(ROOT_DIR, 'prof')
@@ -285,7 +288,7 @@ def _calculate_direct_features(self, features, entity_frames):
             col_map[f.base_features[0].get_name()] = f.get_name()

         # merge the identity feature from the parent entity into the child
-        merge_df = parent_df[col_map.keys()].rename(columns=col_map)
+        merge_df = parent_df[list(col_map.keys())].rename(columns=col_map)
         if index_as_feature is not None:
             merge_df.set_index(index_as_feature.get_name(), inplace=True, drop=False)
         else:
@@ -435,7 +438,7 @@ def inner(x):
                       for n1, n2 in to_merge.columns.ravel()]
         # to enable a rename
         to_merge = to_merge.rename(columns=agg_rename)
-        variables = agg_rename.values()
+        variables = list(agg_rename.values())
         to_merge = to_merge[variables]
         frame = pd.merge(left=frame, right=to_merge,
                          left_on=index_var, right_index=True, how='left')
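cStringIO does not exist on Python 3; io.StringIO is the portable in-memory text buffer, and standard_library.install_aliases() patches Python 2 so renamed stdlib modules can be imported under their Python 3 names. A sketch:

from future import standard_library
standard_library.install_aliases()  # on Py2, enables e.g. `import queue` and
                                    # `import urllib.parse` under their Py3 names
import io

buf = io.StringIO()               # text buffer available on both interpreters
buf.write(u'profiler output...')  # note: io.StringIO on Py2 accepts only unicode
print(buf.getvalue())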
4 changes: 2 additions & 2 deletions featuretools/config.py
@@ -53,8 +53,8 @@ def initialize_logging(config):
         err_handler.setFormatter(logging.Formatter(fmt))
     err_levels = ['WARNING', 'ERROR', 'CRITICAL']

-    for name, level in loggers.items():
-        LEVEL = logging._levelNames[level.upper()]
+    for name, level in list(loggers.items()):
+        LEVEL = getattr(logging, level.upper())
         logger = logging.getLogger(name)
         logger.setLevel(LEVEL)
         for _handler in logger.handlers:
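logging._levelNames was a private table that Python 3.4 split into _levelToName/_nameToLevel; the level constants themselves are public module attributes, so getattr is the portable lookup. A sketch:

import logging

LEVEL = getattr(logging, 'WARNING')  # == logging.WARNING == 30, on both 2 and 3
# logging._levelNames['WARNING']     # Python 2 only; gone in Python 3.4+
logging.getLogger('featuretools').setLevel(LEVEL)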
7 changes: 4 additions & 3 deletions featuretools/core/base.py
@@ -1,4 +1,5 @@
 import copy
+from builtins import object


 class FTBase(object):
@@ -7,10 +8,10 @@ def normalize(self, normalizer, remove_entityset=True):
         d = copy.copy(self.__dict__)
         if remove_entityset:
             from featuretools.entityset.entityset import EntitySet
-            for k, v in d.iteritems():
+            for k, v in d.items():
                 if isinstance(v, EntitySet):
                     d[k] = v.id
-        d = {k: normalizer(v) for k, v in d.iteritems()}
+        d = {k: normalizer(v) for k, v in d.items()}
         if hasattr(self, 'id'):
             d['id'] = self.id
         return d
@@ -20,7 +21,7 @@ def denormalize(cls, d, denormalizer=None, entityset=None):
         d = copy.copy(d)
         if denormalizer:
             d = {k: denormalizer(v, denormalizer=denormalizer, entityset=entityset)
-                 for k, v in d.iteritems()}
+                 for k, v in d.items()}

         if entityset and 'entityset' in d:
             d['entityset'] = entityset
1 change: 1 addition & 0 deletions featuretools/demo/flight.py
@@ -1,4 +1,5 @@
 import os
+from builtins import str

 import dask.dataframe as dd
 import pandas as pd
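`from builtins import str` backports Python 3's text type, so str(x) produces unicode text rather than bytes on Python 2. A sketch:

from builtins import str  # Py2: a unicode-backed str type; Py3: a no-op

s = str(2017)
print(isinstance(s, str))  # True on both; on Python 2, s is text, not bytes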
8 changes: 7 additions & 1 deletion featuretools/demo/mock_customer.py
@@ -1,3 +1,9 @@
+from __future__ import division
+
+from builtins import range
+
+from past.utils import old_div
+
 import pandas as pd
 from numpy import random
 from numpy.random import choice
@@ -27,7 +33,7 @@ def load_mock_customer(n_customers=5, n_products=5, n_sessions=35, n_transaction
     transactions_df = transactions_df.sort_values("session_id").reset_index(drop=True)
     transactions_df["transaction_time"] = pd.date_range('1/1/2014', periods=n_transactions, freq='65s')  # todo make these less regular
     transactions_df["product_id"] = pd.Categorical(choice(products_df["product_id"], n_transactions))
-    transactions_df["amount"] = random.randint(500, 15000, n_transactions) / 100.0
+    transactions_df["amount"] = old_div(random.randint(500, 15000, n_transactions), 100.0)

     # calculate and merge in session start
     # based on the times we came up with for transactions
1 change: 1 addition & 0 deletions featuretools/demo/retail.py
@@ -1,4 +1,5 @@
 import os
+from builtins import str

 import pandas as pd

15 changes: 10 additions & 5 deletions featuretools/entityset/base_entity.py
@@ -1,4 +1,9 @@
+from __future__ import print_function
+
 import logging
+from builtins import map

+from past.builtins import basestring
+
 import pandas as pd

@@ -55,9 +60,9 @@ def __init__(self, id, entityset, variable_types=None, name=None,
         link_vars = [v.id for rel in relationships for v in [rel.parent_variable, rel.child_variable]
                      if v.entity.id == self.id]

-        inferred_variable_types = self.infer_variable_types(ignore=variable_types.keys(),
+        inferred_variable_types = self.infer_variable_types(ignore=list(variable_types.keys()),
                                                             link_vars=link_vars)
-        for var_id, desired_type in variable_types.iteritems():
+        for var_id, desired_type in variable_types.items():
             if isinstance(desired_type, tuple):
                 desired_type = desired_type[0]
             inferred_variable_types.update({var_id: desired_type})
@@ -178,7 +183,7 @@ def head(self, n=10, cutoff_time=None):
         from featuretools.computational_backends.calculate_feature_matrix import calculate_feature_matrix
         from featuretools.features import Feature

-        row = map(Feature, self.variables)
+        row = list(map(Feature, self.variables))
         instance_ids = self.entityset.get_top_n_instances(self.id, n)
         cutoff_time = pd.DataFrame({'instance_id': instance_ids})
         cutoff_time['time'] = cutoff_time
@@ -228,14 +233,14 @@ def add_variable_statistics(self, var_id):
                 value = self.get_column_stat(var_id, stat)
                 setattr(self._get_variable(var_id), stat, value)
             except TypeError as e:
-                print e
+                print(e)

         stats = vartype._computed_stats
         for stat in stats:
             try:
                 setattr(self._get_variable(var_id), stat, value)
             except TypeError as e:
-                print e
+                print(e)

     def get_column_stat(self, variable_id, stat):
         raise NotImplementedError()
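Two more staples of the port show up in this file: map() returns a one-shot lazy iterator on Python 3 (hence the list() wrapping), and past.builtins.basestring restores a common isinstance target for text. A sketch:

from builtins import map            # Py3-style lazy map, even on Python 2
from past.builtins import basestring

row = map(str, [1, 2, 3])           # iterator: no len(row), single pass only
row = list(map(str, [1, 2, 3]))     # a real list on both interpreters

print(isinstance(u'flights', basestring))  # True: str/unicode on Py2, str on Py3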
5 changes: 3 additions & 2 deletions featuretools/entityset/base_entityset.py
@@ -1,4 +1,5 @@
 import logging
+from builtins import object

 from featuretools import variable_types as vtypes
 from featuretools.core.base import FTBase
@@ -87,7 +88,7 @@ def __getitem__(self, entity_id):

     @property
     def entities(self):
-        return self.entity_stores.values()
+        return list(self.entity_stores.values())

     def _get_entity(self, entity_id):
         """Get entity instance from entityset
@@ -150,7 +151,7 @@ def __repr__(self):
             num_left = len(self.relationships) - 5
             repr_out += u"\n  ...and {} more".format(num_left)

-        return repr_out.encode("utf-8")
+        return repr_out

     def delete_entity_variables(self, entity_id, variables, **kwargs):
         entity = self._get_entity(entity_id)
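Dropping .encode("utf-8") from __repr__ is a correctness fix: Python 3 requires __repr__ to return str, and returning bytes raises TypeError; the encode was only ever a Python 2 workaround for printing non-ASCII. A minimal illustration (the class name is hypothetical):

class Example(object):
    def __repr__(self):
        out = u'Entityset: demo'
        # return out.encode('utf-8')  # bytes: fine on Py2, TypeError on Py3
        return out                    # Python 3 requires a str here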
