double backticks around decimal and engine='python'
Camilo Cota committed May 22, 2016
2 parents dc8ca62 + b88eb35 commit 9f42d0c
Showing 48 changed files with 1,921 additions and 647 deletions.
15 changes: 15 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -773,6 +773,21 @@ def setup(self):
def time_groupby_transform_series2(self):
self.df.groupby('id')['val'].transform(np.mean)


class groupby_transform_dataframe(object):
# GH 12737
goal_time = 0.2

def setup(self):
self.df = pd.DataFrame({'group': np.repeat(np.arange(1000), 10),
'B': np.nan,
'C': np.nan})
self.df.ix[4::10, 'B':'C'] = 5

def time_groupby_transform_dataframe(self):
self.df.groupby('group').transform('first')
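For context, ``transform('first')`` broadcasts each group's first non-null value back to every row of that group, preserving the frame's shape — a minimal illustrative sketch (the tiny frame here stands in for the benchmark's data):

```python
import numpy as np
import pandas as pd

# Each group's first non-null value is broadcast back to all rows
# of that group, so the result has the same length as the input.
df = pd.DataFrame({'group': [1, 1, 2, 2],
                   'B': [np.nan, 5.0, np.nan, 7.0]})
out = df.groupby('group')['B'].transform('first')
print(out.tolist())  # -> [5.0, 5.0, 7.0, 7.0]
```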


class groupby_transform_cythonized(object):
goal_time = 0.2

1 change: 1 addition & 0 deletions doc/source/timeseries.rst
Expand Up @@ -98,6 +98,7 @@ time.
pd.Timestamp(datetime(2012, 5, 1))
pd.Timestamp('2012-05-01')
pd.Timestamp(2012, 5, 1)
However, in many cases it is more natural to associate things like change
variables with a time span instead. The span represented by ``Period`` can be
44 changes: 40 additions & 4 deletions doc/source/whatsnew/v0.18.2.txt
@@ -32,13 +32,22 @@ Other enhancements
- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raise a ``NonExistentTimeError`` (:issue:`13057`)

- ``Index`` now supports ``.str.extractall()`` which returns ``DataFrame``, see :ref:`Extract all matches in each subject (extractall) <text.extractall>` (:issue:`10008`, :issue:`13156`)
- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`)

.. ipython:: python

idx = pd.Index(["a1a2", "b1", "c1"])
idx.str.extractall("[ab](?P<digit>\d)")

- The ``pd.read_csv()`` with engine='python' has gained support for the decimal option (:issue:`12933`)
- ``Timestamp``s can now accept positional and keyword parameters like :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`)

.. ipython:: python

pd.Timestamp(2012, 1, 1)

pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30)

- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
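A short sketch of the new option (the sample data below is illustrative): with ``engine='python'``, ``decimal`` can now be set to parse European-style numbers.

```python
import io
import pandas as pd

# Comma as the decimal separator, semicolon as the field separator.
data = "a;b\n1,5;2,25\n3,0;4,75\n"
df = pd.read_csv(io.StringIO(data), sep=';', decimal=',', engine='python')
print(df['a'].tolist())  # -> [1.5, 3.0]
```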

.. _whatsnew_0182.api:

@@ -47,6 +56,8 @@ API changes


- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
- An ``UnsupportedFunctionCall`` error is now raised if numpy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`)
- Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`)
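The ``.sample()`` change means a globally seeded generator now yields reproducible draws — a small illustrative check (the seed value is arbitrary):

```python
import numpy as np
import pandas as pd

s = pd.Series(range(100))

np.random.seed(13161)  # arbitrary seed, chosen for illustration
first = s.sample(n=5).index.tolist()

np.random.seed(13161)  # re-seeding replays the same draw
second = s.sample(n=5).index.tolist()

print(first == second)  # -> True
```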


.. _whatsnew_0182.api.tolist:
@@ -77,13 +88,26 @@ New Behavior:
type(s.tolist()[0])


.. _whatsnew_0182.api.to_datetime_coerce:

``.to_datetime()`` when coercing
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

A bug is fixed in ``.to_datetime()`` when passing integers or floats with no ``unit`` and ``errors='coerce'`` (:issue:`13180`).
Previously, if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes, with ``errors='coerce'`` it would convert all values to ``NaT``.

Previous Behavior:

.. code-block:: ipython

In [2]: pd.to_datetime([1, 'foo'], errors='coerce')
Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None)

This will now convert integers/floats with the default unit of ``ns``.

.. ipython:: python

pd.to_datetime([1, 'foo'], errors='coerce')

.. _whatsnew_0182.api.other:

@@ -106,32 +130,39 @@ Performance Improvements
- increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`)



- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`)


.. _whatsnew_0182.bug_fixes:

Bug Fixes
~~~~~~~~~

- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`)


- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)
- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`)

- Bug in calling ``.memory_usage()`` on an object which doesn't implement it (:issue:`12924`)

- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()``); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`)


- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`)

- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`)


- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)

- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`)
- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`)
- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` (:issue:`13212`)



@@ -149,4 +180,9 @@ Bug Fixes

- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN values from formatting (:issue:`11981`)
- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)


- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
83 changes: 83 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
@@ -0,0 +1,83 @@
.. _whatsnew_0190:

v0.19.0 (????, 2016)
--------------------

This is a major release from 0.18.2 and includes a small number of API changes, several new features,
enhancements, and performance improvements along with a large number of bug fixes. We recommend that all
users upgrade to this version.

Highlights include:


Check the :ref:`API Changes <whatsnew_0190.api_breaking>` and :ref:`deprecations <whatsnew_0190.deprecations>` before updating.

.. contents:: What's new in v0.19.0
:local:
:backlinks: none

.. _whatsnew_0190.enhancements:

New features
~~~~~~~~~~~~





.. _whatsnew_0190.enhancements.other:

Other enhancements
^^^^^^^^^^^^^^^^^^






.. _whatsnew_0190.api_breaking:

Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. _whatsnew_0190.api:






Other API Changes
^^^^^^^^^^^^^^^^^

.. _whatsnew_0190.deprecations:

Deprecations
^^^^^^^^^^^^





.. _whatsnew_0190.prior_deprecations:

Removal of prior version deprecations/changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^





.. _whatsnew_0190.performance:

Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~





.. _whatsnew_0190.bug_fixes:

Bug Fixes
~~~~~~~~~
76 changes: 75 additions & 1 deletion pandas/compat/numpy/function.py
@@ -21,7 +21,7 @@
from numpy import ndarray
from pandas.util.validators import (validate_args, validate_kwargs,
validate_args_and_kwargs)
from pandas.core.common import is_integer
from pandas.core.common import is_integer, UnsupportedFunctionCall
from pandas.compat import OrderedDict


@@ -245,3 +245,77 @@ def validate_transpose_for_generic(inst, kwargs):
msg += " for {klass} instances".format(klass=klass)

raise ValueError(msg)


def validate_window_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .{func}() directly instead ".format(func=name))

if len(args) > 0:
raise UnsupportedFunctionCall(msg)

for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)


def validate_rolling_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .rolling(...).{func}() instead ".format(func=name))

if len(args) > 0:
raise UnsupportedFunctionCall(msg)

for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)


def validate_expanding_func(name, args, kwargs):
numpy_args = ('axis', 'dtype', 'out')
msg = ("numpy operations are not "
"valid with window objects. "
"Use .expanding(...).{func}() instead ".format(func=name))

if len(args) > 0:
raise UnsupportedFunctionCall(msg)

for arg in numpy_args:
if arg in kwargs:
raise UnsupportedFunctionCall(msg)


def validate_groupby_func(name, args, kwargs):
"""
'args' and 'kwargs' should be empty because all of
their necessary parameters are explicitly listed in
the function signature
"""
if len(args) + len(kwargs) > 0:
raise UnsupportedFunctionCall((
"numpy operations are not valid "
"with groupby. Use .groupby(...)."
"{func}() instead".format(func=name)))

RESAMPLER_NUMPY_OPS = ('min', 'max', 'sum', 'prod',
'mean', 'std', 'var')


def validate_resampler_func(method, args, kwargs):
"""
'args' and 'kwargs' should be empty because all of
their necessary parameters are explicitly listed in
the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
raise UnsupportedFunctionCall((
"numpy operations are not valid "
"with resample. Use .resample(...)."
"{func}() instead".format(func=method)))
else:
raise TypeError("too many arguments passed in")
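A self-contained sketch of the validator pattern added above, trimmed to the groupby case: when numpy ufuncs such as ``np.mean`` are applied to a groupby object they forward extra arguments (``axis``, ``dtype``, ``out``), and the helper rejects them.

```python
class UnsupportedFunctionCall(ValueError):
    pass


def validate_groupby_func(name, args, kwargs):
    # Mirrors the helper above: groupby reductions take no extra
    # positional or keyword arguments, so anything numpy forwards
    # (axis, dtype, out) triggers the error.
    if len(args) + len(kwargs) > 0:
        raise UnsupportedFunctionCall(
            "numpy operations are not valid with groupby. "
            "Use .groupby(...).{func}() instead".format(func=name))


try:
    validate_groupby_func('mean', (), {'axis': 0, 'dtype': None, 'out': None})
    caught = None
except UnsupportedFunctionCall as exc:
    caught = str(exc)

print(caught)
```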
2 changes: 1 addition & 1 deletion pandas/core/base.py
Expand Up @@ -127,7 +127,7 @@ def __sizeof__(self):

# no memory_usage attribute, so fall back to
# object's 'sizeof'
return super(self, PandasObject).__sizeof__()
return super(PandasObject, self).__sizeof__()
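The one-line fix swaps the argument order: in the two-argument form, ``super`` expects the class first and the instance second. A standalone illustration with hypothetical class names:

```python
class Base(object):
    def size_of(self):
        return 64


class Derived(Base):
    def size_of(self):
        # Correct: super(Derived, self). The reversed form,
        # super(self, Derived), raises TypeError because the first
        # argument must be a type, not an instance.
        return super(Derived, self).size_of()


print(Derived().size_of())  # -> 64
```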


class NoNewAttributesMixin(object):
8 changes: 6 additions & 2 deletions pandas/core/common.py
@@ -41,6 +41,10 @@ class AmbiguousIndexError(PandasError, KeyError):
pass


class UnsupportedFunctionCall(ValueError):
pass


class AbstractMethodError(NotImplementedError):
"""Raise this error instead of NotImplementedError for abstract methods
while keeping compatibility with Python 2 and Python 3.
@@ -2058,7 +2062,7 @@ def _random_state(state=None):
state : int, np.random.RandomState, None.
If receives an int, passes to np.random.RandomState() as seed.
If receives an np.random.RandomState object, just returns object.
If receives `None`, returns an np.random.RandomState object.
If receives `None`, returns np.random.
If receives anything else, raises an informative ValueError.
Default None.
@@ -2072,7 +2076,7 @@
elif isinstance(state, np.random.RandomState):
return state
elif state is None:
return np.random.RandomState()
return np.random
else:
raise ValueError("random_state must be an integer, a numpy "
"RandomState, or None")
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -1062,7 +1062,7 @@ def to_records(self, index=True, convert_datetime64=True):
count += 1
elif index_names[0] is None:
index_names = ['index']
names = index_names + lmap(str, self.columns)
names = lmap(str, index_names) + lmap(str, self.columns)
else:
arrays = [self[c].get_values() for c in self.columns]
names = lmap(str, self.columns)
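The fix applies ``str`` to the index names as well as the columns, so a non-ASCII index name no longer breaks record-array construction — an illustrative check:

```python
import pandas as pd

# Index carries a non-ASCII name; to_records() must stringify it
# alongside the column names when building the record dtype.
df = pd.DataFrame({'a': [1, 2]},
                  index=pd.Index([10, 20], name=u'\u0394'))
rec = df.to_records()
print(rec.dtype.names)
```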
8 changes: 4 additions & 4 deletions pandas/core/generic.py
@@ -5299,7 +5299,7 @@ def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_num_doc)
def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
**kwargs):
nv.validate_stat_func(tuple(), kwargs)
nv.validate_stat_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
@@ -5319,7 +5319,7 @@ def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_num_ddof_doc)
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
numeric_only=None, **kwargs):
nv.validate_stat_ddof_func(tuple(), kwargs)
nv.validate_stat_ddof_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
@@ -5340,7 +5340,7 @@ def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func,
@Appender("Return cumulative {0} over requested axis.".format(name) +
_cnum_doc)
def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs):
nv.validate_cum_func(tuple(), kwargs)
nv.validate_cum_func(tuple(), kwargs, fname=name)
if axis is None:
axis = self._stat_axis_number
else:
@@ -5374,7 +5374,7 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f):
@Appender(_bool_doc)
def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
**kwargs):
nv.validate_logical_func(tuple(), kwargs)
nv.validate_logical_func(tuple(), kwargs, fname=name)
if skipna is None:
skipna = True
if axis is None:
