Fix what's new
Camilo Cota committed May 15, 2016
2 parents dc7acd1 + 2de2884 commit b560fda
Showing 45 changed files with 1,079 additions and 316 deletions.
13 changes: 12 additions & 1 deletion asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
@@ -423,7 +423,7 @@ class frame_get_dtype_counts(object):
    goal_time = 0.2

    def setup(self):
        self.df = pandas.DataFrame(np.random.randn(10, 10000))
        self.df = DataFrame(np.random.randn(10, 10000))

    def time_frame_get_dtype_counts(self):
        self.df.get_dtype_counts()
@@ -985,3 +985,14 @@ def setup(self):

    def time_series_string_vector_slice(self):
        self.s.str[:5]


class frame_quantile_axis1(object):
    goal_time = 0.2

    def setup(self):
        self.df = DataFrame(np.random.randn(1000, 3),
                            columns=list('ABC'))

    def time_frame_quantile_axis1(self):
        self.df.quantile([0.1, 0.5], axis=1)
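As a standalone sketch of what this new benchmark exercises (row-wise quantiles over a small wide-ish frame), the following reproduces the setup outside of asv:

```python
import numpy as np
import pandas as pd

# Standalone sketch of the frame_quantile_axis1 benchmark body:
# row-wise (axis=1) quantiles over a 1000x3 frame.
df = pd.DataFrame(np.random.randn(1000, 3), columns=list('ABC'))

# One row per requested quantile, one column per original row label.
result = df.quantile([0.1, 0.5], axis=1)
```

With two quantiles requested over 1000 rows, `result` is a 2x1000 DataFrame indexed by the quantiles.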
3 changes: 0 additions & 3 deletions codecov.yml
@@ -7,6 +7,3 @@ coverage:
default:
target: '50'
branches: null
changes:
default:
branches: null
11 changes: 11 additions & 0 deletions doc/source/10min.rst
@@ -483,6 +483,17 @@ SQL style merges. See the :ref:`Database style joining <merging.join>`

   right
   pd.merge(left, right, on='key')

Another example:

.. ipython:: python

   left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
   right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})
   left
   right
   pd.merge(left, right, on='key')

Append
~~~~~~

13 changes: 13 additions & 0 deletions doc/source/advanced.rst
@@ -528,6 +528,13 @@ return a copy of the data rather than a view:

      jim joe
   1  z    0.64094

Furthermore, if you try to index something that is not fully lexsorted, this can raise:

.. code-block:: ipython

   In [5]: dfm.loc[(0,'y'):(1, 'z')]
   KeyError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'

The ``is_lexsorted()`` method on an ``Index`` shows whether the index is sorted, and the ``lexsort_depth`` property returns the sort depth:

.. ipython:: python
@@ -542,6 +549,12 @@
   dfm.index.is_lexsorted()
   dfm.index.lexsort_depth

And now selection works as expected.

.. ipython:: python

   dfm.loc[(0,'y'):(1, 'z')]

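The usual fix for the ``KeyError`` above is to restore full lexsort depth with ``sort_index()`` before slicing. A minimal sketch, with ``dfm`` reconstructed here to mirror the doc's example (the exact values are placeholders):

```python
import numpy as np
import pandas as pd

# A MultiIndex that is not fully lexsorted: within jim == 1,
# 'z' appears before 'y'.
dfm = pd.DataFrame({'jim': [0, 0, 1, 1],
                    'joe': ['x', 'x', 'z', 'y'],
                    'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

# Sorting the index restores full lexsort depth...
dfm = dfm.sort_index()

# ...so label-based slicing no longer raises.
sliced = dfm.loc[(0, 'y'):(1, 'z')]
```

After sorting, the slice picks up the two rows keyed (1, 'y') and (1, 'z').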
Take Methods
------------

2 changes: 1 addition & 1 deletion doc/source/io.rst
@@ -99,7 +99,7 @@ delimiter : str, default ``None``
    Alternative argument name for sep.
delim_whitespace : boolean, default False
    Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``)
    will be used as the delimiter. Equivalent to setting ``sep='\+s'``.
    will be used as the delimiter. Equivalent to setting ``sep='\s+'``.
    If this option is set to True, nothing should be passed in for the
    ``delimiter`` parameter.
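A small sketch of the equivalence this doc fix corrects: ``sep=r'\s+'`` treats any run of spaces or tabs as the delimiter, the same behaviour ``delim_whitespace=True`` enables (the data here is made up):

```python
import io
import pandas as pd

# Mixed spaces and tabs between fields.
data = "a b\tc\n1 2  3\n4\t5 6\n"

# sep=r'\s+' splits on any whitespace run -- equivalent to
# passing delim_whitespace=True.
df = pd.read_csv(io.StringIO(data), sep=r'\s+')
```

Either spelling yields a 2x3 frame with columns ``a``, ``b``, ``c``.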

10 changes: 10 additions & 0 deletions doc/source/reshaping.rst
@@ -445,6 +445,16 @@ If ``crosstab`` receives only two Series, it will provide a frequency table.

   pd.crosstab(df.A, df.B)

Any input passed containing ``Categorical`` data will have **all** of its
categories included in the cross-tabulation, even if the actual data does
not contain any instances of a particular category.

.. ipython:: python

   foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
   bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
   pd.crosstab(foo, bar)

Normalization
~~~~~~~~~~~~~

13 changes: 12 additions & 1 deletion doc/source/text.rst
@@ -281,7 +281,7 @@ Unlike ``extract`` (which returns only the first match),

.. ipython:: python

   s = pd.Series(["a1a2", "b1", "c1"], ["A", "B", "C"])
   s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
   s
   two_groups = '(?P<letter>[a-z])(?P<digit>[0-9])'
   s.str.extract(two_groups, expand=True)
@@ -313,6 +313,17 @@ then ``extractall(pat).xs(0, level='match')`` gives the same result as

   extractall_result
   extractall_result.xs(0, level="match")

``Index`` also supports ``.str.extractall``. It returns a ``DataFrame`` with the
same result as ``Series.str.extractall`` with a default index (starting from 0).

.. versionadded:: 0.18.2

.. ipython:: python

   pd.Index(["a1a2", "b1", "c1"]).str.extractall(two_groups)
   pd.Series(["a1a2", "b1", "c1"]).str.extractall(two_groups)

Testing for Strings that Match or Contain a Pattern
---------------------------------------------------
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.18.1.txt
@@ -563,7 +563,6 @@ Performance Improvements
- Improved speed of SAS reader (:issue:`12656`, :issue:`12961`)
- Performance improvements in ``.groupby(..).cumcount()`` (:issue:`11039`)
- Improved memory usage in ``pd.read_csv()`` when using ``skiprows=an_integer`` (:issue:`13005`)

- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
- Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`).
- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`)
22 changes: 20 additions & 2 deletions doc/source/whatsnew/v0.18.2.txt
@@ -31,8 +31,14 @@ Other enhancements

- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains raising a ``NonExistentTimeError`` (:issue:`13057`)
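  A sketch of the coercion this enables, using a timestamp skipped by a DST spring-forward. Note the 0.18.2 keyword is ``errors='coerce'``; later pandas versions renamed it, so this example tries the modern ``nonexistent='NaT'`` spelling first and falls back to the keyword introduced here:

  ```python
  import pandas as pd

  # 02:30 on 2015-03-08 does not exist in US/Eastern (clocks jump
  # from 02:00 to 03:00), so localizing it normally raises.
  ts = pd.Timestamp('2015-03-08 02:30:00')

  try:
      # Modern spelling (pandas >= 0.24).
      localized = ts.tz_localize('US/Eastern', nonexistent='NaT')
  except TypeError:
      # The keyword added in this release.
      localized = ts.tz_localize('US/Eastern', errors='coerce')
  ```

  Either way, the nonexistent time is coerced to ``NaT`` instead of raising.
  
  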

- Support decimal option in PythonParser
- ``Index`` now supports ``.str.extractall()`` which returns ``DataFrame``, see :ref:`Extract all matches in each subject (extractall) <text.extractall>` (:issue:`10008`, :issue:`13156`)

  .. ipython:: python

     idx = pd.Index(["a1a2", "b1", "c1"])
     idx.str.extractall("[ab](?P<digit>\d)")

- Support decimal option in PythonParser

.. _whatsnew_0182.api:

@@ -97,22 +103,31 @@ Performance Improvements

- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`)
- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks is large, though it is recommended to use ``IntIndex`` in such cases (:issue:`13082`)
- Increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`)





.. _whatsnew_0182.bug_fixes:

Bug Fixes
~~~~~~~~~
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)

- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)


- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)


- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()``); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`)



- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`)


- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)
@@ -121,14 +136,17 @@ Bug Fixes



- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)






- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`)



- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
2 changes: 1 addition & 1 deletion pandas/core/categorical.py
@@ -985,7 +985,7 @@ def __setstate__(self, state):

        # Provide compatibility with pre-0.15.0 Categoricals.
        if '_codes' not in state and 'labels' in state:
            state['_codes'] = state.pop('labels')
            state['_codes'] = state.pop('labels').astype(np.int8)
        if '_categories' not in state and '_levels' in state:
            state['_categories'] = self._validate_categories(state.pop(
                '_levels'))
34 changes: 15 additions & 19 deletions pandas/core/frame.py
@@ -4989,31 +4989,27 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
        0.5  2.5  55.0
        """
        self._check_percentile(q)
        if not com.is_list_like(q):
            q = [q]
            squeeze = True
        else:
            squeeze = False

        data = self._get_numeric_data() if numeric_only else self
        axis = self._get_axis_number(axis)
        is_transposed = axis == 1

        def _quantile(series):
            res = series.quantile(q, interpolation=interpolation)
            return series.name, res

        if axis == 1:
        if is_transposed:
            data = data.T

        # unable to use DataFrame.apply, because data may be empty
        result = dict(_quantile(s) for (_, s) in data.iteritems())
        result = self._constructor(result, columns=data.columns)
        if squeeze:
            if result.shape == (1, 1):
                result = result.T.iloc[:, 0]  # don't want scalar
            else:
                result = result.T.squeeze()
            result.name = None  # For groupby, so it can set an index name
        result = data._data.quantile(qs=q,
                                     axis=1,
                                     interpolation=interpolation,
                                     transposed=is_transposed)

        if result.ndim == 2:
            result = self._constructor(result)
        else:
            result = self._constructor_sliced(result, name=q)

        if is_transposed:
            result = result.T

        return result

    def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
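The user-visible contract the rewritten path preserves (a scalar ``q`` yields a ``Series`` named after the quantile, a list-like ``q`` yields a ``DataFrame`` indexed by the quantiles) can be sketched as follows; the frame here is illustrative:

```python
import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0],
                   'b': [10.0, 20.0, 30.0, 40.0]})

# Scalar q: a Series over the columns, named after the quantile
# (the _constructor_sliced(result, name=q) branch above).
s = df.quantile(0.5)

# List-like q: a DataFrame with one row per requested quantile
# (the result.ndim == 2 branch above).
frame = df.quantile([0.25, 0.75])
```

With linear interpolation, the median of ``[1, 2, 3, 4]`` is 2.5, so ``s['a'] == 2.5`` and ``s.name == 0.5``.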
