diff --git a/appveyor.yml b/appveyor.yml index 65e62f887554e..a1f8886f6d068 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -74,12 +74,18 @@ install: # create our env - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas + - cmd: pip install moto - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" + # add some pip only reqs to the env + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip + - cmd: echo "installing requirements from %REQ%" + - cmd: pip install -Ur %REQ% + # build em using the local source checkout in the correct windows env - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 6432ccfb19efe..d90c994b3d194 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -67,6 +67,9 @@ def time_value_counts_dropna(self): def time_rendering(self): str(self.sel) + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + class Categoricals3(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 78d66295f28cc..df3c2bf3e4b46 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -78,6 +78,65 @@ def time_value_counts_pindex(self): self.i.value_counts() +class Properties(object): + def setup(self): + self.per = Period('2017-09-06 08:28', freq='min') + + def time_year(self): + self.per.year + + def time_month(self): + self.per.month + + def time_day(self): + self.per.day + + def time_hour(self): + self.per.hour + + def time_minute(self): + self.per.minute + + def time_second(self): + self.per.second + + def time_is_leap_year(self): + self.per.is_leap_year + + def time_quarter(self): + 
self.per.quarter + + def time_qyear(self): + self.per.qyear + + def time_week(self): + self.per.week + + def time_daysinmonth(self): + self.per.daysinmonth + + def time_dayofweek(self): + self.per.dayofweek + + def time_dayofyear(self): + self.per.dayofyear + + def time_start_time(self): + self.per.start_time + + def time_end_time(self): + self.per.end_time + + def time_to_timestamp(self): + self.per.to_timestamp() + + def time_now(self): + self.per.now() + + def time_asfreq(self): + self.per.asfreq('A') + + class period_standard_indexing(object): goal_time = 0.2 diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 29ca69970104b..fd79f907625e9 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 time conda install -n pandas pytest>=3.1.0 || exit 1 source activate pandas +time pip install moto || exit 1 # build but don't install echo "[build em]" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index d26689f2e6b4b..b85263daa1eac 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest>=3.1.0 -time pip install pytest-xdist +time pip install pytest-xdist moto if [ "$LINT" ]; then conda install flake8 diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index c7190c506ba18..dbc4f6cbd6509 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -5,3 +5,4 @@ cython pytest>=3.1.0 pytest-cov flake8 +moto diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 
3f145cf955664..3bda8c7eacb61 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -625,7 +625,7 @@ Index Types We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `. ``TimedeltaIndex`` are :ref:`here `. -In the following sub-sections we will highlite some other index types. +In the following sub-sections we will highlight some other index types. .. _indexing.categoricalindex: @@ -645,7 +645,7 @@ and allows efficient indexing and storage of an index with a large number of dup df.dtypes df.B.cat.categories -Setting the index, will create create a ``CategoricalIndex`` +Setting the index, will create a ``CategoricalIndex`` .. ipython:: python @@ -681,7 +681,7 @@ Groupby operations on the index will preserve the index nature as well Reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories -of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. .. ipython :: python @@ -722,7 +722,7 @@ Int64Index and RangeIndex Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. ``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects. -``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analagous to python `range types `__. +``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python `range types `__. .. 
_indexing.float64index: @@ -963,7 +963,7 @@ index can be somewhat complicated. For example, the following does not work: s.loc['c':'e'+1] A very common use case is to limit a time series to start and end at two -specific dates. To enable this, we made the design design to make label-based +specific dates. To enable this, we made the design to make label-based slicing include both endpoints: .. ipython:: python diff --git a/doc/source/api.rst b/doc/source/api.rst index 27a4ab9cc6cbc..4e02f7b11f466 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,10 +218,19 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range + cdate_range period_range timedelta_range infer_freq +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + interval_range + Top-level evaluation ~~~~~~~~~~~~~~~~~~~~ @@ -1282,7 +1291,7 @@ Index ----- **Many of these methods or variants thereof are available on the objects -that contain an index (Series/Dataframe) and those should most likely be +that contain an index (Series/DataFrame) and those should most likely be used before calling these methods directly.** .. autosummary:: diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 42c28df3a6030..0990d2bd15ee6 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -923,7 +923,7 @@ Passing a named function will yield that name for the row: Aggregating with a dict +++++++++++++++++++++++ -Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFame.agg`` +Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` allows you to customize which functions are applied to which columns. Note that the results are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering. 
diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 23699393958cf..14cfdbc364837 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -654,7 +654,7 @@ aggregation with, outputting a DataFrame: r['A'].agg([np.sum, np.mean, np.std]) -On a widowed DataFrame, you can pass a list of functions to apply to each +On a windowed DataFrame, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. ipython:: python diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index e1231b9a4a200..e9a7d8dd0a46e 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -561,7 +561,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching .. note:: - If you pass a dict to ``aggregate``, the ordering of the output colums is + If you pass a dict to ``aggregate``, the ordering of the output columns is non-deterministic. If you want to be sure the output columns will be in a specific order, you can use an ``OrderedDict``. Compare the output of the following two commands: @@ -1211,7 +1211,7 @@ Groupby by Indexer to 'resample' data Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples. -In order to resample to work on indices that are non-datetimelike , the following procedure can be utilized. +In order to resample to work on indices that are non-datetimelike, the following procedure can be utilized. In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation. 
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 8474116c38082..edbc4e6d7fd22 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -714,7 +714,7 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. +The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis. In the ``Series`` case this is effectively an appending operation diff --git a/doc/source/io.rst b/doc/source/io.rst index 8fbb23769492e..fcf7f6029197b 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3077,7 +3077,7 @@ Compressed pickle files .. versionadded:: 0.20.0 -:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. `zip`` file supports read only and must contain only one data file to be read in. diff --git a/doc/source/merging.rst b/doc/source/merging.rst index a5ee1b1a9384c..72787ea97a782 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1329,7 +1329,7 @@ By default we are taking the asof of the quotes. on='time', by='ticker') -We only asof within ``2ms`` betwen the quote time and the trade time. +We only asof within ``2ms`` between the quote time and the trade time. .. ipython:: python @@ -1338,8 +1338,8 @@ We only asof within ``2ms`` betwen the quote time and the trade time. by='ticker', tolerance=pd.Timedelta('2ms')) -We only asof within ``10ms`` betwen the quote time and the trade time and we exclude exact matches on time. 
-Note that though we exclude the exact matches (of the quotes), prior quotes DO propogate to that point +We only asof within ``10ms`` between the quote time and the trade time and we exclude exact matches on time. +Note that though we exclude the exact matches (of the quotes), prior quotes DO propagate to that point in time. .. ipython:: python diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 65b411ccd4af2..b33b5c304853a 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -320,7 +320,7 @@ Interpolation The ``limit_direction`` keyword argument was added. -Both Series and Dataframe objects have an ``interpolate`` method that, by default, +Both Series and DataFrame objects have an ``interpolate`` method that, by default, performs linear interpolation at missing datapoints. .. ipython:: python diff --git a/doc/source/options.rst b/doc/source/options.rst index 1592caf90546c..f042e4d3f5120 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -313,9 +313,9 @@ display.large_repr truncate For DataFrames exceeding max_ro display.latex.repr False Whether to produce a latex DataFrame representation for jupyter frontends that support it. -display.latex.escape True Escapes special caracters in Dataframes, when +display.latex.escape True Escapes special characters in DataFrames, when using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe +display.latex.longtable False Specifies if the to_latex method of a DataFrame uses the longtable format. 
display.latex.multicolumn True Combines columns when using a MultiIndex display.latex.multicolumn_format 'l' Alignment of multicolumn labels diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index fab83222b313f..1209c4a8d6be8 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -156,7 +156,7 @@ the level numbers: stacked.unstack('second') Notice that the ``stack`` and ``unstack`` methods implicitly sort the index -levels involved. Hence a call to ``stack`` and then ``unstack``, or viceversa, +levels involved. Hence a call to ``stack`` and then ``unstack``, or vice versa, will result in a **sorted** copy of the original DataFrame or Series: .. ipython:: python diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index cf16cee501a3e..89efa7b4be3ee 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -132,7 +132,7 @@ dtype, ``fill_value`` default changes: s.to_sparse() You can change the dtype using ``.astype()``, the result is also sparse. Note that -``.astype()`` also affects to the ``fill_value`` to keep its dense represantation. +``.astype()`` also affects to the ``fill_value`` to keep its dense representation. .. ipython:: python diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index c250787785e14..1d6ce163cf977 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -169,7 +169,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to resuse your existing knowledge of how to interact with DataFrames.\n", + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", "\n", "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `