diff --git a/README.rst b/README.rst index 269dd2afa..9d40ab061 100644 --- a/README.rst +++ b/README.rst @@ -85,7 +85,7 @@ Building on Linux with setuptools export PYVER=<3.6 or 3.7> export NUMPYVER=<1.16 or 1.17> - conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0 gcc_linux-64 gxx_linux-64 + conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1 gcc_linux-64 gxx_linux-64 source activate sdc-env git clone https://github.com/IntelPython/sdc.git cd sdc @@ -123,7 +123,7 @@ Building on Windows with setuptools set PYVER=<3.6 or 3.7> set NUMPYVER=<1.16 or 1.17> - conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0 + conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1 conda activate sdc-env set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include set LIB=%LIB%;%CONDA_PREFIX%\Library\lib diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 4886a6653..16e552f86 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ {% set NUMBA_VERSION = "==0.53.1" %} {% set PANDAS_VERSION = "==1.2.0" %} -{% set PYARROW_VERSION = "==2.0.0" %} +{% set PYARROW_VERSION = "==4.0.1" %} package: name: sdc diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 2156c2214..0065b1d02 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -41,14 +41,14 @@ Distribution includes Intel SDC for Python 3.6 and 3.7 for Windows and Linux pla Intel SDC conda package can be installed using the steps below: :: - > conda create -n sdc_env python=<3.7 or 3.6> pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge + > conda create -n sdc_env python=<3.7 or 3.6> pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge > conda activate sdc_env > conda install sdc -c intel/label/beta -c intel -c defaults -c conda-forge --override-channels Intel SDC wheel package can be installed using the steps below: :: - > conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge + > conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge > conda activate sdc_env > pip install --index-url https://pypi.anaconda.org/intel/label/beta/simple --extra-index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple sdc diff --git a/requirements.txt b/requirements.txt index 5b123c130..db7518037 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy>=1.16 pandas==1.2.0 -pyarrow==2.0.0 +pyarrow==4.0.1 numba==0.53.1 tbb tbb-devel diff --git a/sdc/io/csv_ext.py b/sdc/io/csv_ext.py index e772d8b4c..cb800b9c2 100644 --- a/sdc/io/csv_ext.py +++ b/sdc/io/csv_ext.py @@ -470,9 +470,11 @@ def pandas_read_csv( try: for column in parse_dates: name = f"f{column}" - # TODO: Try to help pyarrow infer date type - set DateType. - # dtype[name] = pyarrow.from_numpy_dtype(np.datetime64) # string - del column_types[name] + # starting from pyarrow=3.0.0 strings are parsed to DateType (converted back to 'object' + # when using to_pandas), but not TimestampType (that is used to represent np.datetime64) + # see: pyarrow.from_numpy_dtype(np.datetime64('NaT', 's')) + # so make pyarrow infer needed type manually + column_types[name] = pyarrow.timestamp('s') except: pass parse_options = pyarrow.csv.ParseOptions( diff --git a/setup.py b/setup.py index a730e7373..0d494bf2a 100644 --- a/setup.py +++ b/setup.py @@ -404,7 +404,7 @@ def run(self): install_requires=[ 'numpy>=1.16', 'pandas==1.2.0', - 'pyarrow==2.0.0', + 'pyarrow==4.0.1', 'numba==0.53.1', 'tbb' ],