Skip to content

Commit

Permalink
PERF: optimize NaT lookups in cython modules (pandas-dev#24008)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and Pingviinituutti committed Feb 28, 2019
1 parent 2d0b241 commit 85c5e56
Show file tree
Hide file tree
Showing 13 changed files with 69 additions and 59 deletions.
4 changes: 2 additions & 2 deletions pandas/_libs/tslib.pyx
Expand Up @@ -40,8 +40,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject,
tz_convert_utc_to_tzlocal)

# many modules still look for NaT and iNaT here despite them not being needed
from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821
from tslibs.nattype cimport checknull_with_nat, NPY_NAT
from tslibs.nattype import nat_strings, iNaT # noqa:F821
from tslibs.nattype cimport checknull_with_nat, NPY_NAT, c_NaT as NaT

from tslibs.offsets cimport to_offset

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Expand Up @@ -39,8 +39,8 @@ from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
from timezones import UTC
from parsing import parse_datetime_string

from nattype import nat_strings, NaT
from nattype cimport NPY_NAT, checknull_with_nat
from nattype import nat_strings
from nattype cimport NPY_NAT, checknull_with_nat, c_NaT as NaT

# ----------------------------------------------------------------------
# Constants
Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/nattype.pxd
@@ -1,9 +1,20 @@
# -*- coding: utf-8 -*-

from cpython.datetime cimport datetime

from numpy cimport int64_t
cdef int64_t NPY_NAT

cdef bint _nat_scalar_rules[6]


cdef class _NaT(datetime):
cdef readonly:
int64_t value
object freq

cdef _NaT c_NaT


cdef bint checknull_with_nat(object val)
cdef bint is_null_datetimelike(object val)
75 changes: 38 additions & 37 deletions pandas/_libs/tslibs/nattype.pyx
Expand Up @@ -47,7 +47,7 @@ def _make_nan_func(func_name, doc):

def _make_nat_func(func_name, doc):
def f(*args, **kwargs):
return NaT
return c_NaT
f.__name__ = func_name
f.__doc__ = doc
return f
Expand All @@ -67,10 +67,10 @@ def _make_error_func(func_name, cls):


cdef _nat_divide_op(self, other):
if PyDelta_Check(other) or is_timedelta64_object(other) or other is NaT:
if PyDelta_Check(other) or is_timedelta64_object(other) or other is c_NaT:
return np.nan
if is_integer_object(other) or is_float_object(other):
return NaT
return c_NaT
return NotImplemented


Expand All @@ -82,15 +82,15 @@ cdef _nat_rdivide_op(self, other):

def __nat_unpickle(*args):
# return constant defined in the module
return NaT
return c_NaT

# ----------------------------------------------------------------------


cdef class _NaT(datetime):
cdef readonly:
int64_t value
object freq
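# NOTE: `value` and `freq` are declared on `cdef class _NaT` in nattype.pxd,
# so the attribute declarations are intentionally not repeated here:
# cdef readonly:
# int64_t value
# object freq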

def __hash__(_NaT self):
# py3k needs this defined here
Expand All @@ -116,18 +116,18 @@ cdef class _NaT(datetime):

def __add__(self, other):
if PyDateTime_Check(other):
return NaT
return c_NaT

elif hasattr(other, 'delta'):
# Timedelta, offsets.Tick, offsets.Week
return NaT
return c_NaT
elif getattr(other, '_typ', None) in ['dateoffset', 'series',
'period', 'datetimeindex',
'timedeltaindex']:
# Duplicate logic in _Timestamp.__add__ to avoid needing
# to subclass; allows us to @final(_Timestamp.__add__)
return NotImplemented
return NaT
return c_NaT

def __sub__(self, other):
# Duplicate some logic from _Timestamp.__sub__ to avoid needing
Expand Down Expand Up @@ -184,19 +184,6 @@ cdef class _NaT(datetime):
""" Returns a numpy.datetime64 object with 'ns' precision """
return np.datetime64('NaT', 'ns')


class NaTType(_NaT):
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""

def __new__(cls):
cdef _NaT base

base = _NaT.__new__(cls, 1, 1, 1)
base.value = NPY_NAT
base.freq = None

return base

def __repr__(self):
return 'NaT'

Expand All @@ -216,20 +203,11 @@ class NaTType(_NaT):
def __long__(self):
return NPY_NAT

def __reduce_ex__(self, protocol):
# python 3.6 compat
# http://bugs.python.org/issue28730
# now __reduce_ex__ is defined and higher priority than __reduce__
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))

def total_seconds(self):
"""
Total duration of timedelta in seconds (to ns precision)
"""
# GH 10939
# GH#10939
return np.nan

@property
Expand Down Expand Up @@ -260,6 +238,28 @@ class NaTType(_NaT):
def is_year_end(self):
return False


class NaTType(_NaT):
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""

def __new__(cls):
cdef _NaT base

base = _NaT.__new__(cls, 1, 1, 1)
base.value = NPY_NAT
base.freq = None

return base

def __reduce_ex__(self, protocol):
# python 3.6 compat
# http://bugs.python.org/issue28730
# now __reduce_ex__ is defined and higher priority than __reduce__
return self.__reduce__()

def __reduce__(self):
return (__nat_unpickle, (None, ))

def __rdiv__(self, other):
return _nat_rdivide_op(self, other)

Expand All @@ -271,7 +271,7 @@ class NaTType(_NaT):

def __rmul__(self, other):
if is_integer_object(other) or is_float_object(other):
return NaT
return c_NaT
return NotImplemented

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -659,14 +659,15 @@ class NaTType(_NaT):
""")


NaT = NaTType()
c_NaT = NaTType() # C-visible
NaT = c_NaT # Python-visible


# ----------------------------------------------------------------------

cdef inline bint checknull_with_nat(object val):
""" utility to check if a value is a nat or not """
return val is None or util.is_nan(val) or val is NaT
return val is None or util.is_nan(val) or val is c_NaT


cdef inline bint is_null_datetimelike(object val):
Expand All @@ -683,7 +684,7 @@ cdef inline bint is_null_datetimelike(object val):
"""
if val is None or util.is_nan(val):
return True
elif val is NaT:
elif val is c_NaT:
return True
elif util.is_timedelta64_object(val):
return val.view('int64') == NPY_NAT
Expand Down
5 changes: 3 additions & 2 deletions pandas/_libs/tslibs/period.pyx
Expand Up @@ -46,8 +46,9 @@ from frequencies cimport (get_freq_code, get_base_alias,
get_rule_month)
from parsing import parse_time_string
from resolution import Resolution
from nattype import nat_strings, NaT
from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike
from nattype import nat_strings
from nattype cimport (
_nat_scalar_rules, NPY_NAT, is_null_datetimelike, c_NaT as NaT)
from offsets cimport to_offset
from offsets import _Tick

Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/timedeltas.pyx
Expand Up @@ -33,8 +33,8 @@ from ccalendar import DAY_SECONDS
from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct,
pandas_timedeltastruct)

from nattype import nat_strings, NaT
from nattype cimport checknull_with_nat, NPY_NAT
from nattype import nat_strings
from nattype cimport checknull_with_nat, NPY_NAT, c_NaT as NaT
from offsets cimport to_offset

# ----------------------------------------------------------------------
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/timestamps.pyx
Expand Up @@ -26,8 +26,7 @@ from conversion import tz_localize_to_utc, normalize_i8_timestamps
from conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject, convert_datetime_to_tsobject)
from fields import get_start_end_field, get_date_name_field
from nattype import NaT
from nattype cimport NPY_NAT
from nattype cimport NPY_NAT, c_NaT as NaT
from np_datetime import OutOfBoundsDatetime
from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds,
npy_datetimestruct, dt64_to_dtstruct)
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timezones.pyx
Expand Up @@ -2,6 +2,8 @@

from cython import Py_ssize_t

from cpython.datetime cimport tzinfo

# dateutil compat
from dateutil.tz import (
tzutc as _dateutil_tzutc,
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/datetimes.py
Expand Up @@ -6,10 +6,9 @@
from pytz import utc

from pandas._libs import lib, tslib
from pandas._libs.tslib import NaT, Timestamp, iNaT
from pandas._libs.tslibs import (
ccalendar, conversion, fields, normalize_date, resolution as libresolution,
timezones)
NaT, Timestamp, ccalendar, conversion, fields, iNaT, normalize_date,
resolution as libresolution, timezones)
import pandas.compat as compat
from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender, cache_readonly
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/period.py
Expand Up @@ -4,8 +4,7 @@

import numpy as np

from pandas._libs.tslib import NaT, iNaT
from pandas._libs.tslibs import period as libperiod
from pandas._libs.tslibs import NaT, iNaT, period as libperiod
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period, get_period_field_arr,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/scalar/test_nat.py
Expand Up @@ -4,7 +4,7 @@
import pytest
import pytz

from pandas._libs.tslib import iNaT
from pandas._libs.tslibs import iNaT

from pandas import (
DatetimeIndex, Index, NaT, Period, Series, Timedelta, TimedeltaIndex,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/scalar/timedelta/test_timedelta.py
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pytest

from pandas._libs.tslib import NaT, iNaT
from pandas._libs.tslibs import NaT, iNaT
import pandas.compat as compat

import pandas as pd
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/tseries/offsets/test_offsets.py
Expand Up @@ -5,13 +5,11 @@
import pytest
import pytz

import pandas._libs.tslib as tslib
from pandas._libs.tslib import NaT, Timestamp
from pandas._libs.tslibs import conversion, timezones
from pandas._libs.tslibs import (
NaT, OutOfBoundsDatetime, Timedelta, Timestamp, conversion, timezones)
from pandas._libs.tslibs.frequencies import (
INVALID_FREQ_ERR_MSG, get_freq_code, get_freq_str)
import pandas._libs.tslibs.offsets as liboffsets
from pandas._libs.tslibs.timedeltas import Timedelta
import pandas.compat as compat
from pandas.compat import range
from pandas.compat.numpy import np_datetime64_compat
Expand Down Expand Up @@ -124,7 +122,7 @@ def test_apply_out_of_range(self, tz_naive_fixture):
assert isinstance(result, datetime)
assert t.tzinfo == result.tzinfo

except tslib.OutOfBoundsDatetime:
except OutOfBoundsDatetime:
raise
except (ValueError, KeyError):
# we are creating an invalid offset
Expand Down

0 comments on commit 85c5e56

Please sign in to comment.