In [1]:
import numpy as np
import pandas as pd

In [2]:
from datetime import datetime
from datetime import timedelta

# 11.1 Date and Time Data Types and Tools

In [3]:
# Snapshot of the current local date and time; no tz argument is passed, so the result is a naive datetime.
now = datetime.now()

In [4]:
now

datetime.datetime(2025, 11, 9, 22, 48, 35, 369911)

In [5]:
now.year, now.month, now.day

(2025, 11, 9)

In [6]:
# Subtracting one datetime from another yields a timedelta (days + leftover seconds).
delta = (
    datetime(year=2011, month=1, day=7)
    - datetime(year=2008, month=6, day=24, hour=8, minute=15)
)

In [7]:
delta

datetime.timedelta(days=926, seconds=56700)

In [8]:
delta.days

926

In [9]:
delta.seconds

56700

In [10]:
start = datetime(2011, 1, 7)

In [11]:
# The first positional argument of timedelta is a number of days; spell it out.
start + timedelta(days=12)

datetime.datetime(2011, 1, 19, 0, 0)

In [12]:
# A timedelta can be scaled by an integer before being subtracted from a datetime.
start - 2 * timedelta(days=12)

datetime.datetime(2010, 12, 14, 0, 0)

In [13]:
stamp = datetime(2011, 1, 3)

In [14]:
str(stamp)

'2011-01-03 00:00:00'

In [15]:
# Format as year-month-day. %m is the zero-padded month; %M is the minute
# field and would render "00" for this midnight timestamp.
stamp.strftime("%Y-%m-%d")

'2011-01-03'

In [16]:
value = "2011-01-03"

In [17]:
# Parse the string held in `value` back into a datetime using an explicit year-month-day format.
datetime.strptime(value, "%Y-%m-%d")

datetime.datetime(2011, 1, 3, 0, 0)

In [18]:
datestrs = ["7/6/2011", "8/6/2011"]

In [19]:
# The format is month-first, so "7/6/2011" is read as July 6, not June 7.
[datetime.strptime(date_str, "%m/%d/%Y") for date_str in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

In [20]:
datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:00:00"]

In [21]:
# Convert the whole list of date strings at once; the result is a DatetimeIndex.
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [22]:
# Append a None to show how a missing entry is handled: it comes back as NaT in the index.
idx = pd.to_datetime(datestrs + [None])

In [23]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [24]:
idx[2]

NaT

In [25]:
pd.isna(idx)

array([False, False,  True])

# 11.2 Time Series Basics

In [26]:
# Six irregularly spaced days in January 2011, used as the index of the sample series.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

In [27]:
# Six standard-normal draws indexed by the datetimes in `dates`.
# NOTE(review): no random seed is set in the cells visible here, so these values
# (and every output derived from ts) will differ after a kernel restart.
ts = pd.Series(np.random.standard_normal(6), index=dates)

In [28]:
ts

2011-01-02   -1.562108
2011-01-05    0.484476
2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
2011-01-12    0.899952
dtype: float64

In [29]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [30]:
# Addition aligns on the index: dates absent from ts[::2] come out as NaN.
ts + ts[::2]

2011-01-02   -3.124216
2011-01-05         NaN
2011-01-07   -0.305772
2011-01-08         NaN
2011-01-10   -0.286313
2011-01-12         NaN
dtype: float64

In [31]:
ts.index.dtype

dtype('<M8[ns]')

In [32]:
stamp = ts.index[0]

In [33]:
stamp

Timestamp('2011-01-02 00:00:00')

# 11.2.1 Indexing, Selection, and Subsetting

In [34]:
stamp = ts.index[2]

In [35]:
ts[stamp]

np.float64(-0.15288591326408302)

In [37]:
# A date string is accepted as a label: this returns the value stored for 2011-01-10.
ts["2011-01-10"]

np.float64(-0.1431565921860741)

In [38]:
# 1000 standard-normal draws on consecutive daily timestamps starting 2000-01-01.
longer_ts = pd.Series(
    np.random.standard_normal(1000),
    index=pd.date_range(start="2000-01-01", periods=1000),
)

In [39]:
longer_ts

2000-01-01    0.278555
2000-01-02    0.122254
2000-01-03   -1.293322
2000-01-04    0.508476
2000-01-05    2.181640
                ...   
2002-09-22   -0.551704
2002-09-23   -0.247315
2002-09-24    2.261034
2002-09-25   -0.171526
2002-09-26   -1.803316
Freq: D, Length: 1000, dtype: float64

In [41]:
# A bare year string selects every row that falls in that year (365 rows here).
longer_ts["2001"]

2001-01-01   -0.491720
2001-01-02   -0.020023
2001-01-03    0.793480
2001-01-04    0.193165
2001-01-05   -2.792579
                ...   
2001-12-27   -0.050635
2001-12-28    1.301712
2001-12-29   -1.192652
2001-12-30   -0.247193
2001-12-31   -2.465616
Freq: D, Length: 365, dtype: float64

In [42]:
# A year-month string narrows the selection to that single month (May 2001).
longer_ts["2001-05"]

2001-05-01   -0.515365
2001-05-02    1.385148
2001-05-03    0.786506
2001-05-04    1.058878
2001-05-05   -0.560496
2001-05-06   -0.892526
2001-05-07    0.414798
2001-05-08   -0.012592
2001-05-09   -1.124384
2001-05-10   -0.453820
2001-05-11   -0.258482
2001-05-12    0.852764
2001-05-13    0.568224
2001-05-14    1.747496
2001-05-15    0.233400
2001-05-16   -0.414263
2001-05-17    0.269178
2001-05-18    0.300700
2001-05-19    0.904000
2001-05-20   -0.151550
2001-05-21    1.540754
2001-05-22    1.581111
2001-05-23    1.188037
2001-05-24   -0.936276
2001-05-25   -1.542885
2001-05-26    1.625947
2001-05-27    0.382260
2001-05-28   -0.031261
2001-05-29    0.786743
2001-05-30   -0.485054
2001-05-31   -0.695612
Freq: D, dtype: float64

In [43]:
# Slice with a datetime object: keeps the row at 2011-01-07 and everything after it.
ts[datetime(2011, 1, 7):]

2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
2011-01-12    0.899952
dtype: float64

In [45]:
# Both endpoints are included in the result: the rows for 01-07 and 01-10 both appear.
ts[datetime(2011, 1, 7):datetime(2011, 1, 10)]

2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
dtype: float64

In [46]:
ts

2011-01-02   -1.562108
2011-01-05    0.484476
2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
2011-01-12    0.899952
dtype: float64

In [47]:
# Neither bound is an actual label in ts.index; the slice still returns the rows dated between them.
ts["2011-01-06":"2011-01-11"]

2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
dtype: float64

In [48]:
# Drop every observation dated after 2011-01-09; earlier rows are kept.
ts.truncate(after="2011-01-09")

2011-01-02   -1.562108
2011-01-05    0.484476
2011-01-07   -0.152886
2011-01-08   -0.440779
dtype: float64

In [49]:
# 100 weekly timestamps anchored on Wednesdays, starting from the first one on/after 2000-01-01.
dates = pd.date_range(
    start="2000-01-01",
    periods=100,
    freq="W-WED",
)

In [50]:
# 100 x 4 frame of standard-normal draws, one row per timestamp in `dates`, one column per state.
long_df = pd.DataFrame(
    np.random.standard_normal((100, 4)),
    index=dates,
    columns=["Colorado", "Texas", "New York", "Ohio"],
)

In [51]:
# Partial-string selection through .loc: returns the rows whose index falls in May 2001.
long_df.loc["2001-05"]

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,1.145576,1.854535,-1.890899,0.438676
2001-05-09,-1.395446,0.639579,-0.756597,-0.070331
2001-05-16,0.389657,-0.463889,0.560885,0.4021
2001-05-23,-1.781724,0.3287,-0.615988,0.029673
2001-05-30,0.70289,0.737968,-1.348889,-0.523497


# 11.2.2 Time Series with Duplicate Indices

In [61]:
# Index with 2000-01-02 repeated three times, to demonstrate duplicate timestamps.
dates = pd.DatetimeIndex(
    ["2000-01-01"] + ["2000-01-02"] * 3 + ["2000-01-03"]
)

In [62]:
# Values 0..4 placed on the index that contains repeated timestamps.
dup_ts = pd.Series(np.arange(5), index=dates)

In [63]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [64]:
dup_ts.index.is_unique

False

In [65]:
dup_ts["2000-01-03"]

np.int64(4)

In [66]:
# 2000-01-02 appears three times in the index, so this lookup returns a Series rather than a scalar.
dup_ts["2000-01-02"]

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [67]:
# level=0 groups on the index labels themselves, so rows sharing a timestamp fall into one group.
grouped = dup_ts.groupby(level=0)

In [68]:
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64

In [69]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

# 11.3 Date Ranges, Frequencies, and Shifting

In [70]:
ts

2011-01-02   -1.562108
2011-01-05    0.484476
2011-01-07   -0.152886
2011-01-08   -0.440779
2011-01-10   -0.143157
2011-01-12    0.899952
dtype: float64

In [71]:
# resample("D") returns a DatetimeIndexResampler object; no aggregation method is called on it in this cell.
resampler = ts.resample("D")

In [73]:
resampler

<pandas.core.resample.DatetimeIndexResampler object at 0x796f23d6cd00>

# 11.3.1 Generating Date Ranges