In [9]:
# pd.to_datetime() turns a list of timestamp strings into a DatetimeIndex;
# entries that carry no date at all are converted to NaT (missing).
import pandas as pd
idx = pd.to_datetime(
    [
        '2000-05-10 12:00:30',
        '2010-01-01 08:12:34',
        '',      # empty string -> NaT
        None,    # None -> NaT
    ]
)
idx

DatetimeIndex(['2000-05-10 12:00:30', '2010-01-01 08:12:34', 'NaT', 'NaT'], dtype='datetime64[ns]', freq=None)

In [6]:
# Boolean mask over idx: True marks the two NaT (missing) entries.
idx.isnull()

array([False, False,  True,  True])

# Time Series Basics

In [8]:
import numpy as np
from datetime import datetime
# Six irregularly spaced days in January 2011, given as plain datetime objects.
# Used as an index they become a DatetimeIndex; the values are random noise.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(data=np.random.randn(len(dates)), index=dates)
ts

2011-01-02    0.373766
2011-01-05   -1.264723
2011-01-07    0.650804
2011-01-08   -0.415439
2011-01-10    0.872782
2011-01-12    0.659477
dtype: float64

In [10]:
# The list of datetime objects passed as the index is stored as a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [12]:
ts.index[0]  # a single element of a DatetimeIndex is a pandas Timestamp object

Timestamp('2011-01-02 00:00:00')

## Indexing, selecting, subsetting

In [24]:
# The following all select the same value, the observation dated 2011-01-07
# (only the last expression is displayed by the notebook):
ts.iloc[2]         # by integer position; plain ts[2] relies on a deprecated positional fallback
ts[ts.index[2]]    # by the Timestamp label itself
ts['2011-01-07']   # by a date string
ts['20110107']     # by the same date written in another string format

0.6508044247671989

In [25]:
# 1000 consecutive daily observations starting on 2000-01-01, filled with
# random noise; show the first ten rows.
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='2000-01-01', periods=1000),
)
longer_ts.head(10)

2000-01-01   -0.832591
2000-01-02    0.348359
2000-01-03    0.705943
2000-01-04    0.584146
2000-01-05    0.270903
2000-01-06   -0.927492
2000-01-07    0.750245
2000-01-08    0.476494
2000-01-09   -0.155475
2000-01-10   -1.008983
Freq: D, dtype: float64

In [28]:
# A string holding only a year selects the observations of that year;
# [:5] then keeps the first five of them.
longer_ts['2001'][:5]

2001-01-01    0.675866
2001-01-02    1.367049
2001-01-03   -1.826323
2001-01-04   -0.472024
2001-01-05   -0.214990
Freq: D, dtype: float64

In [29]:
# A year-month string narrows the selection to that month (here May 2001).
longer_ts['2001-05'][:5]

2001-05-01   -0.074589
2001-05-02    1.418682
2001-05-03    2.849506
2001-05-04    0.263913
2001-05-05   -0.414803
Freq: D, dtype: float64

In [33]:
# Slice by a date range; the end date is included in the result (see the output).
longer_ts[datetime(2001, 3, 1):datetime(2001, 3, 5)]
longer_ts['2001-03-01':'2001-03-05']  # same selection, written with date strings
# Does it matter whether the time series is ordered chronologically?

2001-03-01   -0.099840
2001-03-02   -0.172835
2001-03-03    0.283785
2001-03-04    0.955325
2001-03-05    0.469126
Freq: D, dtype: float64

In [35]:
# truncate(after=...) drops every row later than the given date;
# the boundary date itself is kept.
longer_ts.truncate(after='2000-01-05')

2000-01-01   -0.832591
2000-01-02    0.348359
2000-01-03    0.705943
2000-01-04    0.584146
2000-01-05    0.270903
Freq: D, dtype: float64

In [36]:
# A DataFrame indexed by 100 consecutive Wednesdays with one column of random
# values per state; the final expression picks out the rows in May 2001.
dates = pd.date_range(start='2000-01-01', periods=100, freq='W-WED')
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
long_df.loc['2001-05']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,1.293342,0.142261,0.673107,0.326799
2001-05-09,1.839807,1.151669,0.180423,-0.578674
2001-05-16,2.161119,-0.389969,-1.367924,-0.319405
2001-05-23,0.235542,-0.093754,-0.209002,0.135409
2001-05-30,0.229981,-0.134746,1.106641,0.095064


## Duplicated indices

In [37]:
# Five timestamps in which 2000-01-02 occurs three times, paired with 0..4.
dates = pd.DatetimeIndex(
    ['2000-01-01'] + ['2000-01-02'] * 3 + ['2000-01-03']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [39]:
# False here, because the timestamp 2000-01-02 appears more than once.
dup_ts.index.is_unique

False

In [40]:
# A timestamp that occurs exactly once yields a scalar value.
dup_ts['2000-01-01']

0

In [41]:
# A repeated timestamp yields a Series holding every matching row.
dup_ts['2000-01-02']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [42]:
# Group on the index (level=0) to count how many rows share each timestamp.
dup_ts.groupby(level=0).count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

# Date Ranges, Frequencies, and Shifting
## Creating date ranges

In [48]:
# all the following return the same daily index, 2014-04-01 through 2014-04-05
# (only the last expression is displayed)
pd.date_range('2014-04-01', '2014-04-05')     # start and end
pd.date_range(start='2014-04-01', periods=5)  # start and number of periods
pd.date_range(end='2014-04-05', periods=5)    # end and number of periods

DatetimeIndex(['2014-04-01', '2014-04-02', '2014-04-03', '2014-04-04',
               '2014-04-05'],
              dtype='datetime64[ns]', freq='D')

In [51]:
# The time of day given in `start` is carried into every generated timestamp.
pd.date_range(start='2014-04-01 12:34:56', periods=5)

DatetimeIndex(['2014-04-01 12:34:56', '2014-04-02 12:34:56',
               '2014-04-03 12:34:56', '2014-04-04 12:34:56',
               '2014-04-05 12:34:56'],
              dtype='datetime64[ns]', freq='D')

In [52]:
# normalize=True drops the time of day, so the generated dates fall on midnight.
pd.date_range(start='2014-04-01 12:34:56', periods=5, normalize=True)

DatetimeIndex(['2014-04-01', '2014-04-02', '2014-04-03', '2014-04-04',
               '2014-04-05'],
              dtype='datetime64[ns]', freq='D')

## Frequencies and date offsets

In [54]:
# A frequency string may combine several units; this one means "every 90 minutes".
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated since pandas 2.2.
pd.date_range(start='2015-06-01', periods=8, freq='1h30min')

DatetimeIndex(['2015-06-01 00:00:00', '2015-06-01 01:30:00',
               '2015-06-01 03:00:00', '2015-06-01 04:30:00',
               '2015-06-01 06:00:00', '2015-06-01 07:30:00',
               '2015-06-01 09:00:00', '2015-06-01 10:30:00'],
              dtype='datetime64[ns]', freq='90T')

In [50]:
pd.date_range('2010-01-01', '2010-02-01', freq='W-MON')  # all Mondays within the range (the end date, itself a Monday, is included)

DatetimeIndex(['2010-01-04', '2010-01-11', '2010-01-18', '2010-01-25',
               '2010-02-01'],
              dtype='datetime64[ns]', freq='W-MON')

In [55]:
pd.date_range('2010-01-01', '2010-06-30', freq='WOM-1WED')  # the first Wednesday of each month in the range

DatetimeIndex(['2010-01-06', '2010-02-03', '2010-03-03', '2010-04-07',
               '2010-05-05', '2010-06-02'],
              dtype='datetime64[ns]', freq='WOM-1WED')

See [time-series frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases) for a list of possible frequencies.

## Shifting
### Shifting time series

In [56]:
# Redisplay the original series for comparison with the shifted versions below.
ts

2011-01-02    0.373766
2011-01-05   -1.264723
2011-01-07    0.650804
2011-01-08   -0.415439
2011-01-10    0.872782
2011-01-12    0.659477
dtype: float64

In [60]:
# With `freq` given, shift moves the timestamps (here two days earlier)
# and leaves the values unchanged.
ts.shift(-2, freq='D')

2010-12-31    0.373766
2011-01-03   -1.264723
2011-01-05    0.650804
2011-01-06   -0.415439
2011-01-08    0.872782
2011-01-10    0.659477
dtype: float64

In [59]:
# Move every timestamp forward to the next month end (the last day of the month).
# An offset object is passed instead of the 'M' alias: pandas 2.2 deprecated 'M'
# in favour of 'ME', whereas the object form works on old and new versions alike.
ts.shift(1, freq=pd.offsets.MonthEnd())

2011-01-31    0.373766
2011-01-31   -1.264723
2011-01-31    0.650804
2011-01-31   -0.415439
2011-01-31    0.872782
2011-01-31    0.659477
dtype: float64

### Shifting dates with offsets

In [69]:
from pandas.tseries import offsets
# Reference date shared by the offset-arithmetic examples.
now = datetime(year=2012, month=3, day=15)
now + offsets.Day() * 3  # a datetime plus an offset gives a pandas Timestamp

Timestamp('2012-03-18 00:00:00')

In [70]:
# rollforward moves the mid-month date forward to the end of its month.
offsets.MonthEnd().rollforward(now)

Timestamp('2012-03-31 00:00:00')

In [71]:
# rollback moves the mid-month date back to the end of the previous month.
offsets.MonthEnd().rollback(now)

Timestamp('2012-02-29 00:00:00')

In [72]:
now + offsets.MonthEnd()  # roll forward: from mid-March this lands on the end of March

Timestamp('2012-03-31 00:00:00')

In [74]:
now + offsets.MonthEnd() * 2  # the first increment is a roll forward, so two increments reach the end of April

Timestamp('2012-04-30 00:00:00')

# Time Zone Handling