In [1]:
from datetime import datetime

In [2]:
now = datetime.now()
now

datetime.datetime(2019, 8, 22, 7, 57, 56, 60587)

In [3]:
now.year, now.month, now.day

(2019, 8, 22)

In [4]:
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta

datetime.timedelta(days=926, seconds=56700)

In [5]:
delta.days, delta.seconds

(926, 56700)

In [6]:
from datetime import timedelta

In [7]:
start = datetime(2011, 1, 7)

In [8]:
start + timedelta(12)

datetime.datetime(2011, 1, 19, 0, 0)

In [9]:
start - 2 * timedelta(12)

datetime.datetime(2010, 12, 14, 0, 0)

In [10]:
timedelta(12)

datetime.timedelta(days=12)

## Converting Between String and Datetime

In [11]:
stamp = datetime(2011, 1, 3)

In [12]:
str(stamp)

'2011-01-03 00:00:00'

In [13]:
stamp.strftime('%Y-%m-%d')

'2011-01-03'

In [14]:
value = '2011-01-03'

In [15]:
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2011, 1, 3, 0, 0)

In [16]:
datestrs = ['7/6/2011', '8/6/2011']

In [17]:
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

In [18]:
from dateutil.parser import parse

In [19]:
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

In [20]:
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

In [21]:
parse('6/12/2011', dayfirst = True)

datetime.datetime(2011, 12, 6, 0, 0)

In [22]:
import pandas as pd
import numpy as np

In [23]:
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']

In [24]:
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [25]:
idx = pd.to_datetime(datestrs + [None])
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [26]:
idx[2]

NaT

In [27]:
pd.isnull(idx)

array([False, False,  True])

# Time Series Basics

In [28]:
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]

In [29]:
ts = pd.Series(np.random.randn(6), index = dates)
ts

2011-01-02   -0.500700
2011-01-05   -0.334492
2011-01-07    0.624306
2011-01-08   -0.336707
2011-01-10    0.177287
2011-01-12    0.300193
dtype: float64

In [30]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [31]:
ts + ts[::2]

2011-01-02   -1.001400
2011-01-05         NaN
2011-01-07    1.248612
2011-01-08         NaN
2011-01-10    0.354574
2011-01-12         NaN
dtype: float64

In [32]:
ts.index.dtype

dtype('<M8[ns]')

In [33]:
stamp = ts.index[0]

In [34]:
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting

In [35]:
stamp = ts.index[2]

In [36]:
ts[stamp]

0.6243062080204753

In [37]:
ts['1/10/2011']

0.1772870642497754

In [38]:
ts['20110110']

0.1772870642497754

In [39]:
longer_ts = pd.Series(np.random.randn(1000), index = pd.date_range('1/1/2000', periods = 1000))
longer_ts

2000-01-01    0.010182
2000-01-02   -0.703782
2000-01-03   -0.581529
2000-01-04   -0.300245
2000-01-05   -0.739153
2000-01-06    0.529188
2000-01-07    0.273403
2000-01-08    0.553786
2000-01-09    1.354110
2000-01-10    1.236635
2000-01-11   -1.047751
2000-01-12    0.797935
2000-01-13    0.571641
2000-01-14   -0.551007
2000-01-15   -2.804449
2000-01-16   -0.128998
2000-01-17   -0.849656
2000-01-18   -0.001421
2000-01-19    1.085428
2000-01-20    0.692346
2000-01-21   -1.448621
2000-01-22   -1.007999
2000-01-23   -0.702654
2000-01-24    1.385132
2000-01-25   -0.243124
2000-01-26    0.070611
2000-01-27   -0.381232
2000-01-28   -0.590993
2000-01-29    0.876900
2000-01-30   -1.368061
                ...   
2002-08-28   -0.347743
2002-08-29    1.108536
2002-08-30    0.634731
2002-08-31    0.218916
2002-09-01   -1.302166
2002-09-02   -0.233152
2002-09-03   -0.786911
2002-09-04   -1.603065
2002-09-05    0.179648
2002-09-06   -1.325742
2002-09-07    2.038028
2002-09-08   -1.254594
2002-09-09 

In [40]:
longer_ts['2001-05']

2001-05-01    0.975384
2001-05-02   -0.740056
2001-05-03   -0.935632
2001-05-04    1.229711
2001-05-05   -1.134324
2001-05-06    0.015352
2001-05-07   -0.227712
2001-05-08    0.165709
2001-05-09    1.214862
2001-05-10    0.867923
2001-05-11    0.184786
2001-05-12   -0.842830
2001-05-13   -0.656268
2001-05-14   -0.516894
2001-05-15    0.258119
2001-05-16   -1.086992
2001-05-17    0.125094
2001-05-18    1.829143
2001-05-19    1.335052
2001-05-20    0.948122
2001-05-21    0.407895
2001-05-22   -0.171975
2001-05-23    1.735346
2001-05-24    0.769540
2001-05-25   -0.867917
2001-05-26    1.652877
2001-05-27    1.546777
2001-05-28    1.453625
2001-05-29   -0.748677
2001-05-30    0.382259
2001-05-31    0.821927
Freq: D, dtype: float64

In [41]:
ts[datetime(2011,1,7):]

2011-01-07    0.624306
2011-01-08   -0.336707
2011-01-10    0.177287
2011-01-12    0.300193
dtype: float64

In [42]:
ts

2011-01-02   -0.500700
2011-01-05   -0.334492
2011-01-07    0.624306
2011-01-08   -0.336707
2011-01-10    0.177287
2011-01-12    0.300193
dtype: float64

In [43]:
ts['1/6/2011':'1/11/2011']

2011-01-07    0.624306
2011-01-08   -0.336707
2011-01-10    0.177287
dtype: float64

In [44]:
ts.truncate(after = '1/9/2011')

2011-01-02   -0.500700
2011-01-05   -0.334492
2011-01-07    0.624306
2011-01-08   -0.336707
dtype: float64

In [45]:
dates = pd.date_range('1/1/2000', periods = 100, freq = 'W-WED')

In [46]:
long_df = pd.DataFrame(np.random.randn(100, 4), index=dates,
                       columns=['Colorado', 'Texas', 'New York', 'Ohio'])

In [47]:
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,0.08442,0.263959,0.221238,-0.227127
2001-05-09,-0.422233,-0.873881,-1.484081,-0.391945
2001-05-16,0.527071,-2.435643,1.168185,0.416573
2001-05-23,-0.843037,0.097488,1.496303,0.268366
2001-05-30,-0.765767,-0.620769,-0.500268,-0.493446


### Time Series with Duplicate Indices

In [48]:
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',
                          '1/3/2000'])

In [49]:
dup_ts = pd.Series(np.arange(5), index = dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [50]:
dup_ts.index.is_unique

False

In [51]:
dup_ts['1/3/2000']

4

In [52]:
dup_ts['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

### grouping of non unique timestamps

In [53]:
grouped = dup_ts.groupby(level = 0)

In [54]:
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32

In [55]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

# Date Ranges, Frequencies, and Shifting

In [56]:
ts

2011-01-02   -0.500700
2011-01-05   -0.334492
2011-01-07    0.624306
2011-01-08   -0.336707
2011-01-10    0.177287
2011-01-12    0.300193
dtype: float64

#### D is for daily

In [57]:
resampler = ts.resample('D')

In [58]:
resampler

DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]

### Generating Date Ranges

In [59]:
index = pd.date_range('2012-04-01', '2012-06-01')
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [60]:
pd.date_range(start = '2012-04-01', periods = 20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [61]:
pd.date_range(end = '2012-06-01', periods = 20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [62]:
pd.date_range(end = '2012-06-01', periods = 20, freq = 'BM')

DatetimeIndex(['2010-10-29', '2010-11-30', '2010-12-31', '2011-01-31',
               '2011-02-28', '2011-03-31', '2011-04-29', '2011-05-31',
               '2011-06-30', '2011-07-29', '2011-08-31', '2011-09-30',
               '2011-10-31', '2011-11-30', '2011-12-30', '2012-01-31',
               '2012-02-29', '2012-03-30', '2012-04-30', '2012-05-31'],
              dtype='datetime64[ns]', freq='BM')

In [63]:
pd.date_range('2012-05-02 12:56:31', periods = 5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

#### time noramlized to midnight

In [64]:
pd.date_range('2012-05-02 12:56:31', periods = 5, normalize = True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

## Frequencies and Date Offsets

In [65]:
from pandas.tseries.offsets import Hour, Minute

In [66]:
hour = Hour()
hour

<Hour>

In [67]:
four_hours = Hour(4)
four_hours

<4 * Hours>

In [68]:
pd.date_range('2000-01-01', '2000-01-03 23:59', freq = '4h')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

#### combining offsets

In [69]:
Hour(2) + Minute(30)

<150 * Minutes>

In [70]:
pd.date_range('2000-01-01', periods=10, freq='1h30min')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

###### week of month dates

In [71]:
rng = pd.date_range('2012-01-01', '2012-09-01', freq = 'WOM-3FRI')

In [72]:
list(rng)

[Timestamp('2012-01-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-02-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-03-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-04-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-05-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-06-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-07-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2012-08-17 00:00:00', freq='WOM-3FRI')]

### Shifting (Leading and Lagging) Data

In [73]:
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts

2000-01-31   -1.105586
2000-02-29   -1.977948
2000-03-31   -0.671033
2000-04-30    0.258435
Freq: M, dtype: float64

In [74]:
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31   -1.105586
2000-04-30   -1.977948
Freq: M, dtype: float64

In [75]:
ts.shift(-2)

2000-01-31   -0.671033
2000-02-29    0.258435
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [76]:
ts / ts.shift(1) - 1

2000-01-31         NaN
2000-02-29    0.789050
2000-03-31   -0.660743
2000-04-30   -1.385129
Freq: M, dtype: float64

In [77]:
ts.shift(2, freq = 'M')

2000-03-31   -1.105586
2000-04-30   -1.977948
2000-05-31   -0.671033
2000-06-30    0.258435
Freq: M, dtype: float64

In [78]:
ts.shift(3, freq = 'D')

2000-02-03   -1.105586
2000-03-03   -1.977948
2000-04-03   -0.671033
2000-05-03    0.258435
dtype: float64

In [79]:
ts.shift(1, freq = '90T')

2000-01-31 01:30:00   -1.105586
2000-02-29 01:30:00   -1.977948
2000-03-31 01:30:00   -0.671033
2000-04-30 01:30:00    0.258435
Freq: M, dtype: float64

### Shifting dates with offsets

In [80]:
from pandas.tseries.offsets import Day, MonthEnd

In [81]:
now = datetime(2011, 11, 17)

In [82]:
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

In [83]:
now + MonthEnd()

Timestamp('2011-11-30 00:00:00')

In [84]:
now + MonthEnd(2)

Timestamp('2011-12-31 00:00:00')

In [85]:
offset = MonthEnd()

In [86]:
offset.rollforward(now)

Timestamp('2011-11-30 00:00:00')

In [87]:
offset.rollback(now)

Timestamp('2011-10-31 00:00:00')

In [88]:
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts

2000-01-15   -1.361511
2000-01-19    0.791980
2000-01-23    0.012882
2000-01-27   -1.197835
2000-01-31    0.541937
2000-02-04   -1.381556
2000-02-08   -0.837757
2000-02-12    0.682511
2000-02-16   -0.844621
2000-02-20   -0.731672
2000-02-24    0.182404
2000-02-28   -0.087418
2000-03-03    0.960466
2000-03-07    0.746441
2000-03-11    0.030532
2000-03-15    0.702837
2000-03-19   -2.110927
2000-03-23    1.885619
2000-03-27   -0.750163
2000-03-31   -1.149977
Freq: 4D, dtype: float64

In [89]:
ts.groupby(offset.rollforward).mean()

2000-01-31   -0.242509
2000-02-29   -0.431159
2000-03-31    0.039353
dtype: float64

In [90]:
ts.resample('M').mean()

2000-01-31   -0.242509
2000-02-29   -0.431159
2000-03-31    0.039353
Freq: M, dtype: float64

# Time Zone Handling

In [91]:
import pytz

In [92]:
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [93]:
tz = pytz.timezone('America/New_York')
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

#### Time Zone Localization and Conversion

In [94]:
rng = pd.date_range('3/9/2012', periods = 6, freq = 'D')

In [95]:
ts = pd.Series(np.random.randn(len(rng)), index = rng)
ts

2012-03-09   -0.440566
2012-03-10   -0.580992
2012-03-11   -0.301589
2012-03-12   -0.196607
2012-03-13   -0.017379
2012-03-14   -0.599406
Freq: D, dtype: float64

In [96]:
print(ts.index.tz)

None


In [97]:
pd.date_range('3/9/2012 9:30', periods = 10, freq = 'D', tz = 'UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [98]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-09 00:00:00+00:00   -0.440566
2012-03-10 00:00:00+00:00   -0.580992
2012-03-11 00:00:00+00:00   -0.301589
2012-03-12 00:00:00+00:00   -0.196607
2012-03-13 00:00:00+00:00   -0.017379
2012-03-14 00:00:00+00:00   -0.599406
Freq: D, dtype: float64

In [99]:
ts_utc.index

DatetimeIndex(['2012-03-09 00:00:00+00:00', '2012-03-10 00:00:00+00:00',
               '2012-03-11 00:00:00+00:00', '2012-03-12 00:00:00+00:00',
               '2012-03-13 00:00:00+00:00', '2012-03-14 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [100]:
ts_utc.tz_convert('America/New_York')

2012-03-08 19:00:00-05:00   -0.440566
2012-03-09 19:00:00-05:00   -0.580992
2012-03-10 19:00:00-05:00   -0.301589
2012-03-11 20:00:00-04:00   -0.196607
2012-03-12 20:00:00-04:00   -0.017379
2012-03-13 20:00:00-04:00   -0.599406
Freq: D, dtype: float64

In [101]:
ts_eastern = ts.tz_localize('America/New_York')

In [102]:
ts_eastern.tz_convert('UTC')

2012-03-09 05:00:00+00:00   -0.440566
2012-03-10 05:00:00+00:00   -0.580992
2012-03-11 05:00:00+00:00   -0.301589
2012-03-12 04:00:00+00:00   -0.196607
2012-03-13 04:00:00+00:00   -0.017379
2012-03-14 04:00:00+00:00   -0.599406
Freq: D, dtype: float64

In [103]:
ts_eastern.tz_convert('Europe/Berlin')

2012-03-09 06:00:00+01:00   -0.440566
2012-03-10 06:00:00+01:00   -0.580992
2012-03-11 06:00:00+01:00   -0.301589
2012-03-12 05:00:00+01:00   -0.196607
2012-03-13 05:00:00+01:00   -0.017379
2012-03-14 05:00:00+01:00   -0.599406
Freq: D, dtype: float64

In [104]:
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-09 00:00:00+08:00', '2012-03-10 00:00:00+08:00',
               '2012-03-11 00:00:00+08:00', '2012-03-12 00:00:00+08:00',
               '2012-03-13 00:00:00+08:00', '2012-03-14 00:00:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

### Operations with Time Zone−Aware Timestamp Objects

In [105]:
stamp = pd.Timestamp('2011-03-12 04:00')

In [106]:
stamp_utc = stamp.tz_localize('utc')

In [107]:
stamp_utc.tz_convert('America/New_York')

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [108]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')

In [109]:
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [110]:
stamp_utc.value

1299902400000000000

In [111]:
stamp_utc.tz_convert('America/New_York').value

1299902400000000000

In [112]:
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp

Timestamp('2012-03-12 01:30:00-0400', tz='US/Eastern')

In [113]:
stamp + Hour()

Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')

In [114]:
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp

Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [115]:
stamp + 2 * Hour()

Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

### Operations Between Different Time Zones

In [116]:
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')

In [117]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-03-07 09:30:00   -0.285937
2012-03-08 09:30:00   -0.223595
2012-03-09 09:30:00   -1.866696
2012-03-12 09:30:00    1.062436
2012-03-13 09:30:00    0.312554
2012-03-14 09:30:00   -1.750828
2012-03-15 09:30:00    0.908282
2012-03-16 09:30:00    0.554433
2012-03-19 09:30:00   -0.201892
2012-03-20 09:30:00    0.574521
Freq: B, dtype: float64

In [118]:
ts1 = ts[:7].tz_localize('Europe/London')

In [119]:
ts2 = ts1[2:].tz_convert('Europe/Moscow')

In [120]:
result = ts1 + ts2

In [121]:
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')

# Periods and Period Arithmetic

In [122]:
p = pd.Period(2007, freq='A-DEC')

In [123]:
p

Period('2007', 'A-DEC')

In [124]:
p + 5

Period('2012', 'A-DEC')

In [125]:
p - 2

Period('2005', 'A-DEC')

In [126]:
pd.Period('2014', freq='A-DEC') - p

<7 * YearEnds: month=12>

In [127]:
rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')

In [128]:
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [129]:
pd.Series(np.random.randn(6), index=rng)

2000-01   -1.049229
2000-02    0.482924
2000-03   -0.070905
2000-04   -0.002193
2000-05    1.086930
2000-06   -0.838728
Freq: M, dtype: float64

In [130]:
values = ['2001Q3', '2002Q2', '2003Q1']

In [131]:
index = pd.PeriodIndex(values, freq='Q-DEC')

In [132]:
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

### Period Frequency Conversion

In [133]:
p = pd.Period('2007', freq='A-DEC')

In [134]:
p

Period('2007', 'A-DEC')

In [135]:
p.asfreq('M', how='start')

Period('2007-01', 'M')

In [136]:
p.asfreq('M', how='end')

Period('2007-12', 'M')

In [137]:
p = pd.Period('2007', freq='A-JUN')

In [138]:
p

Period('2007', 'A-JUN')

In [139]:
p.asfreq('M', 'start')

Period('2006-07', 'M')

In [140]:
p.asfreq('M', 'end')

Period('2007-06', 'M')

In [141]:
p = pd.Period('Aug-2007', 'M')

In [142]:
p.asfreq('A-JUN')

Period('2008', 'A-JUN')

In [143]:
rng = pd.period_range('2006', '2009', freq='A-DEC')

In [144]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2006   -0.078478
2007   -1.982084
2008   -0.576801
2009    0.337072
Freq: A-DEC, dtype: float64

In [145]:
ts.asfreq('M', how='start')

2006-01   -0.078478
2007-01   -1.982084
2008-01   -0.576801
2009-01    0.337072
Freq: M, dtype: float64

In [146]:
ts.asfreq('B', how='end')

2006-12-29   -0.078478
2007-12-31   -1.982084
2008-12-31   -0.576801
2009-12-31    0.337072
Freq: B, dtype: float64

### Quarterly Period Frequencies

In [147]:
p = pd.Period('2012Q4', freq='Q-JAN')

In [148]:
p

Period('2012Q4', 'Q-JAN')

In [149]:
p.asfreq('D', 'start')

Period('2011-11-01', 'D')

In [150]:
p.asfreq('D', 'end')

Period('2012-01-31', 'D')

In [151]:
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60

In [152]:
p4pm

Period('2012-01-30 16:00', 'T')

In [153]:
p4pm.to_timestamp()

Timestamp('2012-01-30 16:00:00')

In [154]:
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')

In [155]:
ts = pd.Series(np.arange(len(rng)), index=rng)
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int32

In [156]:
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60

In [157]:
ts.index = new_rng.to_timestamp()

In [158]:
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int32

### Converting Timestamps to Periods (and Back)

In [159]:
rng = pd.date_range('2000-01-01', periods=3, freq='M')

In [160]:
ts = pd.Series(np.random.randn(3), index=rng)

In [161]:
ts

2000-01-31   -0.360067
2000-02-29    0.471873
2000-03-31    1.064727
Freq: M, dtype: float64

In [162]:
pts = ts.to_period()
pts

2000-01   -0.360067
2000-02    0.471873
2000-03    1.064727
Freq: M, dtype: float64

In [163]:
rng = pd.date_range('1/29/2000', periods=6, freq='D')

In [164]:
ts2 = pd.Series(np.random.randn(6), index=rng)

In [165]:
ts2

2000-01-29   -0.731633
2000-01-30    0.642108
2000-01-31   -0.364014
2000-02-01   -0.734407
2000-02-02   -0.005475
2000-02-03    1.092611
Freq: D, dtype: float64

In [166]:
ts2.to_period('M')

2000-01   -0.731633
2000-01    0.642108
2000-01   -0.364014
2000-02   -0.734407
2000-02   -0.005475
2000-02    1.092611
Freq: M, dtype: float64

In [167]:
pts = ts2.to_period()

In [168]:
pts

2000-01-29   -0.731633
2000-01-30    0.642108
2000-01-31   -0.364014
2000-02-01   -0.734407
2000-02-02   -0.005475
2000-02-03    1.092611
Freq: D, dtype: float64

In [169]:
pts.to_timestamp(how='end')

2000-01-29 23:59:59.999999999   -0.731633
2000-01-30 23:59:59.999999999    0.642108
2000-01-31 23:59:59.999999999   -0.364014
2000-02-01 23:59:59.999999999   -0.734407
2000-02-02 23:59:59.999999999   -0.005475
2000-02-03 23:59:59.999999999    1.092611
Freq: D, dtype: float64

### Creating a PeriodIndex from Arrays

In [170]:
data = pd.read_csv('pydata-book-2nd-edition/examples/macrodata.csv')

In [171]:
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [172]:
data.year

0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
5      1960.0
6      1960.0
7      1960.0
8      1961.0
9      1961.0
10     1961.0
11     1961.0
12     1962.0
13     1962.0
14     1962.0
15     1962.0
16     1963.0
17     1963.0
18     1963.0
19     1963.0
20     1964.0
21     1964.0
22     1964.0
23     1964.0
24     1965.0
25     1965.0
26     1965.0
27     1965.0
28     1966.0
29     1966.0
        ...  
173    2002.0
174    2002.0
175    2002.0
176    2003.0
177    2003.0
178    2003.0
179    2003.0
180    2004.0
181    2004.0
182    2004.0
183    2004.0
184    2005.0
185    2005.0
186    2005.0
187    2005.0
188    2006.0
189    2006.0
190    2006.0
191    2006.0
192    2007.0
193    2007.0
194    2007.0
195    2007.0
196    2008.0
197    2008.0
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64

In [173]:
data.quarter

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
5      2.0
6      3.0
7      4.0
8      1.0
9      2.0
10     3.0
11     4.0
12     1.0
13     2.0
14     3.0
15     4.0
16     1.0
17     2.0
18     3.0
19     4.0
20     1.0
21     2.0
22     3.0
23     4.0
24     1.0
25     2.0
26     3.0
27     4.0
28     1.0
29     2.0
      ... 
173    2.0
174    3.0
175    4.0
176    1.0
177    2.0
178    3.0
179    4.0
180    1.0
181    2.0
182    3.0
183    4.0
184    1.0
185    2.0
186    3.0
187    4.0
188    1.0
189    2.0
190    3.0
191    4.0
192    1.0
193    2.0
194    3.0
195    4.0
196    1.0
197    2.0
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [174]:
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [175]:
data.index = index

In [176]:
data.infl

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
1960Q2    0.14
1960Q3    2.70
1960Q4    1.21
1961Q1   -0.40
1961Q2    1.47
1961Q3    0.80
1961Q4    0.80
1962Q1    2.26
1962Q2    0.13
1962Q3    2.11
1962Q4    0.79
1963Q1    0.53
1963Q2    2.75
1963Q3    0.78
1963Q4    2.46
1964Q1    0.13
1964Q2    0.90
1964Q3    1.29
1964Q4    2.05
1965Q1    1.28
1965Q2    2.54
1965Q3    0.89
1965Q4    2.90
1966Q1    4.99
1966Q2    2.10
          ... 
2002Q2    1.56
2002Q3    2.66
2002Q4    3.08
2003Q1    1.31
2003Q2    1.09
2003Q3    2.60
2003Q4    3.02
2004Q1    2.35
2004Q2    3.61
2004Q3    3.58
2004Q4    2.09
2005Q1    4.15
2005Q2    1.85
2005Q3    9.14
2005Q4    0.40
2006Q1    2.60
2006Q2    3.97
2006Q3   -1.58
2006Q4    3.30
2007Q1    4.58
2007Q2    2.75
2007Q3    3.45
2007Q4    6.38
2008Q1    2.82
2008Q2    8.53
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, Length: 203, dtype: float64

# Resampling and Frequency Conversion

In [177]:
rng = pd.date_range('2000-01-01', periods=100, freq='D')

In [178]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2000-01-01    0.819332
2000-01-02   -1.715292
2000-01-03   -0.303608
2000-01-04   -0.502745
2000-01-05   -0.923817
2000-01-06   -0.127744
2000-01-07   -0.064116
2000-01-08    1.181054
2000-01-09    1.267019
2000-01-10   -0.558594
2000-01-11    0.032791
2000-01-12    1.513176
2000-01-13    0.049295
2000-01-14   -0.544099
2000-01-15    0.126081
2000-01-16   -0.398562
2000-01-17    0.308980
2000-01-18   -0.437398
2000-01-19    0.355331
2000-01-20   -0.872407
2000-01-21    2.375946
2000-01-22   -0.253626
2000-01-23    0.190149
2000-01-24    0.566178
2000-01-25    1.161567
2000-01-26   -0.311049
2000-01-27   -1.876139
2000-01-28   -1.224101
2000-01-29    1.613030
2000-01-30    1.596815
                ...   
2000-03-11   -0.554805
2000-03-12    0.768800
2000-03-13   -2.050477
2000-03-14    1.035245
2000-03-15   -2.013595
2000-03-16    1.719789
2000-03-17    0.786447
2000-03-18   -0.893810
2000-03-19   -0.138916
2000-03-20   -0.762206
2000-03-21    2.543446
2000-03-22    0.220169
2000-03-23 

In [179]:
ts.resample('M').mean()

2000-01-31    0.148721
2000-02-29   -0.106639
2000-03-31   -0.090838
2000-04-30   -0.386549
Freq: M, dtype: float64

In [180]:
ts.resample('M', kind='period').mean()

2000-01    0.148721
2000-02   -0.106639
2000-03   -0.090838
2000-04   -0.386549
Freq: M, dtype: float64