In [1]:
import pandas as pd
import numpy as np

# TIMES


In [2]:
# Frequency aliases: M = month END (every stamp below lands on the last day
# of its month and keeps the 10:15 time of day); you can also do D (days),
# H (hours), MS (start of month), B (business days)
rng = pd.date_range('2016 Jul 15 10:15', periods = 10, freq = 'M')
rng

DatetimeIndex(['2016-07-31 10:15:00', '2016-08-31 10:15:00',
               '2016-09-30 10:15:00', '2016-10-31 10:15:00',
               '2016-11-30 10:15:00', '2016-12-31 10:15:00',
               '2017-01-31 10:15:00', '2017-02-28 10:15:00',
               '2017-03-31 10:15:00', '2017-04-30 10:15:00'],
              dtype='datetime64[ns]', freq='M')

In [3]:
# Range bounded on both sides: one stamp every 12 hours beginning at `start`,
# never going past `end`, localized to Hong Kong time (+08:00).
# The hour alias is spelled lowercase ('h'), the same spelling this notebook
# uses in its '2h20min' example.
rng = pd.date_range(start='2016 Jul 15 10:15',
                    end='2016 Jul 25',
                    freq='12h',
                    tz='Asia/Hong_Kong')
rng

DatetimeIndex(['2016-07-15 10:15:00+08:00', '2016-07-15 22:15:00+08:00',
               '2016-07-16 10:15:00+08:00', '2016-07-16 22:15:00+08:00',
               '2016-07-17 10:15:00+08:00', '2016-07-17 22:15:00+08:00',
               '2016-07-18 10:15:00+08:00', '2016-07-18 22:15:00+08:00',
               '2016-07-19 10:15:00+08:00', '2016-07-19 22:15:00+08:00',
               '2016-07-20 10:15:00+08:00', '2016-07-20 22:15:00+08:00',
               '2016-07-21 10:15:00+08:00', '2016-07-21 22:15:00+08:00',
               '2016-07-22 10:15:00+08:00', '2016-07-22 22:15:00+08:00',
               '2016-07-23 10:15:00+08:00', '2016-07-23 22:15:00+08:00',
               '2016-07-24 10:15:00+08:00', '2016-07-24 22:15:00+08:00'],
              dtype='datetime64[ns, Asia/Hong_Kong]', freq='12H')

In [4]:
# Which of these formats DON'T work?
#'2016 Jul 1', '7/1/2016', '1/7/2016', 'July 1, 2016', '2016-07-01', '2016/07/01'
# answer: '1/7/2016' -- it parses, but as January 7 rather than 1 July (pandas reads '/'-separated dates month-first, the American way)

In [5]:
# Is '7/1/2016' in January or July?
# answer: July

In [6]:
# What is the class of an individual object held in the date_range?
# answer: every element of the index is a pandas Timestamp
type(rng[1])

pandas.tslib.Timestamp

# TIME STAMPS VS TIME SPANS

In [7]:
# A date-only string yields a Timestamp at midnight of that day
pd.Timestamp('2016-07-10')

Timestamp('2016-07-10 00:00:00')

In [8]:
# You can also add more detail, e.g. the hour
pd.Timestamp('2016-07-10 10')

Timestamp('2016-07-10 10:00:00')

In [9]:
# Or even more: minutes, seconds and fractions of a second
# (the resolution goes down to nanoseconds)
pd.Timestamp('2016-07-10 10:15:15.1')

Timestamp('2016-07-10 10:15:15.100000')

In [10]:
# How much detail can you add?
# answer: nine fractional digits (nanoseconds) are kept, as the repr shows
pd.Timestamp('2016-07-10 10:15:15.123456789')

Timestamp('2016-07-10 10:15:15.123456789')

In [11]:
# What are some properties of timestamps? Try them out.
# hint: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-date-components
# The components printed below depend only on the date part, so the time of
# day is written as one unambiguous 24-hour value instead of two clashing ones.
t = pd.Timestamp('2016-07-10 20:15')
print(t.month)       # month number, 1-12
print(t.quarter)     # calendar quarter, 1-4
print(t.day_name())  # weekday as text; method form that replaces `weekday_name`

7
3
Sunday


# TIME SPANS

In [12]:
# A year-month string yields a Period covering that whole month (freq 'M')
pd.Period('2016-01')

Period('2016-01', 'M')

In [13]:
# What's that extra info above? How does it get set?
# answer: it is the period's frequency, i.e. the length of the span. From the documentation:
# "the span represented by Period can be specified explicitly, or inferred from datetime string format."

In [14]:
# A full date yields a one-day Period (freq 'D')
pd.Period('2016-01-01')

Period('2016-01-01', 'D')

In [15]:
# Adding an hour to the string yields an hourly Period (freq 'H')
pd.Period('2016-01-01 10')

Period('2016-01-01 10:00', 'H')

In [16]:
# Adding minutes yields a one-minute Period (shown with frequency code 'T')
pd.Period('2016-01-01 10:10')

Period('2016-01-01 10:10', 'T')

In [17]:
# Adding seconds yields a one-second Period (shown with frequency code 'S')
pd.Period('2016-01-01 10:10:10')

Period('2016-01-01 10:10:10', 'S')

In [18]:
# What's the most detailed Period you can get?
# nanoseconds

In [19]:
# How can you make multiple time periods?
# NOTE(review): date_range builds a DatetimeIndex of daily *timestamps* (see
# the output below), not Period objects; pd.period_range, used later in this
# notebook, is the call that returns a PeriodIndex -- confirm which was intended.
pd.date_range('2016-01-01', '2016-01-10')

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

# TIME OFFSETS

In [20]:
# Several units can be combined in one string: 2 minutes plus 1 day.
# Minutes are spelled 'min'; a bare 'M' is ambiguous (minutes vs. months).
pd.Timedelta('2min 1D')

Timedelta('1 days 00:02:00')

In [21]:
# A Timedelta shifts a Period: the result is the same one-minute span, one day later
pd.Period('2016-01-01 10:10') + pd.Timedelta('1 day')

Period('2016-01-02 10:10', 'T')

In [22]:
# The same one-day offset applied to a Timestamp (a single instant)
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('1 day')

Timestamp('2016-01-02 10:10:00')

In [23]:
# Offsets as small as a few nanoseconds are kept in the result
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('15 ns')

Timestamp('2016-01-01 10:10:00.000000015')

In [24]:
# A Period is a span, so we can ask whether a given instant falls inside it
p = pd.Period('7/2016')
t = pd.Timestamp('7/21/2016')
# strictly after the span's first instant and strictly before its last
p.start_time < t < p.end_time

True

# FANCY FREQUENCY SETTING

In [25]:
# Only want business days
# (the weekend of Jan 2-3 is skipped and the 10:10 time of day is dropped,
# as the output shows)
pd.period_range('2016-01-01 10:10', freq = 'B', periods = 10)

PeriodIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
             '2016-01-07', '2016-01-08', '2016-01-11', '2016-01-12',
             '2016-01-13', '2016-01-14'],
            dtype='period[B]', freq='B')

In [26]:
# It's possible to combine frequencies. What if you want to advance by 25 hours each day? What are the 2 ways to do it?
# First way: a single multiple of the hourly frequency (lowercase 'h' alias).
p1 = pd.period_range('2016-01-01 10:10', freq='25h', periods=10)
p1

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [27]:
# The other way to advance 25 hours at a time: spell the step as one day
# plus one hour (lowercase 'h' alias); the two parts are added together.
p2 = pd.period_range('2016-01-01 10:10', freq='1D1h', periods=10)
p2

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]', freq='25H')

In [28]:
# What are some other combos you can produce?
# hint: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases

# e.g. hours and minutes together: each period starts 2 h 20 min after the last
p2 = pd.period_range(start='2016-01-01 10:10', periods=10, freq='2h20min')
p2

PeriodIndex(['2016-01-01 10:10', '2016-01-01 12:30', '2016-01-01 14:50',
             '2016-01-01 17:10', '2016-01-01 19:30', '2016-01-01 21:50',
             '2016-01-02 00:10', '2016-01-02 02:30', '2016-01-02 04:50',
             '2016-01-02 07:10'],
            dtype='period[140T]', freq='140T')

# INDEXING WITH TIME OBJECTS

In [29]:
# You can use these objects for indices
# Let's start with using a date range as above
rng = pd.date_range('2016 Jul 1', periods=10, freq='D')
# Label the integers 0..9 with the ten consecutive days. Only the last
# expression of a cell is displayed, so the Series is the sole trailing expression.
pd.Series(range(len(rng)), index=rng)

2016-07-01    0
2016-07-02    1
2016-07-03    2
2016-07-04    3
2016-07-05    4
2016-07-06    5
2016-07-07    6
2016-07-08    7
2016-07-09    8
2016-07-10    9
Freq: D, dtype: int64

In [30]:
# A period index is the better fit when each label stands for a span of
# time (here: whole months) rather than a single instant.
periods = [pd.Period(month) for month in ('2016-01', '2016-02', '2016-03')]
# one random draw per month; the draws are unseeded, so values change on every run
ts = pd.Series(np.random.randn(len(periods)), index=periods)
ts

2016-01   -1.105024
2016-02    1.761357
2016-03    0.797093
Freq: M, dtype: float64

In [31]:
# What type is the index for ts?
# answer: a PeriodIndex, built from the list of Period labels
type(ts.index)

pandas.tseries.period.PeriodIndex

In [32]:
# Experiment with various indices
# Hint: does ts['2016'] work?
# answer: yes -- the year string selects every monthly period that falls in 2016
ts['2016']

2016-01   -1.105024
2016-02    1.761357
2016-03    0.797093
Freq: M, dtype: float64

In [33]:
# Timestamped data can be converted to period indices with to_period and vice versa with to_timestamp
# Start from ten hourly timestamps beginning 2016-07-10 08:00 (lowercase 'h' hour alias).
ts = pd.Series(range(10), pd.date_range('07-10-16 8:00', periods=10, freq='h'))
ts

2016-07-10 08:00:00    0
2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
2016-07-10 12:00:00    4
2016-07-10 13:00:00    5
2016-07-10 14:00:00    6
2016-07-10 15:00:00    7
2016-07-10 16:00:00    8
2016-07-10 17:00:00    9
Freq: H, dtype: int64

In [34]:
# Re-label each hourly timestamp as the one-hour period it starts (values unchanged)
ts_period = ts.to_period()
ts_period

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
2016-07-10 12:00    4
2016-07-10 13:00    5
2016-07-10 14:00    6
2016-07-10 15:00    7
2016-07-10 16:00    8
2016-07-10 17:00    9
Freq: H, dtype: int64

In [35]:
# And back again: each period becomes the timestamp at its start
ts_ts = ts_period.to_timestamp()
ts_ts

2016-07-10 08:00:00    0
2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
2016-07-10 12:00:00    4
2016-07-10 13:00:00    5
2016-07-10 14:00:00    6
2016-07-10 15:00:00    7
2016-07-10 16:00:00    8
2016-07-10 17:00:00    9
Freq: H, dtype: int64

In [36]:
# With periods, slicing selects by overlap: the 08:00 hour is returned
# because it contains the 08:30 slice start.
ts_period['2016-07-10 08:30':'2016-07-10 11:45'] # periods are kept when they overlap the slice

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
Freq: H, dtype: int64

In [37]:
# With timestamps, slicing selects by inclusion: only instants lying inside
# the bounds are returned, so 08:00 is left out.
ts['2016-07-10 08:30':'2016-07-10 11:45'] # timestamps are kept only when they fall within the slice

2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
Freq: H, dtype: int64