In [4]:
import datetime
import pandas as pd
import numpy as np

In [2]:
# Time Series information can be parsed from various sources and formates

In [5]:
dti = pd.to_datetime(
["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018,1,1)])

In [6]:
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

In [7]:
# Generating sequencees of fixed-frequebcy dates and time spans #

In [10]:
fixed_frq_dates = pd.date_range("2018-01-01", periods=6, freq="H")

In [11]:
fixed_frq_dates

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00'],
              dtype='datetime64[ns]', freq='H')

In [13]:
# Converting to UTC time #

In [12]:
fixed_frq_dates.tz_localize("UTC")

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00', '2018-01-01 03:00:00+00:00',
               '2018-01-01 04:00:00+00:00', '2018-01-01 05:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

In [14]:
# Converting to local time zone #

In [16]:
fixed_frq_dates.tz_localize("UTC").tz_convert("US/Pacific")

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00', '2017-12-31 19:00:00-08:00',
               '2017-12-31 20:00:00-08:00', '2017-12-31 21:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

In [17]:
# Resampling or converting a time series to a particular frequency

In [19]:
idx = pd.date_range(start="2018-01-01", periods=5, freq="H")

In [22]:
ts = pd.Series(range(len(idx)), index=idx)

In [49]:
# ts obj. is sampled with hourly frequency
# let's resampled/downsample it with 2 Hour freq, and calc. mean
# Resampling is akin to groupBy with indices on index
# ts.resample("2H").groups will provide group key

In [42]:
ts.resample("2H").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [47]:
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64

In [46]:
ts.resample("2H").groups

{Timestamp('2018-01-01 00:00:00', freq='2H'): 2,
 Timestamp('2018-01-01 02:00:00', freq='2H'): 4,
 Timestamp('2018-01-01 04:00:00', freq='2H'): 5}

In [37]:
ts.resample("2H").get_group(name='2018-01-01 00:00:00')

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
Freq: H, dtype: int64

In [39]:
ts.resample("2H").get_group(name='2018-01-01 02:00:00')

2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
Freq: H, dtype: int64

In [40]:
ts.resample("2H").get_group(name='2018-01-01 04:00:00')

2018-01-01 04:00:00    4
Freq: H, dtype: int64

In [51]:
# Doing Date and Time Arithemetic with abs or relative time increments #

In [64]:
friday = pd.Timestamp("2018-01-05")

In [65]:
friday.day_name()

'Friday'

In [66]:
saturday = friday + pd.Timedelta("1 day")

In [67]:
saturday.day_name()

'Saturday'

In [68]:
friday

Timestamp('2018-01-05 00:00:00')

In [69]:
monday = friday + pd.offsets.BDay()

In [70]:
monday.day_name()

'Monday'

## Overview

1. Date times: Similar to `datetime.datetime` from the standard library
2. Time delta: Similar to `datetime.timedelta`from python
3. Time spans: A span of time defined with req. frequency


In [73]:
# Series with time component in index #
pd.Series(range(4), index=pd.date_range(start="2000",
                                        freq="D", periods=4))

2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
Freq: D, dtype: int64

In [74]:
pd.Series(pd.date_range(start="2000",freq="D", periods=4))

0   2000-01-01
1   2000-01-02
2   2000-01-03
3   2000-01-04
dtype: datetime64[ns]

In [75]:
pd.Series(pd.period_range(start="2000",freq="D", periods=4))

0    2000-01-01
1    2000-01-02
2    2000-01-03
3    2000-01-04
dtype: period[D]

In [76]:
# Null date times, time deltas and time spans as NAT
# can be usefull representing missing or null date like 

In [78]:
pd.Timestamp(pd.NaT)

NaT

In [80]:
pd.Timedelta(pd.NaT)

NaT

In [81]:
pd.Period(pd.NaT)

NaT

In [82]:
pd.NaT == pd.NaT

False

In [83]:
np.nan == np.nan

False

## Timestamps vs. time spans

**Timestamp:**
When values are associated with time:
> Like a graph between: `Temperature vs Time`

**timespans:**
Change in variables can be associated with a time span

In [85]:
pd.Timestamp(datetime.datetime(2012, 5, 1))

Timestamp('2012-05-01 00:00:00')

In [86]:
pd.Timestamp("2012-05-01")

Timestamp('2012-05-01 00:00:00')

In [87]:
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [88]:
pd.Period("2011-01")

Period('2011-01', 'M')

In [89]:
pd.Period("2012-05", freq="D")

Period('2012-05-01', 'D')

In [91]:
# Both timestamp and period can serve as an index #
date = [pd.Timestamp("2012-05-01"),
        pd.Timestamp("2012-05-02"),
        pd.Timestamp("2012-05-03")]

In [93]:
ts = pd.Series(np.random.randn(3), date)

In [94]:
ts

2012-05-01   -0.350305
2012-05-02   -0.224016
2012-05-03   -1.233673
dtype: float64

In [95]:
type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex