In [1]:
# Echo the value of every bare expression statement in a cell, not only the
# last one (IPython's default is "last_expr"). The cells below list several
# expressions each and show one output per expression.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [5]:
"""
Anything that is observed or measured at many points in time forms a time series. Many time series are fixed frequency
• Timestamps, specific instants in time
• Fixed periods, such as the month January 2007 or the full year 2010
• Intervals of time, indicated by a start and end timestamp. Periods can be thought
of as special cases of intervals
• Experiment or elapsed time; each timestamp is a measure of time relative to a
particular start time (e.g., the diameter of a cookie baking each second since
being placed in the oven)
"""
# Date and time data types and tools from the standard library
from datetime import datetime, timedelta

# Capture the current local time and inspect its calendar fields
now = datetime.now()
now
(now.year, now.month, now.day)

# Subtracting two datetimes yields a timedelta, exposed as days plus leftover seconds
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, hour=8, minute=15)
delta
(delta.days, delta.seconds)

# Adding a timedelta shifts a datetime; here the shift is 12 days
start = datetime(2011, 1, 7)
start + timedelta(days=12)

'\nAnything that is observed or measured at many points in time forms a time series. Many time series are fixed frequency\n• Timestamps, specific instants in time\n• Fixed periods, such as the month January 2007 or the full year 2010\n• Intervals of time, indicated by a start and end timestamp. Periods can be thought\nof as special cases of intervals\n• Experiment or elapsed time; each timestamp is a measure of time relative to a\nparticular start time (e.g., the diameter of a cookie baking each second since\nbeing placed in the oven)\n'

datetime.datetime(2020, 2, 16, 16, 18, 45, 487628)

(2020, 2, 16)

datetime.timedelta(926, 56700)

(926, 56700)

datetime.datetime(2011, 1, 19, 0, 0)

In [8]:
# Converting between strings and datetime objects
stamp = datetime(2011, 1, 3)
str(stamp)
stamp.strftime('%Y-%m-%d')
stamp.strftime('%y-%m-%d-%w')
# NOTE: strftime() hands its format to the platform C library, so shortcut
# directives such as %F are only available where that library supports them.
# %F zero-pads month and day, e.g. '2011-01-03'.
stamp.strftime('%F')
"""
%Y Four-digit year
%y Two-digit year
%m Two-digit month [01, 12]
%d Two-digit day [01, 31]
%H Hour (24-hour clock) [00, 23]
%I Hour (12-hour clock) [01, 12]
%M Two-digit minute [00, 59]
%S Second [00, 61] (seconds 60, 61 account for leap seconds)
%w Weekday as integer [0 (Sunday), 6]
%U Week number of the year [00, 53]; Sunday is considered the first day of the week, and days before the first Sunday of
the year are “week 0”
%W Week number of the year [00, 53]; Monday is considered the first day of the week, and days before the first Monday of
the year are “week 0”
%z UTC time zone offset as +HHMM or -HHMM; empty if time zone naive
%F Shortcut for %Y-%m-%d (e.g., 2012-4-18)
%D Shortcut for %m/%d/%y (e.g., 04/18/12)
"""
# From str to datetime.
# strptime() accepts a narrower set of directives than strftime(): the %F
# shortcut has historically been rejected with "'F' is a bad directive",
# which stopped this cell before the dateutil examples could run. Spelling
# the format out as %Y-%m-%d is equivalent and portable.
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')
# parser.parse method in the third-party dateutil package
from dateutil.parser import parse
parse('2011-01-03')
parse('Jan 31, 1997 10:45 PM')
parse('6/12/2011', dayfirst=True)   # day appearing before month
# dateutil.parser is a useful but imperfect tool: it will recognize some
# strings as dates that you might prefer that it didn't.
# For example, '42' will be parsed as the year 2042.

'2011-01-03 00:00:00'

'2011-01-03'

'11-01-03-1'

'2011-01-03'

'\n%Y Four-digit year\n%y Two-digit year\n%m Two-digit month [01, 12]\n%d Two-digit day [01, 31]\n%H Hour (24-hour clock) [00, 23]\n%I Hour (12-hour clock) [01, 12]\n%M Two-digit minute [00, 59]\n%S Second [00, 61] (seconds 60, 61 account for leap seconds)\n%w Weekday as integer [0 (Sunday), 6]\n%U Week number of the year [00, 53]; Sunday is considered the first day of the week, and days before the first Sunday of\nthe year are “week 0”\n%W Week number of the year [00, 53]; Monday is considered the first day of the week, and days before the first Monday of\nthe year are “week 0”\n%z UTC time zone offset as +HHMM or -HHMM; empty if time zone naive\n%F Shortcut for %Y-%m-%d (e.g., 2012-4-18)\n%D Shortcut for %m/%d/%y (e.g., 04/18/12)\n'

In [4]:
import pandas as pd

# pandas converts a whole list of date strings in a single call
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)
# A missing entry (None) comes back as NaT, which the null check flags as True
idx = pd.to_datetime([*datestrs, None])
idx
pd.isna(idx)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

array([False, False,  True])

In [12]:
# Time series basics
import numpy as np
from datetime import datetime

# First, a three-element Series indexed by the DatetimeIndex `idx` from the previous cell
ts = pd.Series(np.random.randn(3), index=idx)
ts

# Then rebind `ts` to six irregularly spaced observations from January 2011
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(np.random.randn(6), index=dates)
ts
ts.index
# Arithmetic aligns on the date labels; labels missing from one operand give NaN
ts + ts.iloc[::2]
stamp = ts.index[0]
stamp

2011-07-06 12:00:00   -0.788390
2011-08-06 00:00:00   -1.369073
NaT                    0.044217
dtype: float64

2011-01-02    0.241353
2011-01-05   -1.124495
2011-01-07   -0.769710
2011-01-08    1.014994
2011-01-10    0.011631
2011-01-12   -0.988195
dtype: float64

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

2011-01-02    0.482706
2011-01-05         NaN
2011-01-07   -1.539420
2011-01-08         NaN
2011-01-10    0.023261
2011-01-12         NaN
dtype: float64

Timestamp('2011-01-02 00:00:00')

In [17]:
# Indexing, selection, subsetting
stamp = ts.index[2]
stamp
ts.loc[stamp]
# Strings that can be interpreted as dates are accepted as labels too
ts.loc['1/10/2011']
ts.loc['20110110']

# A longer series: 1000 consecutive periods starting on 2000-01-01
longer_index = pd.date_range(start='1/1/2000', periods=1000)
longer_ts = pd.Series(np.random.randn(1000), index=longer_index)
longer_ts
longer_ts.loc['2001']   # a bare year selects every observation in 2001
# Slicing with a datetime object
ts.loc[datetime(2011, 1, 7):]

Timestamp('2011-01-07 00:00:00')

-0.7697099387469818

0.01163065013699577

0.01163065013699577

2000-01-01   -1.584814
2000-01-02    0.592963
2000-01-03   -0.106804
2000-01-04   -0.659606
2000-01-05    1.065595
2000-01-06    0.724497
2000-01-07   -0.406729
2000-01-08    0.548382
2000-01-09    0.744132
2000-01-10   -1.906219
2000-01-11   -0.715561
2000-01-12    1.591292
2000-01-13    1.157389
2000-01-14   -0.050235
2000-01-15   -0.335627
2000-01-16    0.852874
2000-01-17   -2.653801
2000-01-18    1.451410
2000-01-19   -0.918924
2000-01-20    1.134049
2000-01-21    0.374802
2000-01-22   -0.022881
2000-01-23   -0.331475
2000-01-24   -0.659360
2000-01-25   -0.437885
2000-01-26    2.097430
2000-01-27    0.908668
2000-01-28    1.221123
2000-01-29   -0.101854
2000-01-30    0.535357
                ...   
2002-08-28    0.945968
2002-08-29   -0.752068
2002-08-30   -2.435870
2002-08-31   -0.216566
2002-09-01   -0.190708
2002-09-02   -0.699760
2002-09-03    0.393022
2002-09-04    0.689626
2002-09-05    0.025603
2002-09-06    2.942408
2002-09-07    0.175947
2002-09-08   -0.811778
2002-09-09 

2001-01-01    0.970215
2001-01-02   -0.603992
2001-01-03    2.369241
2001-01-04   -1.837530
2001-01-05    1.437161
2001-01-06    0.327961
2001-01-07   -0.369043
2001-01-08    0.918330
2001-01-09   -0.727845
2001-01-10    0.199112
2001-01-11    0.322122
2001-01-12   -0.485507
2001-01-13    0.350024
2001-01-14   -0.083758
2001-01-15   -1.150379
2001-01-16    0.902140
2001-01-17   -1.536772
2001-01-18    1.049771
2001-01-19   -0.327860
2001-01-20   -0.709047
2001-01-21    1.332727
2001-01-22    1.399755
2001-01-23   -0.493382
2001-01-24    0.796419
2001-01-25    0.605971
2001-01-26    0.323677
2001-01-27   -0.433454
2001-01-28    1.737380
2001-01-29    0.172487
2001-01-30    0.096060
                ...   
2001-12-02   -0.630782
2001-12-03    1.620075
2001-12-04   -0.163346
2001-12-05    0.213443
2001-12-06    0.749441
2001-12-07   -0.847627
2001-12-08   -1.728615
2001-12-09    1.463516
2001-12-10    1.161815
2001-12-11   -0.789612
2001-12-12    0.058826
2001-12-13   -0.392971
2001-12-14 

2011-01-07   -0.769710
2011-01-08    1.014994
2011-01-10    0.011631
2011-01-12   -0.988195
dtype: float64

In [19]:
# Range query: the slice endpoints do not have to be labels present in the
# series. Slicing produces views.
ts
ts.loc['1/6/2011':'1/11/2011']
# truncate() keeps only the observations up to the `after` date
ts.truncate(before=None, after='1/9/2011')


2011-01-02    0.241353
2011-01-05   -1.124495
2011-01-07   -0.769710
2011-01-08    1.014994
2011-01-10    0.011631
2011-01-12   -0.988195
dtype: float64

2011-01-07   -0.769710
2011-01-08    1.014994
2011-01-10    0.011631
dtype: float64

In [21]:
# The same date-based selection works on a DataFrame, applied to its rows
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')
dates
state_names = ['Colorado', 'Texas', 'New York', 'Ohio']
long_df = pd.DataFrame(
    np.random.randn(100, 4),
    index=dates,
    columns=state_names,
)
# A month-year string selects every row falling in that month
long_df.loc['5-2001']

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
      

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-1.112818,-0.096935,0.004793,-0.328774
2001-05-09,0.320603,-0.595071,0.776346,-1.242355
2001-05-16,1.78832,0.403644,-1.364312,0.790722
2001-05-23,-1.335367,1.487993,1.050101,-0.817476
2001-05-30,2.069516,1.501538,-0.533321,-0.215638


In [24]:
# Time series whose index contains repeated timestamps
day_labels = ['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000']
dates = pd.DatetimeIndex(day_labels)
dup_ts = pd.Series(np.arange(len(day_labels)), index=dates)
dup_ts
# Selecting a repeated label returns every matching observation
dup_ts.loc['1/2/2000']
# Grouping on index level 0 aggregates the observations sharing a timestamp
grouped = dup_ts.groupby(level=0)
grouped.mean()


2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32