In [1]:
from datetime import datetime
import pandas as pd
import numpy as np

In [2]:
# A time series in pandas is a Series whose index holds timestamps.
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
# Draw from a locally seeded generator so the values are identical on every
# Restart & Run All (no global random state is touched).
ts = pd.Series(np.random.default_rng(42).standard_normal(len(dates)), index=dates)
ts

2011-01-02    0.464818
2011-01-05   -0.683008
2011-01-07    0.692004
2011-01-08   -1.117441
2011-01-10    0.739387
2011-01-12   -0.802033
dtype: float64

In [3]:
# The index of a timestamp-keyed Series is a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [4]:
# Arithmetic between differently indexed time series aligns automatically on the dates.
ts + ts[::2] # ts[::2] keeps every second element; dates without a match become NaN

2011-01-02    0.929635
2011-01-05         NaN
2011-01-07    1.384007
2011-01-08         NaN
2011-01-10    1.478774
2011-01-12         NaN
dtype: float64

In [5]:
# The index stores its timestamps as NumPy datetime64 values at nanosecond resolution.
ts.index.dtype

dtype('<M8[ns]')

In [6]:
# Scalar values taken from a DatetimeIndex are pandas Timestamp objects
# (the datetime objects passed in were converted).
ts.index[0]

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting

In [7]:
# Select a single value by passing an index label (here the third timestamp).
stamp = ts.index[2]
ts[stamp]

0.6920036003335818

In [8]:
# A string that can be interpreted as a date works as a label too.
ts['20110110']


0.7393872029158588

In [9]:
# A longer daily series: 1000 consecutive days starting on 2000-01-01.
# With this much data, a year or year-month string is enough to select a slice.
# The generator is seeded locally so the values are reproducible on re-run.
longer_ts = pd.Series(np.random.default_rng(43).standard_normal(1000),
                      index=pd.date_range('1/1/2000', periods=1000))
longer_ts

2000-01-01    0.855412
2000-01-02   -1.902517
2000-01-03    0.622428
2000-01-04   -1.319402
2000-01-05    1.067002
                ...   
2002-09-22    2.006236
2002-09-23   -0.309888
2002-09-24   -0.346562
2002-09-25   -1.053162
2002-09-26    1.109559
Freq: D, Length: 1000, dtype: float64

In [10]:
# A year string selects every observation in that year.
longer_ts['2001']

2001-01-01   -0.344024
2001-01-02    0.943250
2001-01-03   -0.117723
2001-01-04   -0.335685
2001-01-05   -1.192205
                ...   
2001-12-27   -0.216878
2001-12-28    1.187376
2001-12-29    0.446439
2001-12-30   -0.457286
2001-12-31   -2.526290
Freq: D, Length: 365, dtype: float64

In [11]:
# A year-month string selects every observation in that month.
longer_ts['2001-10']


2001-10-01   -0.222856
2001-10-02    0.341320
2001-10-03    0.231532
2001-10-04    0.044885
2001-10-05   -0.470794
2001-10-06   -1.474965
2001-10-07    0.548103
2001-10-08    0.142261
2001-10-09   -0.138381
2001-10-10   -0.798325
2001-10-11    2.393656
2001-10-12    0.511576
2001-10-13    1.480320
2001-10-14   -1.372453
2001-10-15   -1.732238
2001-10-16   -1.396605
2001-10-17   -1.281022
2001-10-18    0.006137
2001-10-19   -0.308184
2001-10-20   -1.207946
2001-10-21   -0.320710
2001-10-22    0.114300
2001-10-23   -0.242679
2001-10-24   -0.136143
2001-10-25   -0.074120
2001-10-26   -0.320529
2001-10-27    1.205081
2001-10-28   -0.163340
2001-10-29   -0.139649
2001-10-30    0.197359
2001-10-31   -0.578224
Freq: D, dtype: float64

In [12]:
# Slicing with datetime objects works as well.
ts[datetime(2011,1,7):]

2011-01-07    0.692004
2011-01-08   -1.117441
2011-01-10    0.739387
2011-01-12   -0.802033
dtype: float64

In [13]:
# A date range can be selected even if its endpoint dates are not present in the index.
# Show the series again for reference.
ts

2011-01-02    0.464818
2011-01-05   -0.683008
2011-01-07    0.692004
2011-01-08   -1.117441
2011-01-10    0.739387
2011-01-12   -0.802033
dtype: float64

In [14]:
# 2011-01-06 is not in the index, yet the slice simply starts at the next date that is.
ts['2011-01-06' : '2011-01-10']

2011-01-07    0.692004
2011-01-08   -1.117441
2011-01-10    0.739387
dtype: float64

In [15]:
# truncate() slices a Series between two dates; here everything after 2011-01-08 is dropped
# (the boundary date itself is kept).
ts.truncate(after="2011-01-08")

2011-01-02    0.464818
2011-01-05   -0.683008
2011-01-07    0.692004
2011-01-08   -1.117441
dtype: float64

In [16]:
# Date-based indexing applies to DataFrames as well.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')  # weekly, Wednesdays only
# Seeded locally so the frame's values are reproducible on every re-run.
long_df = pd.DataFrame(np.random.default_rng(44).standard_normal((100, 4)),
                       index=dates,
                       columns=['Colorado', 'Texas', 'New York', 'Ohio'])
long_df


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.879781,0.123492,-0.841356,1.185281
2000-01-12,-0.514400,-1.160327,2.335646,-0.232647
2000-01-19,0.401347,1.701656,1.219061,-2.088743
2000-01-26,-0.112335,0.795254,0.444728,-0.847320
2000-02-02,-0.940114,0.816062,-0.500418,0.106198
...,...,...,...,...
2001-10-31,1.134864,0.168993,-0.285628,-0.146698
2001-11-07,-0.475201,-0.130869,-0.253995,1.948625
2001-11-14,-1.429621,0.163892,1.006963,0.395838
2001-11-21,1.497706,-0.713783,0.842193,-0.350608


In [17]:
# Row selection by partial date string: every row whose index falls in May 2001.
long_df.loc['2001-05']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-0.786718,1.482464,-0.554759,1.292947
2001-05-09,-0.102359,0.013806,0.122022,0.974628
2001-05-16,-1.237661,-0.714675,0.606572,1.714277
2001-05-23,-0.549768,-1.133598,0.577436,-0.879977
2001-05-30,0.647147,0.998811,0.905325,2.157905


### Time series with duplicate indices

In [18]:
# Several observations can share one timestamp: 2000-01-02 occurs three times here.
dates = pd.DatetimeIndex(
    ['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000']
)
dup_ts = pd.Series(np.arange(len(dates)), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [19]:
# is_unique reports whether every index label occurs only once.
print(dup_ts.index.is_unique)
# Indexing now yields a scalar or a slice, depending on whether the timestamp is duplicated.
dup_ts['2000-01-02']

False


2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [21]:
# Aggregate observations that share a timestamp by grouping on the index itself (level 0).
grouped = dup_ts.groupby(level=0)
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

In [22]:
# Mean of the values at each distinct timestamp.
grouped.mean()

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64