In [2]:
from datetime import datetime
import pandas as pd
import numpy as np

In [3]:
# A pandas time series is a Series whose index is made of timestamps.
# Six irregularly spaced days in January 2011 serve as the index here.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(data=np.random.randn(6), index=dates)
ts

2011-01-02   -0.326558
2011-01-05    0.641027
2011-01-07    1.098709
2011-01-08    0.809111
2011-01-10    0.499802
2011-01-12   -0.486002
dtype: float64

In [4]:
# The datetime objects passed as the index are held in a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [5]:
# Arithmetic between differently indexed time series aligns automatically on the dates.
ts + ts[::2] # ts[::2] keeps every second element; dates it lacks come out as NaN

2011-01-02   -0.653117
2011-01-05         NaN
2011-01-07    2.197419
2011-01-08         NaN
2011-01-10    0.999604
2011-01-12         NaN
dtype: float64

In [6]:
# dtype of the index; displayed as '<M8[ns]', i.e. datetime64 with nanosecond resolution.
ts.index.dtype

dtype('<M8[ns]')

In [7]:
# Scalar values taken from a DatetimeIndex are pandas Timestamp objects
# (the datetime values supplied when building the series were converted).
ts.index[0]

Timestamp('2011-01-02 00:00:00')

### Indexing, Selection, Subsetting

In [8]:
# Take a label straight from the index and use it to look up the matching value.
stamp = ts.index[2]
ts[stamp]

1.098709419762574

In [9]:
# A string that can be interpreted as a date also works as a label.
ts['20110110']


0.4998021420275295

In [11]:
# With a longer series, a bare year or year-month string is enough to select a slice.
# The index is 1000 consecutive daily timestamps starting on 2000-01-01.
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
longer_ts

2000-01-01    0.309969
2000-01-02   -1.309000
2000-01-03   -0.184334
2000-01-04    0.187876
2000-01-05    0.251620
                ...   
2002-09-22    0.808414
2002-09-23    0.833929
2002-09-24   -0.326227
2002-09-25    0.656650
2002-09-26   -0.560711
Freq: D, Length: 1000, dtype: float64

In [12]:
# A bare year string selects every entry that falls in that year.
longer_ts['2001']

2001-01-01   -1.436786
2001-01-02   -0.250697
2001-01-03   -0.138944
2001-01-04   -0.640874
2001-01-05    1.528580
                ...   
2001-12-27   -0.379689
2001-12-28    0.456526
2001-12-29   -1.969595
2001-12-30    0.054505
2001-12-31   -1.068848
Freq: D, Length: 365, dtype: float64

In [13]:
# A year-month string selects every entry in that month.
longer_ts['2001-10']


2001-10-01    0.583742
2001-10-02    0.497610
2001-10-03   -1.950727
2001-10-04    0.653021
2001-10-05    1.011854
2001-10-06    1.099687
2001-10-07    0.569533
2001-10-08    1.162444
2001-10-09    0.572447
2001-10-10    2.368572
2001-10-11    1.238840
2001-10-12   -1.387164
2001-10-13   -0.769207
2001-10-14    1.536771
2001-10-15    0.477130
2001-10-16    0.716553
2001-10-17    0.865783
2001-10-18    1.481634
2001-10-19   -1.003214
2001-10-20   -0.419831
2001-10-21    0.658136
2001-10-22   -0.809589
2001-10-23   -0.734396
2001-10-24    0.982885
2001-10-25    1.026414
2001-10-26   -0.497589
2001-10-27   -0.361687
2001-10-28   -1.095372
2001-10-29   -0.344998
2001-10-30    2.073477
2001-10-31    1.451932
Freq: D, dtype: float64

In [14]:
# Slicing accepts datetime objects as well; this keeps everything from 2011-01-07 onward.
ts[datetime(2011,1,7):]

2011-01-07    1.098709
2011-01-08    0.809111
2011-01-10    0.499802
2011-01-12   -0.486002
dtype: float64

In [15]:
# A date range can be selected even when its endpoints are not present in the series.
# Display `ts` again for reference.
ts

2011-01-02   -0.326558
2011-01-05    0.641027
2011-01-07    1.098709
2011-01-08    0.809111
2011-01-10    0.499802
2011-01-12   -0.486002
dtype: float64

In [16]:
# '2011-01-06' is not in the index, yet the range slice still works; the end label is included.
ts['2011-01-06' : '2011-01-10']

2011-01-07    1.098709
2011-01-08    0.809111
2011-01-10    0.499802
dtype: float64

In [18]:
# truncate() slices a series between two dates via its `before` / `after` arguments;
# here only `after` is given, so entries later than 2011-01-08 are dropped.
ts.truncate(after="2011-01-08")

2011-01-02   -0.326558
2011-01-05    0.641027
2011-01-07    1.098709
2011-01-08    0.809111
dtype: float64

In [19]:
# Partial-string selection works for DataFrames too.
# Build a weekly index that falls on Wednesdays only. It gets its own name
# instead of rebinding `dates`, which an earlier cell defined as a plain list
# of datetime objects.
wednesdays = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = pd.DataFrame(
    np.random.randn(100, 4),
    index=wednesdays,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
long_df


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-1.507448,-1.557895,1.020582,-1.626127
2000-01-12,-1.092347,0.704459,-0.707824,1.763829
2000-01-19,-0.245051,-1.404000,-0.842209,-0.367435
2000-01-26,0.113761,-0.162546,-1.243137,0.522090
2000-02-02,-0.474637,0.699714,-0.324656,-1.335147
...,...,...,...,...
2001-10-31,-0.713076,-1.281766,0.781555,2.409818
2001-11-07,0.221030,1.858471,-0.035997,-1.276466
2001-11-14,-0.569944,1.442005,-0.875474,0.066954
2001-11-21,0.651055,1.004967,-0.677983,0.956963


In [27]:
# Select the rows for May 2001 by passing a year-month string to .loc.
long_df.loc['2001-05']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,0.081552,-0.668151,-0.998547,-0.537564
2001-05-09,1.036852,-1.627542,-1.293198,-0.248474
2001-05-16,0.04276,-0.074008,-0.374271,-0.270692
2001-05-23,0.516473,-0.623928,-1.08099,1.131259
2001-05-30,-2.370843,1.237043,-1.36651,-0.153291


### Time series with duplicate indices

In [29]:
# A time series may hold more than one observation for the same timestamp.
# ISO 8601 strings are used so the dates do not depend on month-first vs
# day-first parsing, and the index gets its own name rather than rebinding `dates`.
dup_dates = pd.DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
                              '2000-01-02', '2000-01-03'])
dup_ts = pd.Series(np.arange(5), index=dup_dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [30]:
# The index holds repeated timestamps, so is_unique reports False.
print(dup_ts.index.is_unique)
# Indexing yields a scalar when the timestamp is unique and a slice when it is
# duplicated; '2000-01-02' occurs three times, so a slice comes back.
dup_ts['2000-01-02']

False


2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [None]:
# grouping on same dates