In [5]:
from datetime import datetime
import pandas as pd
import numpy as np

In [2]:
# Six irregularly spaced days in January 1993, used below as the index of `ts`.
dates = [datetime(1993, 1, day) for day in (4, 5, 7, 10, 12, 20)]

In [7]:
# Six random values indexed by the datetime objects in `dates`.
# No seed is set in the visible cells, so the values differ on every run.
ts = pd.Series(np.random.randn(6),index=dates)
ts

1993-01-04    0.461769
1993-01-05    0.525513
1993-01-07    0.185109
1993-01-10   -0.363211
1993-01-12   -0.689955
1993-01-20    1.148489
dtype: float64

In [8]:
# The list of datetime objects passed as the index is stored as a DatetimeIndex.
ts.index

DatetimeIndex(['1993-01-04', '1993-01-05', '1993-01-07', '1993-01-10',
               '1993-01-12', '1993-01-20'],
              dtype='datetime64[ns]', freq=None)

In [9]:
ts+ts[::2] # ts[::2] keeps every second entry; addition aligns on the index, so dates missing from it give NaN

1993-01-04    0.923539
1993-01-05         NaN
1993-01-07    0.370219
1993-01-10         NaN
1993-01-12   -1.379911
1993-01-20         NaN
dtype: float64

In [10]:
ts.index.dtype  # pandas stores timestamps as NumPy datetime64 values at nanosecond resolution

dtype('<M8[ns]')

## Indexing

In [11]:
# An element taken from the DatetimeIndex comes back as a pandas Timestamp.
stamp = ts.index[0]
stamp

Timestamp('1993-01-04 00:00:00')

In [12]:
# A Timestamp taken from the index works as a label and returns the matching value.
ts[stamp]

0.46176931434225893

In [13]:
# A date string is also accepted as a label.
ts['1993-01-20']

1.1484889091765764

In [14]:
# The same date written in compact YYYYMMDD form resolves to the same entry.
ts['19930120']

1.1484889091765764

## Slicing

In [15]:
longer_ts = pd.Series(np.random.randn(1000),index=pd.date_range('1/1/2000',periods=1000))
# 1000 consecutive daily timestamps starting on 2000-01-01, each paired with a random value

In [16]:
# Display the series; pandas abbreviates the middle rows of a long Series.
longer_ts

2000-01-01    0.409507
2000-01-02   -0.010673
2000-01-03    1.113391
2000-01-04   -0.200340
2000-01-05    1.199473
                ...   
2002-09-22    0.427394
2002-09-23    1.712553
2002-09-24    0.565135
2002-09-25    1.201138
2002-09-26    0.711947
Freq: D, Length: 1000, dtype: float64

In [18]:
# A year string selects every entry in that year (the data ends on 2002-09-26, so 2002 is partial).
longer_ts['2002']

2002-01-01    1.144938
2002-01-02   -0.453678
2002-01-03    1.404002
2002-01-04    0.257269
2002-01-05   -1.788627
                ...   
2002-09-22    0.427394
2002-09-23    1.712553
2002-09-24    0.565135
2002-09-25    1.201138
2002-09-26    0.711947
Freq: D, Length: 269, dtype: float64

In [20]:
# A year/month string selects that month; head() shows its first five rows.
longer_ts['2002/02'].head()

2002-02-01    0.933755
2002-02-02   -0.584742
2002-02-03   -1.866200
2002-02-04   -0.151467
2002-02-05    0.101152
Freq: D, dtype: float64

In [22]:
# Show the short series again for reference before slicing it.
ts

1993-01-04    0.461769
1993-01-05    0.525513
1993-01-07    0.185109
1993-01-10   -0.363211
1993-01-12   -0.689955
1993-01-20    1.148489
dtype: float64

In [21]:
ts[datetime(1993,1,7):] # datetime objects also work as slice bounds

1993-01-07    0.185109
1993-01-10   -0.363211
1993-01-12   -0.689955
1993-01-20    1.148489
dtype: float64

In [24]:
ts['1993-1-10':'1993-1-30']  # slice by date strings; the end bound does not have to exist in the index

1993-01-10   -0.363211
1993-01-12   -0.689955
1993-01-20    1.148489
dtype: float64

In [26]:
# Slice a DataFrame by date.
# The index gets its own name (`wednesdays`) so this cell no longer replaces the
# list of datetime objects bound to `dates` earlier in the notebook.
wednesdays = pd.date_range('2000/1/1', periods=100, freq='W-WED')  # weekly, anchored on Wednesday
long_df = pd.DataFrame(
    np.random.randn(100, 3),
    index=wednesdays,
    columns=['Shanghai', 'Beijing', 'Paris'],
)
long_df.loc['2000/2']  # partial-string label: all rows in February 2000

Unnamed: 0,Shanghai,Beijing,Paris
2000-02-02,-0.624056,1.490416,-1.039576
2000-02-09,-1.013406,-0.992816,0.927098
2000-02-16,-1.588474,-0.432689,-0.793555
2000-02-23,-1.49612,-0.711273,1.912863


## Handling duplicate timestamps

In [27]:
# Five timestamps in which 2000-01-02 occurs three times, paired with the values 0..4.
dates = pd.DatetimeIndex([
    '2000/1/1',
    '2000/1/2', '2000/1/2', '2000/1/2',
    '2000/1/3',
])
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [28]:
# Check whether the index contains repeated timestamps
dup_ts.index.is_unique  # False means at least one date appears more than once

False

In [29]:
# Selecting a duplicated date returns every matching row as a Series rather than a single value.
dup_ts['2000-01-02']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [30]:
# Collapse duplicate dates: group on the index (level 0), then aggregate each group
grouped = dup_ts.groupby(level=0)
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [31]:
# Number of entries that shared each date
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64