In [1]:
import numpy as np
import pandas as pd
import datetime

In [2]:
# 100 timestamps at one-second spacing. 's' is the current spelling of the
# seconds alias; the upper-case 'S' form is deprecated in recent pandas.
rng = pd.date_range('1/1/2012', periods=100, freq='s')
# A seeded generator keeps the values identical on every Restart & Run All
# without touching NumPy's global random state.
ts = pd.Series(np.random.RandomState(0).randint(0, 500, len(rng)), index=rng)
# Every sample falls into a single 5-minute bin, labelled by its right edge.
ts.resample('5min', label='right').mean()

2012-01-01 00:05:00    248.95
Freq: 5T, dtype: float64

In [3]:
# Five consecutive days as a Series indexed by its own values; the third entry
# is blanked out to show how the .dt accessor treats a missing timestamp.
s = pd.date_range(start='2000-01-01', end='2000-01-05', freq='D').to_series()
s.iloc[2] = pd.NaT
s.dt.day_name()

2000-01-01     Saturday
2000-01-02       Sunday
2000-01-03          NaN
2000-01-04      Tuesday
2000-01-05    Wednesday
Freq: D, dtype: object

In [8]:
# Show the series itself: the third value is NaT after the assignment above,
# while its index label (2000-01-03) is unchanged.
s

2000-01-01   2000-01-01
2000-01-02   2000-01-02
2000-01-03          NaT
2000-01-04   2000-01-04
2000-01-05   2000-01-05
Freq: D, dtype: datetime64[ns]

In [9]:
# Inspect the 100-point, one-second-spaced series of random integers built earlier.
ts

2012-01-01 00:00:00    260
2012-01-01 00:00:01    183
2012-01-01 00:00:02    488
2012-01-01 00:00:03    119
2012-01-01 00:00:04    421
                      ... 
2012-01-01 00:01:35    107
2012-01-01 00:01:36    440
2012-01-01 00:01:37     75
2012-01-01 00:01:38    361
2012-01-01 00:01:39    143
Freq: S, Length: 100, dtype: int32

In [12]:
# Upsample the first two samples to 250 ms steps; asfreq() leaves the newly
# created slots as NaN. '250ms' replaces the deprecated '250L' alias, and
# .iloc makes the positional slice explicit.
ts.iloc[:2].resample('250ms').asfreq()

2012-01-01 00:00:00.000    260.0
2012-01-01 00:00:00.250      NaN
2012-01-01 00:00:00.500      NaN
2012-01-01 00:00:00.750      NaN
2012-01-01 00:00:01.000    183.0
Freq: 250L, dtype: float64

In [18]:
# 100 daily timestamps, each shifted one second past midnight.
rng = pd.date_range(start='2014-1-1', periods=100, freq='D') + pd.Timedelta(seconds=1)
# Preview the first three entries as a Series.
rng.to_series().head(3)

2014-01-01 00:00:01   2014-01-01 00:00:01
2014-01-02 00:00:01   2014-01-02 00:00:01
2014-01-03 00:00:01   2014-01-03 00:00:01
Freq: D, dtype: datetime64[ns]

In [19]:
# One value per day, resampled into 3-minute bins: almost every bin is empty
# and therefore sums to 0. '3min' replaces the deprecated '3T' alias.
ts = pd.Series(range(100), index=rng)
ts.resample('3min').sum()

2014-01-01 00:00:00     0
2014-01-01 00:03:00     0
2014-01-01 00:06:00     0
2014-01-01 00:09:00     0
2014-01-01 00:12:00     0
                       ..
2014-04-09 23:48:00     0
2014-04-09 23:51:00     0
2014-04-09 23:54:00     0
2014-04-09 23:57:00     0
2014-04-10 00:00:00    99
Freq: 3T, Length: 47521, dtype: int64

In [22]:
# Five weekly dates, used both as an ordinary column ('date') and as the second
# level ('d') of the row MultiIndex, so either one can drive a resample.
weekly_dates = pd.date_range('2015-01-01', freq='W', periods=5)
row_index = pd.MultiIndex.from_arrays([[1, 2, 3, 4, 5], weekly_dates], names=['v', 'd'])
df = pd.DataFrame({'date': weekly_dates, 'a': np.arange(5)}, index=row_index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,date,a
v,d,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2015-01-04,2015-01-04,0
2,2015-01-11,2015-01-11,1
3,2015-01-18,2015-01-18,2
4,2015-01-25,2015-01-25,3
5,2015-02-01,2015-02-01,4


In [21]:
# Month-end totals keyed on the 'date' column. Passing the MonthEnd offset
# object instead of the 'M' string keeps the call valid on pandas releases
# that deprecate that alias for resampling, and on older ones alike.
df.resample(pd.offsets.MonthEnd(), on='date').sum()

Unnamed: 0_level_0,a
date,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


In [24]:
# Same month-end totals, resampling on level 1 ('d') of the MultiIndex.
# Only 'a' is selected up front: the datetime 'date' column cannot be summed,
# and pandas 2.x raises instead of silently dropping it. The MonthEnd offset
# object avoids the deprecated 'M' string alias.
df[['a']].resample(pd.offsets.MonthEnd(), level=1).sum()

Unnamed: 0_level_0,a
d,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


In [25]:
# Six irregularly spaced samples spanning 00:00-03:05, with nothing in the
# 02:00 hour.
small = pd.Series(
    range(6),
    index=pd.to_datetime([
        "2017-01-01T00:00:00",
        "2017-01-01T00:30:00",
        "2017-01-01T00:31:00",
        "2017-01-01T01:00:00",
        "2017-01-01T03:00:00",
        "2017-01-01T03:05:00",
    ]),
)
# 'h' is the current spelling of the hourly alias; upper-case 'H' is deprecated.
resampled = small.resample('h')
# Iterating a Resampler yields (bin label, sub-series) pairs, including the
# empty 02:00 bin.
for name, group in resampled:
    print('Group:', name)
    print('-' * 27)
    print(group, end='\n\n')

Group: 2017-01-01 00:00:00
---------------------------
2017-01-01 00:00:00    0
2017-01-01 00:30:00    1
2017-01-01 00:31:00    2
dtype: int64

Group: 2017-01-01 01:00:00
---------------------------
2017-01-01 01:00:00    3
dtype: int64

Group: 2017-01-01 02:00:00
---------------------------
Series([], dtype: int64)

Group: 2017-01-01 03:00:00
---------------------------
2017-01-01 03:00:00    4
2017-01-01 03:05:00    5
dtype: int64



In [26]:
# Window straddling midnight, sampled every 7 minutes.
start = '2000-10-01 23:30:00'
end = '2000-10-02 00:30:00'
middle = '2000-10-02 00:00:00'
rng = pd.date_range(start=start, end=end, freq='7min')
ts = pd.Series(np.arange(rng.size), index=rng)
# Default origin: the 17-minute bins are counted from midnight of the first
# day, so the first bin label (23:14) precedes the first sample (23:30).
ts.resample('17min').sum()

2000-10-01 23:14:00     0
2000-10-01 23:31:00     3
2000-10-01 23:48:00     7
2000-10-02 00:05:00    18
2000-10-02 00:22:00     8
Freq: 17T, dtype: int32

In [41]:
# origin='start' anchors the 17-minute bins at the first timestamp of the
# series (23:30) instead of at midnight of the first day.
ts.resample('17min', origin='start').sum()

2000-10-01 23:30:00     3
2000-10-01 23:47:00     7
2000-10-02 00:04:00    18
2000-10-02 00:21:00     8
Freq: 17T, dtype: int32

In [29]:
# Keep only the samples from `middle` through `end`, then bin with the origin
# at midnight of the first remaining day (origin='start_day').
ts[middle:end].resample('17min', origin='start_day').sum()

2000-10-02 00:00:00    11
2000-10-02 00:17:00    15
Freq: 17T, dtype: int32

In [30]:
# Same slice with no explicit origin; the recorded output is identical to the
# origin='start_day' call.
ts[middle:end].resample('17min').sum()

2000-10-02 00:00:00    11
2000-10-02 00:17:00    15
Freq: 17T, dtype: int32

In [31]:
# Inspect the 7-minute series used by the resampling cells above.
ts

2000-10-01 23:30:00    0
2000-10-01 23:37:00    1
2000-10-01 23:44:00    2
2000-10-01 23:51:00    3
2000-10-01 23:58:00    4
2000-10-02 00:05:00    5
2000-10-02 00:12:00    6
2000-10-02 00:19:00    7
2000-10-02 00:26:00    8
Freq: 7T, dtype: int32

In [38]:
# Time-of-day component of each timestamp in ts, as a one-column DataFrame.
# NOTE(review): this rebinds `s`, which held a datetime Series in an earlier
# cell; a distinct name would avoid stale state on partial re-runs.
s = pd.DataFrame(ts.index.time)
s

Unnamed: 0,0
0,23:30:00
1,23:37:00
2,23:44:00
3,23:51:00
4,23:58:00
5,00:05:00
6,00:12:00
7,00:19:00
8,00:26:00


In [40]:
# Hourly totals across the midnight boundary. '1h' replaces the deprecated
# upper-case '1H' alias.
ts.resample('1h').sum()

2000-10-01 23:00:00    10
2000-10-02 00:00:00    26
Freq: H, dtype: int32

In [52]:
# A second 7-minute series, this time contained within a single day.
# Note that `start` and `end` are re-bound here.
start = '2000-10-01 17:28:00'
end = '2000-10-01 18:32:00'
idx = pd.date_range(start=start, end=end, freq='7min')
ts1 = pd.Series(np.arange(idx.size), index=idx)
ts1

2000-10-01 17:28:00    0
2000-10-01 17:35:00    1
2000-10-01 17:42:00    2
2000-10-01 17:49:00    3
2000-10-01 17:56:00    4
2000-10-01 18:03:00    5
2000-10-01 18:10:00    6
2000-10-01 18:17:00    7
2000-10-01 18:24:00    8
2000-10-01 18:31:00    9
Freq: 7T, dtype: int32

In [55]:
# Half-hour bins anchored with origin='end_day'; in the recorded output each
# bin is labelled by its right edge. '30min' replaces the deprecated '30T'.
ts1.resample('30min', origin='end_day').sum()

2000-10-01 17:30:00     0
2000-10-01 18:00:00    10
2000-10-01 18:30:00    26
2000-10-01 19:00:00     9
Freq: 30T, dtype: int32

In [61]:
# Midnight following the last timestamp (the last timestamp rounded up to a day).
ceil_mid = idx.max().ceil('D')
# Bin width used in the manual origin arithmetic in the next cells.
freq = pd.offsets.Minute(17)
ceil_mid

Timestamp('2000-10-02 00:00:00')

In [68]:
# Distance from the last timestamp up to that midnight.
ceil_mid - idx.max()

Timedelta('0 days 05:29:00')

In [71]:
# True division keeps the fractional number of 17-minute steps, so stepping
# back from midnight lands exactly on the last timestamp again.
ceil_mid - ((ceil_mid - idx.max())/ freq)*freq

Timestamp('2000-10-01 18:31:00')

In [66]:
# Number of whole 17-minute steps that fit between the last timestamp and midnight.
(ceil_mid - idx.max())// freq

19

In [72]:
# Stepping back only whole 17-minute steps from midnight gives a grid point
# just after the last timestamp (18:37 versus 18:31 in the recorded output).
ceil_mid - ((ceil_mid - idx.max())// freq)* freq

Timestamp('2000-10-01 18:37:00')

In [75]:
# An hourly Period; 'h' replaces the deprecated upper-case 'H' alias.
p = pd.Period('2014-07-01 09:00', freq='h')
# Adding an offset that matches the period's frequency shifts it by that amount.
p + pd.offsets.Hour(2)

Period('2014-07-01 11:00', 'H')

In [76]:
# Thirteen monthly periods, January 2011 through January 2012 inclusive.
prng = pd.period_range("1/1/2011", "1/1/2012", freq="M")
# A seeded generator keeps the values identical on every Restart & Run All
# without touching NumPy's global random state.
ps = pd.Series(np.random.RandomState(0).randn(len(prng)), prng)
ps

2011-01   -0.454808
2011-02   -0.368664
2011-03   -0.008570
2011-04   -0.151392
2011-05   -0.562880
2011-06    0.965353
2011-07    0.317426
2011-08    0.297319
2011-09   -0.248882
2011-10   -0.566771
2011-11    0.153644
2011-12    1.248233
2012-01    0.392385
Freq: M, dtype: float64

In [81]:
# Scratch check: draw two labels from ['a', 'b']. The recorded output repeats
# 'a', so the same label can be drawn more than once.
np.random.choice(['a', 'b'], 2)

array(['a', 'a'], dtype='<U1')

In [87]:
# Disabled draft of a MultiIndex built from random inner labels (kept for reference):
# index=pd.MultiIndex.from_product([list('abcde'), np.random.choice([1,4], 2)])

In [89]:
# Ten rows: the five letters crossed with two inner labels drawn from {1, 4, 5}.
# replace=False guarantees the two labels differ, so the MultiIndex never
# contains duplicate entries and level 1 always splits into two groups.
inner_labels = np.random.choice([1, 4, 5], 2, replace=False)
df = pd.DataFrame(
    np.random.randn(10, 3),
    index=pd.MultiIndex.from_product([list('abcde'), inner_labels]),
)
df

Unnamed: 0,Unnamed: 1,0,1,2
a,4,-0.109414,-0.38045,-1.13532
a,1,-0.955772,1.453154,-0.119474
b,4,-1.226255,-1.63943,0.08009
b,1,2.244107,-0.89558,-1.519574
c,4,1.348866,-0.080391,0.058733
c,1,-0.381895,1.644628,-0.389127
d,4,-0.874683,-1.265127,-0.322754
d,1,-2.442701,1.799368,0.036383
e,4,0.574734,-0.643097,-0.29235
e,1,-0.030464,0.67507,-0.488487


In [93]:
# Walk the groups formed by the inner index level, printing each key followed
# by the sub-frame that belongs to it.
for level_value, sub_frame in df.groupby(level=1):
    print('Title:', level_value)
    print('content:', sub_frame)

Title: 1
content:             0         1         2
a 1 -0.955772  1.453154 -0.119474
b 1  2.244107 -0.895580 -1.519574
c 1 -0.381895  1.644628 -0.389127
d 1 -2.442701  1.799368  0.036383
e 1 -0.030464  0.675070 -0.488487
Title: 4
content:             0         1         2
a 4 -0.109414 -0.380450 -1.135320
b 4 -1.226255 -1.639430  0.080090
c 4  1.348866 -0.080391  0.058733
d 4 -0.874683 -1.265127 -0.322754
e 4  0.574734 -0.643097 -0.292350


In [101]:
# Pull out one group of the inner level. The inner labels are drawn at random,
# so 4 is not guaranteed to exist; use it when present and otherwise fall back
# to the first available key instead of raising KeyError.
grouped = df.groupby(level=1)
group_key = 4 if 4 in grouped.groups else next(iter(grouped.groups))
df1 = grouped.get_group(group_key)
df1

Unnamed: 0,Unnamed: 1,0,1,2
a,4,-0.109414,-0.38045,-1.13532
b,4,-1.226255,-1.63943,0.08009
c,4,1.348866,-0.080391,0.058733
d,4,-0.874683,-1.265127,-0.322754
e,4,0.574734,-0.643097,-0.29235


In [102]:
# Re-display the monthly period series created earlier.
ps

2011-01   -0.454808
2011-02   -0.368664
2011-03   -0.008570
2011-04   -0.151392
2011-05   -0.562880
2011-06    0.965353
2011-07    0.317426
2011-08    0.297319
2011-09   -0.248882
2011-10   -0.566771
2011-11    0.153644
2011-12    1.248233
2012-01    0.392385
Freq: M, dtype: float64

In [104]:
# 600 minute-resolution periods starting at 09:00. 'min' replaces the
# deprecated 'T' alias, and the seeded generator makes the values reproducible
# without touching NumPy's global random state.
dfp = pd.DataFrame(
    np.random.RandomState(0).randn(600, 1),
    index=pd.period_range('2013-01-01 09:00', freq='min', periods=600),
    columns=['A'],
)
# Partial-string lookup: select the rows that fall within the 10:00 hour.
dfp.loc['2013-01-01 10H'].head(3)

Unnamed: 0,A
2013-01-01 10:00,-1.057519
2013-01-01 10:01,-0.522349
2013-01-01 10:02,0.52547


In [106]:
# Add each row's containing month as a column by converting the minute-level
# PeriodIndex to monthly frequency.
# NOTE(review): this mutates dfp in place, so displays of dfp from earlier
# cells no longer reflect its current columns.
dfp['YYYY-MM'] = dfp.index.asfreq('M')

In [107]:
# Display the frame, now carrying the 'YYYY-MM' column.
dfp

Unnamed: 0,A,YYYY-MM
2013-01-01 09:00,-0.630555,2013-01
2013-01-01 09:01,0.202781,2013-01
2013-01-01 09:02,0.917546,2013-01
2013-01-01 09:03,1.016854,2013-01
2013-01-01 09:04,0.322568,2013-01
...,...,...
2013-01-01 18:55,0.788668,2013-01
2013-01-01 18:56,-1.471180,2013-01
2013-01-01 18:57,-0.492278,2013-01
2013-01-01 18:58,-0.093216,2013-01


In [108]:
# The same conversion at daily frequency: every minute maps to 2013-01-01.
dfp.index.asfreq('D')

PeriodIndex(['2013-01-01', '2013-01-01', '2013-01-01', '2013-01-01',
             '2013-01-01', '2013-01-01', '2013-01-01', '2013-01-01',
             '2013-01-01', '2013-01-01',
             ...
             '2013-01-01', '2013-01-01', '2013-01-01', '2013-01-01',
             '2013-01-01', '2013-01-01', '2013-01-01', '2013-01-01',
             '2013-01-01', '2013-01-01'],
            dtype='period[D]', length=600)

In [112]:
# Quarterly periods with fiscal years ending in November.
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
# A seeded generator keeps the values identical on every Restart & Run All
# without touching NumPy's global random state.
ts = pd.Series(np.random.RandomState(0).randn(len(prng)), index=prng)
# Re-index each quarter to 09:00 on the first day of the month after it ends:
# last month of the quarter -> next month -> first hour of that month -> +9 hours.
# 'h' replaces the deprecated upper-case 'H' alias.
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('h', 's') + 9
ts

1990-03-01 09:00    0.157081
1990-06-01 09:00    1.004570
1990-09-01 09:00   -0.232262
1990-12-01 09:00    2.172518
1991-03-01 09:00    0.483981
1991-06-01 09:00    1.466530
1991-09-01 09:00   -0.411179
1991-12-01 09:00   -1.416284
1992-03-01 09:00   -0.483605
1992-06-01 09:00    0.884201
1992-09-01 09:00    0.295986
1992-12-01 09:00   -1.371095
1993-03-01 09:00   -0.668123
1993-06-01 09:00    0.495792
1993-09-01 09:00    0.184778
1993-12-01 09:00   -0.874798
1994-03-01 09:00   -1.037237
1994-06-01 09:00   -1.051655
1994-09-01 09:00    2.015698
1994-12-01 09:00    1.602962
1995-03-01 09:00   -0.679494
1995-06-01 09:00   -0.540236
1995-09-01 09:00   -0.306885
1995-12-01 09:00    0.279357
1996-03-01 09:00   -0.350423
1996-06-01 09:00    0.648381
1996-09-01 09:00    0.069235
1996-12-01 09:00    1.001059
1997-03-01 09:00    1.137474
1997-06-01 09:00   -2.452650
1997-09-01 09:00   -0.298739
1997-12-01 09:00   -0.295293
1998-03-01 09:00    1.767770
1998-06-01 09:00    0.706692
1998-09-01 09:

In [111]:
# Intermediate step of the index conversion above: the last month of each
# Q-NOV quarter (February, May, August, November).
prng.asfreq('M', 'e')

PeriodIndex(['1990-02', '1990-05', '1990-08', '1990-11', '1991-02', '1991-05',
             '1991-08', '1991-11', '1992-02', '1992-05', '1992-08', '1992-11',
             '1993-02', '1993-05', '1993-08', '1993-11', '1994-02', '1994-05',
             '1994-08', '1994-11', '1995-02', '1995-05', '1995-08', '1995-11',
             '1996-02', '1996-05', '1996-08', '1996-11', '1997-02', '1997-05',
             '1997-08', '1997-11', '1998-02', '1998-05', '1998-08', '1998-11',
             '1999-02', '1999-05', '1999-08', '1999-11', '2000-02', '2000-05',
             '2000-08', '2000-11'],
            dtype='period[M]')

In [113]:
import dateutil
# Three daily timestamps localized through the plain 'Europe/London' zone name.
rng_pytz = pd.date_range(start='3/6/2012 00:00', periods=3, freq='D', tz='Europe/London')
# .tz exposes the tzinfo object pandas attached to the index.
rng_pytz.tz

<DstTzInfo 'Europe/London' LMT-1 day, 23:59:00 STD>