# Group A Time Series with Pandas
### Import required modules

In [1]:
import pandas as pd
import numpy as np

### Create a DataFrame

In [6]:
# Seed the generator so the random troop counts are identical on every run.
# NOTE: outputs recorded from an earlier, unseeded run will differ until the
# notebook is re-executed.
SEED = 0
rng = np.random.default_rng(SEED)

# 100 consecutive hourly timestamps starting at midnight on 1 Jan 2014.
# Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in pandas >= 2.2.
hourly_index = pd.date_range('1/1/2014', periods=100, freq='h')

# Build the frame in one step: one random integer per hour for each army.
# `high` is exclusive, so values fall in [20000, 30000) and [20000, 40000).
df = pd.DataFrame(
    {
        'germany_army': rng.integers(low=20000, high=30000, size=100),
        'allied_army': rng.integers(low=20000, high=40000, size=100),
    },
    index=hourly_index,
)

# Show only the first 10 rows instead of dumping all 100.
df.head(10)

Unnamed: 0,germany_army,allied_army
2014-01-01 00:00:00,23873,37118
2014-01-01 01:00:00,28286,24368
2014-01-01 02:00:00,26495,28464
2014-01-01 03:00:00,27071,24693
2014-01-01 04:00:00,22456,32495
2014-01-01 05:00:00,22014,34075
2014-01-01 06:00:00,29123,34422
2014-01-01 07:00:00,22366,26646
2014-01-01 08:00:00,20398,20017
2014-01-01 09:00:00,24099,26361


### Truncate the dataframe

In [8]:
# Keep only the rows whose timestamp lies between the two dates
# (pandas reads these strings month-first: 2 Jan 2014 and 3 Jan 2014).
df.truncate(
    before='1/2/2014',
    after='1/3/2014',
)

Unnamed: 0,germany_army,allied_army
2014-01-02 00:00:00,22485,36377
2014-01-02 01:00:00,25012,33901
2014-01-02 02:00:00,29716,37198
2014-01-02 03:00:00,21980,21710
2014-01-02 04:00:00,20253,20475
2014-01-02 05:00:00,23182,35929
2014-01-02 06:00:00,23692,32613
2014-01-02 07:00:00,23938,25594
2014-01-02 08:00:00,24241,20124
2014-01-02 09:00:00,24244,22500


### Shift the dataframe's index forward by 4 months and 5 days

In [10]:
# Move every timestamp forward by 4 calendar months and 5 days.
# NOTE: this rebinds df.index, so re-running the cell shifts the index again.
index_offset = pd.DateOffset(months=4, days=5)
df.index = df.index + index_offset

### View the dataframe

In [11]:
# Preview the first five rows to confirm the index now starts on 2014-05-06.
df.head(n=5)

Unnamed: 0,germany_army,allied_army
2014-05-06 00:00:00,23873,37118
2014-05-06 01:00:00,28286,24368
2014-05-06 02:00:00,26495,28464
2014-05-06 03:00:00,27071,24693
2014-05-06 04:00:00,22456,32495


### Lag each variable by 1 hour (`shift(1)` gives each row the previous hour's value)

In [12]:
# Push every value down one row, so each timestamp shows the previous hour's
# reading; the first row has no predecessor and becomes NaN.
df.shift(periods=1).head()

Unnamed: 0,germany_army,allied_army
2014-05-06 00:00:00,,
2014-05-06 01:00:00,23873.0,37118.0
2014-05-06 02:00:00,28286.0,24368.0
2014-05-06 03:00:00,26495.0,28464.0
2014-05-06 04:00:00,27071.0,24693.0


### Lead each variable by 1 hour (`shift(-1)` gives each row the next hour's value)

In [13]:
# Pull every value up one row, so each timestamp shows the following hour's
# reading; the final row has no successor and becomes NaN.
df.shift(periods=-1).tail()

Unnamed: 0,germany_army,allied_army
2014-05-09 23:00:00,26612.0,21135.0
2014-05-10 00:00:00,27804.0,39965.0
2014-05-10 01:00:00,22209.0,35950.0
2014-05-10 02:00:00,26881.0,25631.0
2014-05-10 03:00:00,,


### Aggregate into days by summing up the value of each hourly observation

In [14]:
# Bucket the hourly rows by calendar day and add up each column per bucket.
df.resample(rule='D').sum()

Unnamed: 0,germany_army,allied_army
2014-05-06,598786,730088
2014-05-07,583578,688413
2014-05-08,616680,712908
2014-05-09,597064,733861
2014-05-10,103506,122681


### Aggregate into days by taking the median value of each day's hourly observations

In [15]:
# Bucket the hourly rows by calendar day and take each column's median per bucket.
df.resample(rule='D').median()

Unnamed: 0,germany_army,allied_army
2014-05-06,24291.5,31807.0
2014-05-07,23773.5,27877.0
2014-05-08,26043.0,30058.5
2014-05-09,24720.5,30396.0
2014-05-10,26746.5,30790.5


### Aggregate into days by taking the first value of each day's hourly observations

In [17]:
# Bucket the hourly rows by calendar day and keep the earliest value in each bucket.
df.resample(rule='D').first()

Unnamed: 0,germany_army,allied_army
2014-05-06,23873,37118
2014-05-07,22485,36377
2014-05-08,22893,24558
2014-05-09,24885,37922
2014-05-10,26612,21135


### Aggregate into days by taking the last value of each day's hourly observations

In [18]:
# Bucket the hourly rows by calendar day and keep the latest value in each bucket.
df.resample(rule='D').last()

Unnamed: 0,germany_army,allied_army
2014-05-06,27373,31864
2014-05-07,20518,23290
2014-05-08,22982,24927
2014-05-09,27610,26216
2014-05-10,26881,25631


### Aggregate into days by taking the first (open), highest, lowest, and last (close) value of each day's hourly observations

In [19]:
# For every column and every calendar day, report four summary values:
# open (first), high (max), low (min) and close (last).
df.resample(rule='D').ohlc()

Unnamed: 0_level_0,germany_army,germany_army,germany_army,germany_army,allied_army,allied_army,allied_army,allied_army
Unnamed: 0_level_1,open,high,low,close,open,high,low,close
2014-05-06,23873,29954,20231,27373,37118,39843,20017,31864
2014-05-07,22485,29768,20253,20518,36377,39419,20121,23290
2014-05-08,22893,29747,20587,22982,24558,39831,20010,24927
2014-05-09,24885,29497,20186,27610,37922,38806,20486,26216
2014-05-10,26612,27804,22209,26881,21135,39965,21135,25631


Source: https://chrisalbon.com/python/pandas_group_by_time.html