## Group A Time Series With pandas

### Import required modules


In [1]:
import pandas as pd
import numpy as np


### Create a dataframe

In [2]:
# Build 100 hourly observations of two randomly generated troop counts.
# A fixed seed makes the frame reproducible on every Restart & Run All
# (the outputs saved in this notebook came from an earlier, unseeded run).
rng = np.random.RandomState(0)

df = pd.DataFrame(
    {
        # Draw order matters for reproducibility: german_army first, then allied_army.
        'german_army': rng.randint(low=20000, high=30000, size=100),
        'allied_army': rng.randint(low=20000, high=40000, size=100),
    },
    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated since pandas 2.2.
    index=pd.date_range('1/1/2014', periods=100, freq='h'),
)

df.head()


Unnamed: 0,german_army,allied_army
2014-01-01 00:00:00,21984,25394
2014-01-01 01:00:00,28729,21479
2014-01-01 02:00:00,27992,36714
2014-01-01 03:00:00,22377,22986
2014-01-01 04:00:00,21478,32143


### Truncate the dataframe

In [3]:
# Display only the rows between the two date bounds; the result is not
# assigned, so df itself keeps every row for the cells that follow.
df.truncate(
    before='1/2/2014',
    after='1/3/2014',
)


Unnamed: 0,german_army,allied_army
2014-01-02 00:00:00,27197,33748
2014-01-02 01:00:00,22282,37028
2014-01-02 02:00:00,29459,31293
2014-01-02 03:00:00,29960,21800
2014-01-02 04:00:00,28307,27744
2014-01-02 05:00:00,21089,36342
2014-01-02 06:00:00,23180,37807
2014-01-02 07:00:00,26166,31786
2014-01-02 08:00:00,26577,30650
2014-01-02 09:00:00,24859,24575


### Shift the dataframe's index forward by 4 months and 5 days


In [6]:
# Move every timestamp forward by 4 months and 5 days.
# The new index is derived from the frame's original hourly range (starting
# '1/1/2014'), not from the current index, so re-running this cell gives the
# same result instead of shifting the dates again on every execution.
index_offset = pd.DateOffset(months=4, days=5)
original_index = pd.date_range('1/1/2014', periods=len(df), freq='h')
df.index = original_index + index_offset

### View the dataframe

In [7]:
# Preview the first five rows to confirm the new index values.
df.head(n=5)

Unnamed: 0,german_army,allied_army
2014-09-11 00:00:00,21984,25394
2014-09-11 01:00:00,28729,21479
2014-09-11 02:00:00,27992,36714
2014-09-11 03:00:00,22377,22986
2014-09-11 04:00:00,21478,32143


### Lag a variable 1 hour (each row gets the previous hour's value)

In [9]:
# shift(1) moves each value down one row, so every timestamp shows the
# previous hour's reading and the first row has nothing to show (NaN).
df.shift(periods=1).head(n=5)

Unnamed: 0,german_army,allied_army
2014-09-11 00:00:00,,
2014-09-11 01:00:00,21984.0,25394.0
2014-09-11 02:00:00,28729.0,21479.0
2014-09-11 03:00:00,27992.0,36714.0
2014-09-11 04:00:00,22377.0,22986.0


### Lead a variable 1 hour (each row gets the next hour's value)

In [10]:
# shift(-1) moves each value up one row, so every timestamp shows the
# next hour's reading and the last row has nothing to show (NaN).
df.shift(periods=-1).tail(n=5)

Unnamed: 0,german_army,allied_army
2014-09-14 23:00:00,24099.0,23333.0
2014-09-15 00:00:00,23693.0,34700.0
2014-09-15 01:00:00,23855.0,33489.0
2014-09-15 02:00:00,22935.0,37627.0
2014-09-15 03:00:00,,


### Aggregate into days by summing up the value of each hourly observation

In [11]:
# Group the hourly rows into calendar days ('D') and total each column per day.
df.resample(rule='D').sum()

Unnamed: 0,german_army,allied_army
2014-09-11,596278,720076
2014-09-12,616417,711515
2014-09-13,602858,758088
2014-09-14,592241,680630
2014-09-15,94582,129149


### Aggregate into days by averaging the value of each hourly observation

In [12]:
# Group the hourly rows into calendar days ('D') and average each column per day.
df.resample(rule='D').mean()

Unnamed: 0,german_army,allied_army
2014-09-11,24844.916667,30003.166667
2014-09-12,25684.041667,29646.458333
2014-09-13,25119.083333,31587.0
2014-09-14,24676.708333,28359.583333
2014-09-15,23645.5,32287.25


### Aggregate into days by taking the min value of each day's worth of hourly observations

In [13]:
# Group the hourly rows into calendar days ('D') and keep each column's smallest value per day.
df.resample(rule='D').min()

Unnamed: 0,german_army,allied_army
2014-09-11,20439,20015
2014-09-12,20990,20089
2014-09-13,20189,22235
2014-09-14,20459,20251
2014-09-15,22935,23333


### Aggregate into days by taking the first value of each day's worth of hourly observations

In [14]:
# Group the hourly rows into calendar days ('D') and keep the first observation of each day.
df.resample(rule='D').first()

Unnamed: 0,german_army,allied_army
2014-09-11,21984,25394
2014-09-12,27197,33748
2014-09-13,27081,33539
2014-09-14,24614,38680
2014-09-15,24099,23333


### Aggregate into days by taking the last value of each day's worth of hourly observation

In [15]:
# Group the hourly rows into calendar days ('D') and keep the last observation of each day.
df.resample(rule='D').last()

Unnamed: 0,german_army,allied_army
2014-09-11,20534,31871
2014-09-12,25528,28060
2014-09-13,27928,32349
2014-09-14,21195,22238
2014-09-15,22935,37627


### Aggregate into days by taking the first, last, highest, and lowest value of each day's worth of hourly observation

In [16]:
# Group the hourly rows into calendar days ('D') and summarise each column as
# open (first), high (max), low (min) and close (last) for the day.
df.resample(rule='D').ohlc()

Unnamed: 0_level_0,german_army,german_army,german_army,german_army,allied_army,allied_army,allied_army,allied_army
Unnamed: 0_level_1,open,high,low,close,open,high,low,close
2014-09-11,21984,29460,20439,20534,25394,39328,20015,31871
2014-09-12,27197,29960,20990,25528,33748,38794,20089,28060
2014-09-13,27081,29899,20189,27928,33539,39833,22235,32349
2014-09-14,24614,29734,20459,21195,38680,39515,20251,22238
2014-09-15,24099,24099,22935,22935,23333,37627,23333,37627
