In [130]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
range = pd.date_range('2015-01-01', '2015-01-02', freq='15min')
df = pd.DataFrame(index = range)

# Average speed in miles per hour
df['speed'] = np.random.randint(low=0, high=60, size=len(df.index))
# Distance in miles (speed * 0.5 hours)
df['distance'] = df['speed'] * 0.25 
# Cumulative distance travelled
df['cumulative_distance'] = df.distance.cumsum()

In [4]:
df.head()

Unnamed: 0,speed,distance,cumulative_distance
2015-01-01 00:00:00,18,4.5,4.5
2015-01-01 00:15:00,18,4.5,9.0
2015-01-01 00:30:00,49,12.25,21.25
2015-01-01 00:45:00,56,14.0,35.25
2015-01-01 01:00:00,39,9.75,45.0


In [7]:
df1 = df.resample("H").mean() #1 row per hour

In [48]:
df1.rolling(window=4, min_periods=4).cov().apply(lambda x: x)

<class 'pandas.core.panel.Panel'>
Dimensions: 25 (items) x 3 (major_axis) x 3 (minor_axis)
Items axis: 2015-01-01 00:00:00 to 2015-01-02 00:00:00
Major_axis axis: speed to cumulative_distance
Minor_axis axis: speed to cumulative_distance

### After taking the covariance at each time instant, the resulting structure is a `panel`

In [51]:
pnl = df1.rolling(window=4, min_periods=4).cov(pairwise=True)


<img src="https://raw.githubusercontent.com/mbonvini/EnergyAnalysisWithPandas/master/imgs/panel.png",width=300>

## Understanding how the panel works and how to iterate over it

In [70]:
print pnl.shape

(25, 3, 3)


In [76]:
print(pnl.items, pnl.major_axis, pnl.minor_axis)

(DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 01:00:00',
               '2015-01-01 02:00:00', '2015-01-01 03:00:00',
               '2015-01-01 04:00:00', '2015-01-01 05:00:00',
               '2015-01-01 06:00:00', '2015-01-01 07:00:00',
               '2015-01-01 08:00:00', '2015-01-01 09:00:00',
               '2015-01-01 10:00:00', '2015-01-01 11:00:00',
               '2015-01-01 12:00:00', '2015-01-01 13:00:00',
               '2015-01-01 14:00:00', '2015-01-01 15:00:00',
               '2015-01-01 16:00:00', '2015-01-01 17:00:00',
               '2015-01-01 18:00:00', '2015-01-01 19:00:00',
               '2015-01-01 20:00:00', '2015-01-01 21:00:00',
               '2015-01-01 22:00:00', '2015-01-01 23:00:00',
               '2015-01-02 00:00:00'],
              dtype='datetime64[ns]', freq='H'), Index([u'speed', u'distance', u'cumulative_distance'], dtype='object'), Index([u'speed', u'distance', u'cumulative_distance'], dtype='object'))


In [89]:
pnl.mean()
pnl.max()

Unnamed: 0,2015-01-01 00:00:00,2015-01-01 01:00:00,2015-01-01 02:00:00,2015-01-01 03:00:00,2015-01-01 04:00:00,2015-01-01 05:00:00,2015-01-01 06:00:00,2015-01-01 07:00:00,2015-01-01 08:00:00,2015-01-01 09:00:00,...,2015-01-01 15:00:00,2015-01-01 16:00:00,2015-01-01 17:00:00,2015-01-01 18:00:00,2015-01-01 19:00:00,2015-01-01 20:00:00,2015-01-01 21:00:00,2015-01-01 22:00:00,2015-01-01 23:00:00,2015-01-02 00:00:00
speed,,,,52.682292,73.098958,6.5625,49.746094,60.755208,240.463542,345.502604,...,139.473958,118.5625,124.611979,88.307292,115.854167,182.21875,114.265625,75.125,47.557292,100.057292
distance,,,,13.170573,18.27474,1.640625,12.436523,15.188802,60.115885,86.375651,...,34.86849,29.640625,31.152995,22.076823,28.963542,45.554688,28.566406,18.78125,11.889323,25.014323
cumulative_distance,,,,1903.385417,1216.745768,986.980143,944.089518,1135.549479,1436.174479,2199.378581,...,1769.105143,1663.912435,1957.839844,1702.768229,1363.27181,1497.746094,1586.821289,1353.282552,1094.407552,786.782552


In [96]:
pnl["2015-01-01 07:00:00",:,['distance','speed']]

Unnamed: 0,distance,speed
speed,3.651042,14.604167
distance,0.91276,3.651042
cumulative_distance,15.188802,60.755208


In [97]:
pnl.ix["2015-01-01 07:00:00",:,['distance','speed']]

Unnamed: 0,distance,speed
speed,3.651042,14.604167
distance,0.91276,3.651042
cumulative_distance,15.188802,60.755208


In [86]:
pnl.major_xs

<bound method Panel.major_xs of <class 'pandas.core.panel.Panel'>
Dimensions: 25 (items) x 3 (major_axis) x 3 (minor_axis)
Items axis: 2015-01-01 00:00:00 to 2015-01-02 00:00:00
Major_axis axis: speed to cumulative_distance
Minor_axis axis: speed to cumulative_distance>

In [87]:
#a panel's major slices, will be a dataframe.
#It's index will be the minor-axis, and the columns are the items
pnl.major_xs(key="cumulative_distance")

Unnamed: 0,2015-01-01 00:00:00,2015-01-01 01:00:00,2015-01-01 02:00:00,2015-01-01 03:00:00,2015-01-01 04:00:00,2015-01-01 05:00:00,2015-01-01 06:00:00,2015-01-01 07:00:00,2015-01-01 08:00:00,2015-01-01 09:00:00,...,2015-01-01 15:00:00,2015-01-01 16:00:00,2015-01-01 17:00:00,2015-01-01 18:00:00,2015-01-01 19:00:00,2015-01-01 20:00:00,2015-01-01 21:00:00,2015-01-01 22:00:00,2015-01-01 23:00:00,2015-01-02 00:00:00
speed,,,,-230.072917,-283.220052,-66.726562,49.746094,60.755208,240.463542,345.502604,...,-4.78776,-88.138021,124.611979,-225.364583,-73.226562,182.21875,6.753906,-211.192708,-109.205729,-130.773438
distance,,,,-57.518229,-70.805013,-16.681641,12.436523,15.188802,60.115885,86.375651,...,-1.19694,-22.034505,31.152995,-56.341146,-18.306641,45.554688,1.688477,-52.798177,-27.301432,-32.693359
cumulative_distance,,,,1903.385417,1216.745768,986.980143,944.089518,1135.549479,1436.174479,2199.378581,...,1769.105143,1663.912435,1957.839844,1702.768229,1363.27181,1497.746094,1586.821289,1353.282552,1094.407552,786.782552


In [83]:
pnl.minor_xs

<bound method Panel.minor_xs of <class 'pandas.core.panel.Panel'>
Dimensions: 25 (items) x 3 (major_axis) x 3 (minor_axis)
Items axis: 2015-01-01 00:00:00 to 2015-01-02 00:00:00
Major_axis axis: speed to cumulative_distance
Minor_axis axis: speed to cumulative_distance>

In [84]:
#minor_xs is useful for getting a "Slice" of the Panel, which is a dataframe with the other two axes - 
# Index as Major Axis of Panel, and columns are the items
pnl.minor_xs(key='speed')

Unnamed: 0,2015-01-01 00:00:00,2015-01-01 01:00:00,2015-01-01 02:00:00,2015-01-01 03:00:00,2015-01-01 04:00:00,2015-01-01 05:00:00,2015-01-01 06:00:00,2015-01-01 07:00:00,2015-01-01 08:00:00,2015-01-01 09:00:00,...,2015-01-01 15:00:00,2015-01-01 16:00:00,2015-01-01 17:00:00,2015-01-01 18:00:00,2015-01-01 19:00:00,2015-01-01 20:00:00,2015-01-01 21:00:00,2015-01-01 22:00:00,2015-01-01 23:00:00,2015-01-02 00:00:00
speed,,,,52.682292,73.098958,6.5625,14.890625,14.604167,69.604167,83.604167,...,139.473958,118.5625,49.848958,88.307292,115.854167,83.1875,114.265625,75.125,47.557292,100.057292
distance,,,,13.170573,18.27474,1.640625,3.722656,3.651042,17.401042,20.901042,...,34.86849,29.640625,12.46224,22.076823,28.963542,20.796875,28.566406,18.78125,11.889323,25.014323
cumulative_distance,,,,-230.072917,-283.220052,-66.726562,49.746094,60.755208,240.463542,345.502604,...,-4.78776,-88.138021,124.611979,-225.364583,-73.226562,182.21875,6.753906,-211.192708,-109.205729,-130.773438


### Iterating along the "Items" axis

In [118]:
# We can iterate over the ITEMS axis, using iteritems()
for key, dframe in pnl.iteritems():
    print key, dframe

2015-01-01 00:00:00                      speed  distance  cumulative_distance
speed                  NaN       NaN                  NaN
distance               NaN       NaN                  NaN
cumulative_distance    NaN       NaN                  NaN
2015-01-01 01:00:00                      speed  distance  cumulative_distance
speed                  NaN       NaN                  NaN
distance               NaN       NaN                  NaN
cumulative_distance    NaN       NaN                  NaN
2015-01-01 02:00:00                      speed  distance  cumulative_distance
speed                  NaN       NaN                  NaN
distance               NaN       NaN                  NaN
cumulative_distance    NaN       NaN                  NaN
2015-01-01 03:00:00                           speed   distance  cumulative_distance
speed                 52.682292  13.170573          -230.072917
distance              13.170573   3.292643           -57.518229
cumulative_distance -230.072917 

# Iterating along the Major Axis of a Panel

In [67]:
for key in pnl.major_axis:
    print key

speed
distance
cumulative_distance


## Iterating along the Minor Axis of a Panel

In [105]:
for key in pnl.minor_axis:
    print key
    
# In this case, the keys are the "Items" of the Panel

speed
distance
cumulative_distance


## Iterating along the Minor Axis of a Panel

In [65]:
for key in pnl:
    print key

2015-01-01 00:00:00
2015-01-01 01:00:00
2015-01-01 02:00:00
2015-01-01 03:00:00
2015-01-01 04:00:00
2015-01-01 05:00:00
2015-01-01 06:00:00
2015-01-01 07:00:00
2015-01-01 08:00:00
2015-01-01 09:00:00
2015-01-01 10:00:00
2015-01-01 11:00:00
2015-01-01 12:00:00
2015-01-01 13:00:00
2015-01-01 14:00:00
2015-01-01 15:00:00
2015-01-01 16:00:00
2015-01-01 17:00:00
2015-01-01 18:00:00
2015-01-01 19:00:00
2015-01-01 20:00:00
2015-01-01 21:00:00
2015-01-01 22:00:00
2015-01-01 23:00:00
2015-01-02 00:00:00


### `minor_xs` is a slice of the panel along the minor_axis
Result (the slice) is a data frame

In [104]:
for key in pnl.minor_axis[0:1]:
    print key # each "panel item" is the key in this case
    print "--"
    print pnl.minor_xs(key)

speed
--
                     2015-01-01 00:00:00  2015-01-01 01:00:00  \
speed                                NaN                  NaN   
distance                             NaN                  NaN   
cumulative_distance                  NaN                  NaN   

                     2015-01-01 02:00:00  2015-01-01 03:00:00  \
speed                                NaN            52.682292   
distance                             NaN            13.170573   
cumulative_distance                  NaN          -230.072917   

                     2015-01-01 04:00:00  2015-01-01 05:00:00  \
speed                          73.098958             6.562500   
distance                       18.274740             1.640625   
cumulative_distance          -283.220052           -66.726562   

                     2015-01-01 06:00:00  2015-01-01 07:00:00  \
speed                          14.890625            14.604167   
distance                        3.722656             3.651042   
cumulative_di

## Calculating the "matrix norm" of each slice, using `numpy.linalg`

In [124]:
from numpy import linalg as LA
for item in pnl:
    print  item, LA.norm(pnl[item])

2015-01-01 00:00:00 nan
2015-01-01 01:00:00 nan
2015-01-01 02:00:00 nan
2015-01-01 03:00:00 1933.51830223
2015-01-01 04:00:00 1287.22819378
2015-01-01 05:00:00 991.786188459
2015-01-01 06:00:00 947.002647058
2015-01-01 07:00:00 1139.10367604
2015-01-01 08:00:00 1480.18230668
2015-01-01 09:00:00 2258.05722107
2015-01-01 10:00:00 2167.36515239
2015-01-01 11:00:00 2412.91595724
2015-01-01 12:00:00 2017.7045238
2015-01-01 13:00:00 2084.75898341
2015-01-01 14:00:00 1820.14354708
2015-01-01 15:00:00 1775.31470868
2015-01-01 16:00:00 1673.61326518
2015-01-01 17:00:00 1966.9619729
2015-01-01 18:00:00 1736.70668051
2015-01-01 19:00:00 1372.97375962
2015-01-01 20:00:00 1523.68414647
2015-01-01 21:00:00 1591.48938121
2015-01-01 22:00:00 1390.15285605
2015-01-01 23:00:00 1107.07888236
2015-01-02 00:00:00 816.498549452


### Everything in one line!
Thanks to the power of chaining in Pandas, we can start from an hourly time series dataframe with multiple variables, take a rolling window of a certain size (4), compute the covariance matrix at each timestamp and calculate the norm of each of those matrices.

In [108]:
df1.rolling(window=4, min_periods=4).cov().apply(lambda x: LA.norm(x)).transpose()

Unnamed: 0,speed,distance,cumulative_distance
2015-01-01 00:00:00,,,
2015-01-01 01:00:00,,,
2015-01-01 02:00:00,,,
2015-01-01 03:00:00,236.394659,59.098665,1918.102692
2015-01-01 04:00:00,293.071701,73.267925,1251.278231
2015-01-01 05:00:00,67.068563,16.767141,989.373799
2015-01-01 06:00:00,52.060184,13.015046,945.48102
2015-01-01 07:00:00,62.592389,15.648097,1137.275039
2015-01-01 08:00:00,250.93874,62.734685,1457.406522
2015-01-01 09:00:00,356.087854,89.021963,2228.025795


In [129]:
for i in df1.rolling(window=4, min_periods=4).cov():
    print i


2015-01-01 00:00:00
2015-01-01 01:00:00
2015-01-01 02:00:00
2015-01-01 03:00:00
2015-01-01 04:00:00
2015-01-01 05:00:00
2015-01-01 06:00:00
2015-01-01 07:00:00
2015-01-01 08:00:00
2015-01-01 09:00:00
2015-01-01 10:00:00
2015-01-01 11:00:00
2015-01-01 12:00:00
2015-01-01 13:00:00
2015-01-01 14:00:00
2015-01-01 15:00:00
2015-01-01 16:00:00
2015-01-01 17:00:00
2015-01-01 18:00:00
2015-01-01 19:00:00
2015-01-01 20:00:00
2015-01-01 21:00:00
2015-01-01 22:00:00
2015-01-01 23:00:00
2015-01-02 00:00:00
