### Data Aggregation

* The data must already be in a DataFrame (or be converted to one) before an aggregation function can be applied.

In [3]:
import pandas as pd
import numpy as np

In [5]:
# Seed a local generator so that Restart & Run All reproduces the same sample frame.
SEED = 42
rng = np.random.default_rng(SEED)

# Ten consecutive daily rows starting 2020-10-01 with four standard-normal columns.
# The ISO date string avoids any day/month ambiguity.
df = pd.DataFrame(
    rng.standard_normal((10, 4)),
    index=pd.date_range('2020-10-01', periods=10),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2020-10-01,1.559378,-0.573997,0.900591,-0.468328
2020-10-02,2.103198,-1.599105,-0.323621,0.262281
2020-10-03,-1.554758,-0.540605,-0.095287,1.456846
2020-10-04,-0.108316,0.149212,0.726945,-0.283276
2020-10-05,0.089597,-0.468089,0.744618,-0.225871
2020-10-06,0.980943,0.122534,-0.944628,-0.321029
2020-10-07,1.635081,0.614773,0.495163,0.159827
2020-10-08,0.862463,0.323095,2.267512,-0.989148
2020-10-09,0.933601,-1.277404,0.549069,-1.123676
2020-10-10,0.022127,-0.3854,-0.203843,-0.46054


In [9]:
# Rolling view over df: each window covers up to 3 rows, and a single
# observation is enough to produce a value (min_periods=1).
agg = df.rolling(3, min_periods=1)
agg

Rolling [window=3,min_periods=1,center=False,axis=0,method=single]

In [10]:
# Apply the aggregation to the whole DataFrame.
# The string alias "sum" selects the built-in rolling sum; handing np.sum to
# .aggregate() is a deprecated alias on recent pandas and emits a FutureWarning.
# Ending the cell with the expression (no print) gives the rich table display.
agg.aggregate("sum")

                   A         B         C         D
2020-10-01  1.559378 -0.573997  0.900591 -0.468328
2020-10-02  3.662577 -2.173102  0.576970 -0.206047
2020-10-03  2.107819 -2.713706  0.481684  1.250799
2020-10-04  0.440124 -1.990497  0.308037  1.435851
2020-10-05 -1.573478 -0.859482  1.376277  0.947699
2020-10-06  0.962224 -0.196343  0.526935 -0.830175
2020-10-07  2.705621  0.269218  0.295154 -0.387073
2020-10-08  3.478488  1.060402  1.818048 -1.150350
2020-10-09  3.431145 -0.339536  3.311745 -1.952998
2020-10-10  1.818191 -1.339709  2.612738 -2.573365


In [11]:
# Apply the aggregation to a single column of the rolling object.
# The string alias "sum" selects the built-in rolling sum; handing np.sum to
# .aggregate() is a deprecated alias on recent pandas and emits a FutureWarning.
# Ending the cell with the expression (no print) lets the notebook render it.
agg['B'].aggregate("sum")

2020-10-01   -0.573997
2020-10-02   -2.173102
2020-10-03   -2.713706
2020-10-04   -1.990497
2020-10-05   -0.859482
2020-10-06   -0.196343
2020-10-07    0.269218
2020-10-08    1.060402
2020-10-09   -0.339536
2020-10-10   -1.339709
Freq: D, Name: B, dtype: float64


## Aggregation functions can also be applied to multiple columns

* Another Example

In [12]:
# Student marks, one row per student: (name, algorithm, database, os).
marks_df = pd.DataFrame(
    [
        ('shakil', 78, 92, 84),
        ('chamiya', 89, 85, 84),
        ('walid', 90, 86, 80),
        ('tithi', 85, 80, 79),
        ('peter', 80, 89, 86),
        ('sofia', 82, 81, 85),
    ],
    columns=['names', 'algorithm', 'database', 'os'],
)
marks_df

Unnamed: 0,names,algorithm,database,os
0,shakil,78,92,84
1,chamiya,89,85,84
2,walid,90,86,80
3,tithi,85,80,79
4,peter,80,89,86
5,sofia,82,81,85


In [15]:
# Roll over the numeric mark columns only. The string 'names' column cannot be
# summed: older pandas dropped it with a warning, and pandas 2.x raises instead.
# Window of up to 3 rows; a single row is enough to produce a value.
r = marks_df.select_dtypes(include='number').rolling(window=3, min_periods=1)
r

Rolling [window=3,min_periods=1,center=False,axis=0,method=single]

In [18]:
# Apply the aggregation to every column of the rolling object.
# The string alias "sum" selects the built-in rolling sum; handing np.sum to
# .agg() is a deprecated alias on recent pandas and emits a FutureWarning.
r.agg("sum")

  r.agg(np.sum)


Unnamed: 0,algorithm,database,os
0,78.0,92.0,84.0
1,167.0,177.0,168.0
2,257.0,263.0,248.0
3,264.0,251.0,243.0
4,255.0,255.0,245.0
5,247.0,250.0,250.0


In [20]:
# Apply the aggregation to a chosen subset of columns.
# The string alias "sum" selects the built-in rolling sum; handing np.sum to
# .agg() is a deprecated alias on recent pandas and emits a FutureWarning.
r[['algorithm', 'database']].agg("sum")

Unnamed: 0,algorithm,database
0,78.0,92.0
1,167.0,177.0
2,257.0,263.0
3,264.0,251.0
4,255.0,255.0
5,247.0,250.0
