### Statistical Functions Using Pandas  
**Author:** Taskeen Hussain  

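Pandas provides a rich set of statistical functions for analyzing data, including measures such as mean, median, variance, and correlation. These functions help summarize structured datasets and draw insights from them. This notebook demonstrates percent change, covariance, correlation, ranking, and the rolling, expanding, and exponentially weighted window functions.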

In [3]:
# ### Percent change
# pct_change() compares every element with the one before it, so the first
# entry (or first row) has no predecessor and comes out as NaN.

import pandas as pd
import numpy as np

# Series: a small fixed sequence.
s = pd.Series(data=[1, 2, 3, 4, 5, 4])
series_change = s.pct_change()
print(series_change)

# DataFrame: random values; the change is taken row over row in each column.
df = pd.DataFrame(data=np.random.randn(5, 2))
frame_change = df.pct_change()
print(frame_change)

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.250000
5   -0.200000
dtype: float64
          0         1
0       NaN       NaN
1  0.656713 -1.698098
2 -0.275993  2.011901
3 -0.540496 -0.959352
4  6.068732 -2.556078


In [4]:
# ### Covariance
# Series.cov() reduces two Series to a single covariance value.

import pandas as pd
import numpy as np

# Draw the two samples one after the other (s1 first, then s2).
s1, s2 = (pd.Series(np.random.randn(10)) for _ in range(2))

series_cov = s1.cov(s2)
print(series_cov)



0.20955327477879804


In [5]:
# Covariance on a DataFrame: Series.cov() gives the value for one pair of
# columns, DataFrame.cov() gives the full column-by-column matrix.
column_labels = list('abcde')
frame = pd.DataFrame(np.random.randn(10, 5), columns=column_labels)

col_a, col_b = frame['a'], frame['b']
print(col_a.cov(col_b))
print(frame.cov())

-0.275340468704078
          a         b         c         d         e
a  1.703264 -0.275340  0.137832 -0.230641 -0.462069
b -0.275340  1.018765  0.271539  0.156815  0.646879
c  0.137832  0.271539  0.267276 -0.045163  0.036630
d -0.230641  0.156815 -0.045163  0.800249  0.083346
e -0.462069  0.646879  0.036630  0.083346  0.725451


In [6]:
# ### Correlation
# Series.corr() gives the correlation of one pair of columns;
# DataFrame.corr() gives the full matrix, with 1.0 on the diagonal.

import pandas as pd
import numpy as np

frame = pd.DataFrame(
    data=np.random.randn(10, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)

ab_corr = frame['a'].corr(frame['b'])
corr_matrix = frame.corr()
print(ab_corr)
print(corr_matrix)

-0.3179699207059164
          a         b         c         d         e
a  1.000000 -0.317970 -0.324751 -0.220427  0.188621
b -0.317970  1.000000 -0.215369  0.284493  0.225299
c -0.324751 -0.215369  1.000000  0.093924  0.213268
d -0.220427  0.284493  0.093924  1.000000 -0.232221
e  0.188621  0.225299  0.213268 -0.232221  1.000000


In [11]:
# ### Data Ranking
# rank() numbers the values by sorted position; tied values share the
# average of the positions they occupy (hence the .5 ranks in the output).

import pandas as pd
import numpy as np  # alias must be `np`: the code below calls np.random.randn

s = pd.Series(np.random.randn(5), index=list('abcde'))
s['d'] = s['b']  # so there's a tie
print(s.rank())



a    3.0
b    4.5
c    1.0
d    4.5
e    2.0
dtype: float64


In [8]:
# ### .rolling() function
# A fixed 3-row window slides down the frame; each output row is the mean of
# that row and the two before it, so the first two rows are NaN.

import pandas as pd
import numpy as np

dates = pd.date_range('1/1/2000', periods=10)
df = pd.DataFrame(np.random.randn(10, 4), index=dates, columns=list('ABCD'))

rolling_mean = df.rolling(window=3).mean()
print(rolling_mean)


                   A         B         C         D
2000-01-01       NaN       NaN       NaN       NaN
2000-01-02       NaN       NaN       NaN       NaN
2000-01-03 -0.181516  0.191086 -0.620781 -1.527810
2000-01-04 -0.204431  0.723023 -0.521309 -0.880120
2000-01-05  0.130565  0.574943  0.271762 -0.064261
2000-01-06  0.251491  0.835240  0.520558  0.763676
2000-01-07  0.350367  0.659635  0.362826  0.416000
2000-01-08 -0.303263  1.077971 -0.679789  0.100918
2000-01-09 -0.460755  0.862192 -0.901945 -0.774952
2000-01-10  0.461023  0.451612 -0.892808 -0.704417


In [9]:
# ### .expanding() Function

import pandas as pd
import numpy as np

# The expanding window always starts at the first row and grows by one row
# at a time; min_periods=3 keeps the result NaN until three rows are included.
dates = pd.date_range('1/1/2000', periods=10)
df = pd.DataFrame(np.random.randn(10, 4), index=dates, columns=list('ABCD'))

expanding_mean = df.expanding(min_periods=3).mean()
print(expanding_mean)


                   A         B         C         D
2000-01-01       NaN       NaN       NaN       NaN
2000-01-02       NaN       NaN       NaN       NaN
2000-01-03  1.261693  0.708666 -0.662397  0.835515
2000-01-04  0.880853  0.659134 -0.198604  0.033167
2000-01-05  0.703609  0.328878 -0.117300 -0.074543
2000-01-06  0.505964  0.101512 -0.366445 -0.089305
2000-01-07  0.597057  0.168349 -0.260992  0.039169
2000-01-08  0.485837  0.228161 -0.348649  0.009766
2000-01-09  0.597572  0.152906 -0.093674  0.035078
2000-01-10  0.546673  0.125989 -0.104891 -0.088528


In [10]:

# ### .ewm() function
# Exponentially weighted mean, with the decay given as a centre of mass
# (com=0.5). Unlike the rolling/expanding examples, every row has a value.

import pandas as pd
import numpy as np

dates = pd.date_range('1/1/2000', periods=10)
df = pd.DataFrame(np.random.randn(10, 4), index=dates, columns=list('ABCD'))

ewm_mean = df.ewm(com=0.5).mean()
print(ewm_mean)


                   A         B         C         D
2000-01-01  1.714294  0.516502 -0.234296  0.716299
2000-01-02 -0.131119  0.194814  1.195002  0.541459
2000-01-03 -0.240991 -0.777030  0.710181  0.702635
2000-01-04  0.670963 -1.245133  1.271540  0.043518
2000-01-05 -0.307357  1.247821 -0.090037  0.848650
2000-01-06  1.245175 -0.576841 -0.601820  1.589974
2000-01-07 -0.704255 -0.802813  0.473563  0.849985
2000-01-08 -1.218326 -1.135134 -0.160162  1.005365
2000-01-09 -0.638636 -0.544881  0.322201 -0.752796
2000-01-10 -0.012760 -0.613447 -1.305081 -0.441385
