# Based on: [ "Computational Tools" ](https://pandas.pydata.org/docs/user_guide/computation.html)

In [1]:
import pandas as pd
import numpy as np

# 1. Statistical Functions

## 1.1 Percentage Change

In [2]:
# Five observations on consecutive days, starting 2021-01-01.
s = pd.Series(
    data=[1, 3, 7, 13, 19],
    index=pd.date_range(start="2021", periods=5, freq="D"),
)

# Relative change versus the previous row (periods=1 is the default).
pc1 = s.pct_change(periods=1)
# The same shift expressed as a time offset; usable because the index is a
# DatetimeIndex.
pc2 = s.pct_change(freq="D")

# Show the raw values next to both results; `keys` supplies the column labels.
pd.concat(
    [s, pc1, pc2],
    axis="columns",
    keys=["actual", "periods=1", "freq='D'"],
)

Unnamed: 0,actual,periods=1,freq='D'
2021-01-01,1,,
2021-01-02,3,2.0,2.0
2021-01-03,7,1.333333,1.333333
2021-01-04,13,0.857143,0.857143
2021-01-05,19,0.461538,0.461538


In [3]:
# 7x3 frame of random draws with the default integer labels 0, 1, 2.
df = pd.DataFrame(np.random.randn(7, 3))

# Compare each row with the row two positions above it; the first two rows
# have no such predecessor and come back as NaN.
pct = df.pct_change(periods=2)

# Originals on the left, percentage changes on the right (the labels 0, 1, 2
# therefore appear twice).
pd.concat([df, pct], axis="columns")

Unnamed: 0,0,1,2,0.1,1.1,2.1
0,0.545726,-1.13551,0.019594,,,
1,-0.959531,-0.650306,-0.13726,,,
2,-0.232615,-0.743346,0.515512,-1.426249,-0.345363,25.309281
3,0.107675,-0.046548,-1.420527,-1.112217,-0.928421,9.349151
4,0.62918,-0.761564,-1.798063,-3.70481,0.024507,-4.487921
5,-0.039506,2.178397,0.115097,-1.366895,-47.798444,-1.081024
6,-1.397907,1.06903,-0.301378,-3.221793,-2.40373,-0.832388


## 1.2 Covariance

In [4]:
# Two separate samples of 200 random draws each (s1 is drawn first, then s2).
s1, s2 = (pd.Series(np.random.randn(200)) for _ in range(2))

# Evaluate the covariance in both argument orders; the two numbers match.
(s1.cov(s2), s2.cov(s1))

(0.16857936331387594, 0.16857936331387594)

In [5]:
# 50 rows of random draws in four labelled columns.
df = pd.DataFrame(np.random.rand(50, 4), columns=["A", "B", "C", "D"])

# Pairwise covariance of the columns: a 4x4 frame labelled A-D on both axes.
df.cov()

Unnamed: 0,A,B,C,D
A,0.087793,0.002563,0.005675,-0.009516
B,0.002563,0.085902,-0.024499,0.00226
C,0.005675,-0.024499,0.078473,0.012194
D,-0.009516,0.00226,0.012194,0.100552


## 1.3 Correlation

In [6]:
# Correlation between the two random Series created in the covariance section.
s1.corr(s2)  # default method="pearson"

0.14500740885178884

In [7]:
# Same pair of Series, selecting the "kendall" method instead of the default.
s1.corr(s2, method="kendall")

0.1035175879396985

In [8]:
# Same pair of Series, selecting the "spearman" method instead of the default.
s1.corr(s2, method="spearman")

0.15824945623640593

In [9]:
# `method` also accepts a callable: it is handed two ndarrays and must
# return a single value.
def histogram_intersection(a, b):
    """Sum the element-wise minimum of `a` and `b` after scaling each by its own sum.

    Parameters
    ----------
    a, b : ndarray
        The two arrays to compare.

    Returns
    -------
    scalar
        ``sum(min(a_i / sum(a), b_i / sum(b)))``.
    """
    a_share = a / a.sum()
    b_share = b / b.sum()
    return np.minimum(a_share, b_share).sum()
# Hand the function itself to `corr` as the `method`.
# NOTE(review): s1 and s2 hold randn draws (In [4]), so they can contain
# negative values; the large negative result is therefore not an overlap
# fraction -- confirm this is the intended illustration.
s1.corr(s2, method=histogram_intersection)

-23.876626529686312

In [10]:
# Pairwise correlation of the columns of `df` (the 50x4 frame built in In [5]).
df.corr()

Unnamed: 0,A,B,C,D
A,1.0,0.029512,0.068378,-0.101281
B,0.029512,1.0,-0.298391,0.024314
C,0.068378,-0.298391,1.0,0.137272
D,-0.101281,0.024314,0.137272,1.0


>[DataFrame.corrwith()][1] calculates the correlation between like-labeled `Series` in different `DataFrame`s. Labels that appear in only one of the two frames produce `NaN` in the result.

[1]: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corrwith.html

In [11]:
# Two 7x4 frames of random draws. They share the labels A, C and D;
# B exists only in df1 and F only in df2.
df1 = pd.DataFrame(np.random.randn(7, 4), columns=["A", "B", "C", "D"])
df2 = pd.DataFrame(np.random.randn(7, 4), columns=["A", "C", "D", "F"])

# Correlate the columns that carry the same label in both frames.
df1.corrwith(df2)

A   -0.010618
C    0.418376
D    0.378686
B         NaN
F         NaN
dtype: float64

## 1.4 Rank

In [12]:
# 3.8 occurs twice (positions 2 and 4), so the effect of the tie-handling
# `method` is visible in the result.
s = pd.Series([2.2, 4.5, 3.8, 5.9, 3.8])

# method="average": tied records each receive the mean of the ranks they
# occupy (here ranks 2 and 3 -> 2.5).
s.rank(method="average")  # default

0    1.0
1    4.0
2    2.5
3    5.0
4    2.5
dtype: float64

In [13]:
# Rank in descending order: the largest value (5.9) receives rank 1.
s.rank(ascending=False)

0    5.0
1    2.0
2    3.5
3    1.0
4    3.5
dtype: float64

In [14]:
# method="max": tied records all receive the largest rank of their group
# (both 3.8 entries get 3).
s.rank(method="max")

0    1.0
1    4.0
2    3.0
3    5.0
4    3.0
dtype: float64

In [15]:
# method="min": tied records all receive the smallest rank of their group
# (both 3.8 entries get 2).
s.rank(method="min")

0    1.0
1    4.0
2    2.0
3    5.0
4    2.0
dtype: float64

In [16]:
# method="first": ties are broken by order of appearance
# (the earlier 3.8 gets rank 2, the later one rank 3).
s.rank(method="first")

0    1.0
1    4.0
2    2.0
3    5.0
4    3.0
dtype: float64

In [17]:
# 5x3 frame of random draws with columns A, B and C.
df = pd.DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
print(df)

# axis=0 (the default) ranks down each column: every value is compared with
# the other rows of the same column.
df.rank(axis=0)

          A         B         C
0  0.310233 -0.365173  0.079976
1 -0.242121  2.249440  2.815388
2 -0.871873  0.646666  0.252245
3 -0.179339 -0.566788 -0.042412
4 -0.602664 -0.185826 -0.166580


Unnamed: 0,A,B,C
0,5.0,2.0,3.0
1,3.0,5.0,5.0
2,1.0,4.0,4.0
3,4.0,1.0,2.0
4,2.0,3.0,1.0


In [18]:
print(df)

# axis=1 ranks across each row: the three column values of a row are
# compared with one another.
df.rank(axis="columns")

          A         B         C
0  0.310233 -0.365173  0.079976
1 -0.242121  2.249440  2.815388
2 -0.871873  0.646666  0.252245
3 -0.179339 -0.566788 -0.042412
4 -0.602664 -0.185826 -0.166580


Unnamed: 0,A,B,C
0,3.0,1.0,2.0
1,1.0,2.0,3.0
2,1.0,3.0,2.0
3,2.0,1.0,3.0
4,1.0,2.0,3.0
