# Based on: ["Computational Tools"](https://pandas.pydata.org/docs/user_guide/computation.html)

In [1]:
import pandas as pd
import numpy as np

# 1. Statistical Functions

## 1.1 Percentage Change

In [2]:
# Five consecutive daily observations, starting 2021-01-01
dates = pd.date_range("2021", periods=5, freq="D")
s = pd.Series([1, 3, 7, 13, 19], index=dates)

# Relative change versus the previous row (periods=1 is the default) ...
pc1 = s.pct_change(periods=1)
# ... and versus the value one calendar day earlier (time-series variant).
# The index is daily without gaps, so both give the same numbers.
pc2 = s.pct_change(freq="D")

# Side-by-side view: original values next to both percentage-change results
labels = ["actual", "periods=1", "freq='D'"]
pd.concat([s, pc1, pc2], axis="columns", keys=labels)

Unnamed: 0,actual,periods=1,freq='D'
2021-01-01,1,,
2021-01-02,3,2.0,2.0
2021-01-03,7,1.333333,1.333333
2021-01-04,13,0.857143,0.857143
2021-01-05,19,0.461538,0.461538


In [3]:
# Random 7x3 frame; no seed is set in the visible cells, so values change per run
df = pd.DataFrame(np.random.randn(7, 3))

# Change relative to the value two rows earlier. The first two rows have no
# reference value, so they come out as NaN.
pct = df.pct_change(periods=2)

# Raw values (left three columns) next to their 2-period changes (right three).
# Both halves carry the column labels 0, 1, 2, so the labels repeat.
pd.concat([df, pct], axis="columns")

Unnamed: 0,0,1,2,0.1,1.1,2.1
0,-0.894541,-0.012984,0.282799,,,
1,0.461746,-0.694238,-0.686554,,,
2,0.467688,0.329557,1.61411,-1.522825,-26.382285,4.707612
3,0.237499,-0.691695,-0.080837,-0.485651,-0.003663,-0.882256
4,0.189039,0.867562,2.02726,-0.5958,1.632512,0.255962
5,-1.131474,-1.907239,-0.214409,-5.764129,1.757343,1.652343
6,-1.874843,-1.38516,0.673579,-10.917737,-2.596612,-0.667739


## 1.2 Covariance

In [4]:
# Two series of 200 draws each from np.random.randn. No seed is set in the
# visible cells, so the numbers below change on every run.
# s1 and s2 are reused by the correlation examples in section 1.3.
s1 = pd.Series(np.random.randn(200))
s2 = pd.Series(np.random.randn(200))

# Evaluate the covariance in both argument orders; the two results are identical
s1.cov(s2), s2.cov(s1)

(-0.05007946950833725, -0.05007946950833725)

In [5]:
# 50 rows of np.random.rand samples (uniform on [0, 1)) in four labelled columns.
# This rebinds `df`; the covariance/correlation matrices below use this frame.
df = pd.DataFrame(np.random.rand(50, 4), columns=["A", "B", "C", "D"])

# Pairwise covariance of the columns (4x4, symmetric)
df.cov()

Unnamed: 0,A,B,C,D
A,0.091951,-0.007143,-0.016037,0.01119
B,-0.007143,0.09676,-0.02328,-0.003914
C,-0.016037,-0.02328,0.080821,0.01596
D,0.01119,-0.003914,0.01596,0.082832


## 1.3 Correlation

In [6]:
# Pearson correlation between the two series created in section 1.2
s1.corr(s2)  # method="pearson" is the default

-0.049590881186760916

In [7]:
# Kendall's tau rank correlation of the same two series
s1.corr(s2, method="kendall")

-0.029748743718592964

In [8]:
# Spearman rank correlation of the same two series
s1.corr(s2, method="spearman")

-0.05077776944423611

In [9]:
# `method` also accepts a callable: it is handed two ndarrays and must
# return a single number.
def histogram_intersection(a, b):
    """Return the summed element-wise minimum of the two normalised inputs.

    Each input is first divided by its own sum; the result is the sum of the
    element-wise minima of the two normalised arrays.

    Parameters
    ----------
    a, b : array-like exposing ``.sum()``
        Inputs of equal length.

    Returns
    -------
    scalar
        For non-negative inputs this lies in [0, 1]; inputs containing
        negative values can produce results outside that range.
    """
    a_share = np.true_divide(a, a.sum())
    b_share = np.true_divide(b, b.sum())
    overlap = np.minimum(a_share, b_share)
    return overlap.sum()
# s1 and s2 come from np.random.randn and therefore contain negative values,
# so the result is not confined to [0, 1] (see the negative number below).
# The cell only demonstrates that a callable is accepted.
s1.corr(s2, method=histogram_intersection)

-10.505102910862558

In [10]:
# Pairwise correlation matrix of the columns of `df`
# (the 50x4 frame with columns A-D built in section 1.2)
df.corr()

Unnamed: 0,A,B,C,D
A,1.0,-0.075728,-0.186025,0.128221
B,-0.075728,1.0,-0.263255,-0.043718
C,-0.186025,-0.263255,1.0,0.195062
D,0.128221,-0.043718,0.195062,1.0


> [`DataFrame.corrwith()`][1] calculates the correlation between like-labeled `Series` in two different `DataFrame`s. Labels that exist in only one of the frames produce `NaN`.

[1]: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corrwith.html

In [11]:
# Two random 7x4 frames that share only the column labels A, C and D
df1 = pd.DataFrame(np.random.randn(7, 4), columns=["A", "B", "C", "D"])
df2 = pd.DataFrame(np.random.randn(7, 4), columns=["A", "C", "D", "F"])

# Correlate the columns that carry the same label in both frames.
# B and F each exist in just one frame, so they come back as NaN.
df1.corrwith(df2)

A    0.005433
C    0.870525
D    0.858922
B         NaN
F         NaN
dtype: float64

## 1.4 Rank

In [12]:
# Five values with one tie: 3.8 appears at positions 2 and 4
s = pd.Series([2.2, 4.5, 3.8, 5.9, 3.8])

# Tied values share the mean of the ranks they occupy: (2 + 3) / 2 = 2.5
s.rank(method="average")  # default

0    1.0
1    4.0
2    2.5
3    5.0
4    2.5
dtype: float64

In [13]:
# Rank in descending order: the largest value (5.9) gets rank 1.
# Ties still use the default "average" method, hence 3.5 for both 3.8s.
s.rank(ascending=False)

0    5.0
1    2.0
2    3.5
3    1.0
4    3.5
dtype: float64

In [14]:
# Tied values all receive the largest rank of their group (both 3.8s -> 3)
s.rank(method="max")

0    1.0
1    4.0
2    3.0
3    5.0
4    3.0
dtype: float64

In [15]:
# Tied values all receive the smallest rank of their group (both 3.8s -> 2)
s.rank(method="min")

0    1.0
1    4.0
2    2.0
3    5.0
4    2.0
dtype: float64

In [16]:
# Break ties by order of appearance: the first 3.8 gets rank 2, the second rank 3
s.rank(method="first")

0    1.0
1    4.0
2    2.0
3    5.0
4    3.0
dtype: float64

In [17]:
# Random 5x3 frame (unseeded, so the printed values change on every run)
df = pd.DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
print(df)

# axis=0 (the default): rank the values within each column, moving down the rows
df.rank(axis=0)

          A         B         C
0  0.729632  0.622246  1.344559
1  0.239326 -0.468024  1.786679
2  1.926087 -0.390373  0.747233
3  1.149603  0.006734 -0.918536
4  0.479665  0.995739 -0.805374


Unnamed: 0,A,B,C
0,3.0,4.0,4.0
1,1.0,1.0,5.0
2,5.0,2.0,3.0
3,4.0,3.0,1.0
4,2.0,5.0,2.0


In [18]:
# Print the same frame again so the values sit directly above their ranks
print(df)

# axis=1: rank the values within each row, moving across the columns A, B, C
df.rank(axis=1)

          A         B         C
0  0.729632  0.622246  1.344559
1  0.239326 -0.468024  1.786679
2  1.926087 -0.390373  0.747233
3  1.149603  0.006734 -0.918536
4  0.479665  0.995739 -0.805374


Unnamed: 0,A,B,C
0,2.0,1.0,3.0
1,2.0,1.0,3.0
2,3.0,1.0,2.0
3,3.0,2.0,1.0
4,2.0,3.0,1.0
