## Pandas Window functions

> https://www.youtube.com/playlist?list=PLXovS_5EZGh6CpyyB4m7dQDlcocsqIseK

---

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Rank

In [6]:
# Sample values to rank; the duplicated 5 lets the tie-handling methods be compared.
a = pd.Series(
    data=[1, 4, 5, 5, 7, 2, 9, 13],
)

In [13]:
# Wrap the Series in a one-column frame so rank results can sit next to the values.
df = pd.DataFrame({"value": a})

In [14]:
# Show the single-column frame built from the Series.
df

Unnamed: 0,value
0,1
1,4
2,5
3,5
4,7
5,2
6,9
7,13


In [20]:
# Rank from largest to smallest. With method="min", tied values all receive
# the lowest rank in their group, and the ranks they would otherwise have
# occupied are skipped.
df["rank"] = df["value"].rank(
    method="min",
    ascending=False,
)

In [21]:
# Inspect the result: the tied 5s both get rank 4.0, and rank 5.0 is skipped
# (the next value, 4, is ranked 6.0).
df

Unnamed: 0,value,rank
0,1,8.0
1,4,6.0
2,5,4.0
3,5,4.0
4,7,3.0
5,2,7.0
6,9,2.0
7,13,1.0


In [22]:
# Rank from largest to smallest. With method="dense", tied values share a
# rank and the next distinct value gets the very next rank, so no rank
# numbers are skipped.
df["rank"] = df["value"].rank(
    method="dense",
    ascending=False,
)

In [23]:
# Inspect the result: the tied 5s still share rank 4.0, but the next value (4)
# now gets 5.0, so there is no gap in the rank sequence.
df

Unnamed: 0,value,rank
0,1,7.0
1,4,5.0
2,5,4.0
3,5,4.0
4,7,3.0
5,2,6.0
6,9,2.0
7,13,1.0


In [26]:
# Rank from smallest to largest. With method="first", ties are broken by
# order of appearance, so every row gets a distinct rank
# (the two 5s receive 4.0 and 5.0).
df["rank"] = df["value"].rank(
    method="first",
    ascending=True,
)
df

Unnamed: 0,value,rank
0,1,1.0
1,4,3.0
2,5,4.0
3,5,5.0
4,7,6.0
5,2,2.0
6,9,7.0
7,13,8.0


### Window functions

__Cumulative sum__:

In [33]:
# Demo frame for the window functions:
#   a - the integers 1..10, no missing values
#   b - a sparse column, half of whose entries are NaN
df = pd.DataFrame(
    data={
        "a": [*range(1, 11)],
        "b": [10, np.nan, np.nan, 20, np.nan, np.nan, 30, 30, 30, np.nan],
    }
)

In [34]:
# Show the demo frame; the blank cells in column b are its NaN entries.
df

Unnamed: 0,a,b
0,1,10.0
1,2,
2,3,
3,4,20.0
4,5,
5,6,
6,7,30.0
7,8,30.0
8,9,30.0
9,10,


In [35]:
# Cumulative sum: row i holds a[0] + ... + a[i].
# skipna=True tells pandas to ignore NaNs while accumulating; column `a` has none.
df["a_cumsum"] = df["a"].cumsum(skipna=True)
df

Unnamed: 0,a,b,a_cumsum
0,1,10.0,1
1,2,,3
2,3,,6
3,4,20.0,10
4,5,,15
5,6,,21
6,7,30.0,28
7,8,30.0,36
8,9,30.0,45
9,10,,55


In [36]:
# Cumulative minimum: row i holds the smallest value of `a` seen in rows 0..i.
# Since `a` only increases, the result is 1 on every row.
df["a_cummin"] = df["a"].cummin(skipna=True)
df

Unnamed: 0,a,b,a_cumsum,a_cummin
0,1,10.0,1,1
1,2,,3,1
2,3,,6,1
3,4,20.0,10,1
4,5,,15,1
5,6,,21,1
6,7,30.0,28,1
7,8,30.0,36,1
8,9,30.0,45,1
9,10,,55,1


__Rolling window__

In [37]:
# Rolling sum over a fixed window of 2 rows (the current row and the one before it).
# The first row has no predecessor, so its result is NaN.
df["a_rol_w2_sum"] = df["a"].rolling(2).sum()

In [38]:
# Inspect the result: row 0 is NaN because its window holds only one value;
# every later row is the sum of `a` in that row and the row before it.
df

Unnamed: 0,a,b,a_cumsum,a_cummin,a_rol_w2_sum
0,1,10.0,1,1,
1,2,,3,1,3.0
2,3,,6,1,5.0
3,4,20.0,10,1,7.0
4,5,,15,1,9.0
5,6,,21,1,11.0
6,7,30.0,28,1,13.0
7,8,30.0,36,1,15.0
8,9,30.0,45,1,17.0
9,10,,55,1,19.0


In [40]:
# Rolling count over 2 rows: the number of non-NA values of `b` in each window.
# min_periods=2 requires a full 2-row window, so row 0 is NaN; a window in which
# both `b` values are NaN yields 0.0.
df["b_count"] = df["b"].rolling(2, min_periods=2).count()
df

Unnamed: 0,a,b,a_cumsum,a_cummin,a_rol_w2_sum,b_count
0,1,10.0,1,1,,
1,2,,3,1,3.0,1.0
2,3,,6,1,5.0,0.0
3,4,20.0,10,1,7.0,1.0
4,5,,15,1,9.0,1.0
5,6,,21,1,11.0,0.0
6,7,30.0,28,1,13.0,1.0
7,8,30.0,36,1,15.0,2.0
8,9,30.0,45,1,17.0,2.0
9,10,,55,1,19.0,1.0


Similarly, rolling windows also support all of the following operations:

- skew
- kurtosis (`kurt`)
- quantile
- covariance (`cov`)
- corr

__win_type in rolling windows__

![](https://raw.githubusercontent.com/ShaunakSen/Problem-Solving-and-Algorithms/master/Data%20challenges%20-%20pandas%2C%20sql/data/Screenshot%202024-06-02%20at%203.55.41%E2%80%AFPM.png)