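### Transform a BOW Matrix into a PPMI Matrix

Given a bag-of-words (BOW) count matrix $X$, the positive pointwise mutual information (PPMI) of cell $(i, j)$ is $\max\left(0, \log_2 \frac{X_{ij}}{E_{ij}}\right)$, where the expected count is $E_{ij} = \frac{\mathrm{rowsum}(X, i) \cdot \mathrm{colsum}(X, j)}{\mathrm{sum}(X)}$.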

In [1]:
import numpy as np
import pandas as pd

In [68]:
def ppmi(matrix):
    """Convert a count matrix into a positive PMI (PPMI) matrix.

    For every cell the observed count is compared with the count expected
    from the row and column totals::

        expected[i, j] = rowsum[i] * colsum[j] / total
        ppmi[i, j]     = max(log2(matrix[i, j] / expected[i, j]), 0)

    Cells whose observed count is zero, and cells in an all-zero row or
    column (where the expected count is zero), get a PPMI of 0.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Numeric count matrix. Counts are assumed to be non-negative.

    Returns
    -------
    pandas.DataFrame
        A new float DataFrame with the same shape, index and columns as
        ``matrix``. The input is not modified.
    """
    # Work positionally on a float array so the result does not depend on
    # the frame's index/column labels or on its integer dtype.
    counts = matrix.to_numpy(dtype=float)

    # Marginals are loop-invariant: compute them once.
    row_totals = counts.sum(axis=1, keepdims=True)
    col_totals = counts.sum(axis=0, keepdims=True)
    total = counts.sum()

    values = np.zeros_like(counts)
    if total > 0:
        expected = (row_totals * col_totals) / total
        # Only cells with a positive observed and expected count can have a
        # positive PMI; restricting to them avoids log2(0) and 0/0.
        informative = (counts > 0) & (expected > 0)
        ratio = counts[informative] / expected[informative]
        values[informative] = np.maximum(np.log2(ratio), 0.0)

    return pd.DataFrame(values, index=matrix.index, columns=matrix.columns)

### Test

In [69]:
# Toy count matrix used to exercise ppmi(): 4 rows by 9 columns.
X = pd.DataFrame(
    np.array([
        [0, 9, 0, 0, 12, 0, 8, 6, 0],
        [0, 13, 8, 0, 15, 0, 5, 0, 0],
        [0, 0, 0, 9, 10, 7, 0, 0, 1],
        [6, 0, 0, 1, 8, 3, 0, 0, 0],
    ])
)

In [70]:
# Display the raw count matrix.
X

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,9,0,0,12,0,8,6,0
1,0,13,8,0,15,0,5,0,0
2,0,0,0,9,10,7,0,0,1
3,6,0,0,1,8,3,0,0,0


In [72]:
# Compute the PPMI matrix for X and round to 2 decimals for display.
ppmi(X).round(2)



Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.5,0.0,0.0,0.0,0.0,1.09,1.79,0.0
1,0.0,0.8,1.56,0.0,0.0,0.0,0.18,0.0,0.0
2,0.0,0.0,0.0,2.01,0.0,1.65,0.0,0.0,2.16
3,2.75,0.0,0.0,0.0,0.26,1.01,0.0,0.0,0.0
