In [60]:
import numpy as np

# Term-frequency matrix used as the demo input for the weighting functions below.
# Orientation: one row per term, one column per document (12 terms x 9 documents),
# so tf_mat[i, j] is the raw count of term i in document j.
tf_mat = np.array([
    [1, 1, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 1, 1, 1],
    [1, 0, 0, 1, 0, 0, 0, 0, 0],
    [1, 0, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 1, 1],
    [0, 1, 0, 0, 1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0, 0, 0, 1],
    [0, 1, 1, 2, 0, 0, 0, 0, 0],
    [0, 1, 0, 0, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 1, 1, 0],
    [0, 1, 1, 0, 1, 0, 0, 0, 0]
    ])

In [63]:
def tfidf(f):
    """Applies term-frequency inverse document frequency weighting.

    Each entry is scaled by the natural log of (number of documents divided by
    the number of documents the term occurs in):

        w[i, j] = f[i, j] * log(n / df[i])

    Parameters
    ----------
    f : array_like, shape (n_terms, n_documents)
        Term-frequency matrix; rows are terms, columns are documents.

    Returns
    -------
    numpy.ndarray, shape (n_terms, n_documents)
        TF-IDF weighted matrix. A term that occurs in no document gets an
        all-zero row (instead of NaN from a division by zero).
    """
    f = np.asarray(f)                               # accept nested lists as well as arrays
    n = f.shape[1]                                  # number of documents
    doc_freq = np.count_nonzero(f, axis=1)          # documents containing each term

    # Logarithmic term; left at 0 for terms with zero document frequency so that
    # n / 0 -> inf and the subsequent 0 * inf -> NaN never happen.
    idf = np.zeros(doc_freq.shape, dtype=float)
    present = doc_freq > 0
    idf[present] = np.log(n / doc_freq[present])

    return f * idf[:, np.newaxis]                   # broadcast the per-term factor across documents
    
# Demo: print the TF-IDF weighted version of tf_mat (terms x documents).
print('Using TF-IDF:\n\n', tfidf(tf_mat))

Using TF-IDF:

 [[1.5040774  1.5040774  0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         1.5040774  1.5040774  0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  1.09861229 1.09861229 1.09861229]
 [1.5040774  0.         0.         1.5040774  0.         0.
  0.         0.         0.        ]
 [1.5040774  0.         1.5040774  0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         1.5040774  1.5040774 ]
 [0.         1.5040774  0.         0.         1.5040774  0.
  0.         0.         0.        ]
 [0.         1.5040774  0.         0.         0.         0.
  0.         0.         1.5040774 ]
 [0.         1.09861229 1.09861229 2.19722458 0.         0.
  0.         0.         0.        ]
 [0.         1.5040774  0.         0.         1.5040774  0.
  0.         0.         0.        ]
 [0.         0.         

In [64]:
def le(f):
    """Applies log-entropy weighting.

    The local weight is log(1 + f[i, j]); the global weight of term i is
    1 + sum_j p[i, j] * log(p[i, j]) / log(n), where p[i, j] is the share of
    term i's total frequency that falls in document j and n is the number of
    documents. Natural logarithms are used throughout.

    Parameters
    ----------
    f : array_like, shape (n_terms, n_documents)
        Term-frequency matrix; rows are terms, columns are documents.

    Returns
    -------
    numpy.ndarray, shape (n_terms, n_documents)
        Log-entropy weighted matrix. Zero entries stay zero; a term that never
        occurs yields an all-zero row.
    """
    f = np.asarray(f, dtype=float)
    n = f.shape[1]                                  # number of documents
    totals = np.sum(f, axis=1)[:, np.newaxis]       # total frequency of each term

    # Proportion of term frequency in document vs total. The masked divide leaves
    # 0 for terms that never occur instead of evaluating 0 / 0.
    p = np.divide(f, totals, out=np.zeros_like(f), where=totals != 0)

    # With fewer than two documents every proportion is 0 or 1, so p * log(p) is
    # already 0; normalise by 1 rather than by log(1) == 0.
    log_n = np.log(n) if n > 1 else 1.0

    # Terms within the sum, evaluated only where p > 0 (the limit of p * log(p)
    # at 0 is 0), which avoids log(0) and the resulting 0 * -inf = NaN.
    terms = np.zeros_like(p)
    positive = p > 0
    terms[positive] = p[positive] * np.log(p[positive]) / log_n

    s = np.sum(terms, axis=1)[:, np.newaxis]        # take sum and reshape
    return np.log(1 + f) * (1 + s)

# Demo: print the log-entropy weighted version of tf_mat (terms x documents).
print('Using LE:\n\n', le(tf_mat))

Using LE:

 [[0.47448359 0.47448359 0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.47448359 0.47448359 0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.34657359 0.34657359 0.34657359]
 [0.47448359 0.         0.         0.47448359 0.         0.
  0.         0.         0.        ]
 [0.47448359 0.         0.47448359 0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.47448359 0.47448359]
 [0.         0.47448359 0.         0.         0.47448359 0.
  0.         0.         0.        ]
 [0.         0.47448359 0.         0.         0.         0.
  0.         0.         0.47448359]
 [0.         0.3651518  0.3651518  0.5787519  0.         0.
  0.         0.         0.        ]
 [0.         0.47448359 0.         0.         0.47448359 0.
  0.         0.         0.        ]
 [0.         0.         0.  

  """
  """
