# Memory Based Collaborative Filtering

In [1]:
import numpy as np

## User-Rating Matrix

The ratings matrix is denoted by `R` and it is an `m` by `n` matrix containing `m` users and `n` items. The rating of user `u1` for item `i3` is denoted by `r13`.

$$ U = \{ u_1, u_2 ... u_m \} \land |U| = m $$

$$ I = \{ i_1, i_2 ... i_n \} \land |I| = n $$

$$ R = \{ r_{1,1}, r_{1,2} ... r_{2,1}, r_{2,2} ... r_{m,n} \} \land |R| = m \times n $$

In [62]:
USERS: int = 5
ITEMS: int = 6

MIN_RATING: int = 0
MAX_RATING: int = 5

In [63]:
RATINGS: np.matrix = MAX_RATING * np.random.randn(USERS, ITEMS)
RATINGS[RATINGS < MIN_RATING] = np.nan  # Missing ratings
RATINGS[RATINGS > MAX_RATING] = MAX_RATING  # Max ratings
RATINGS = np.floor(RATINGS)  # Simple ratings
print(RATINGS)

[[nan  5.  3. nan nan  0.]
 [ 5. nan  5. nan nan nan]
 [nan nan nan nan  1.  1.]
 [nan nan nan  3.  1. nan]
 [nan nan nan  2.  0.  0.]]


## Normalization

- https://en.wikipedia.org/wiki/Normalization_(statistics)

In statistics and applications of statistics, normalization can have a range of meanings. In the simplest cases, normalization of ratings means adjusting values measured on different scales to a notionally common scale, often prior to averaging.

$$ \bar{\mu} = \frac{\sum\limits^{^n}_{i=1} x_i}{n}	 $$

$$ σ^2 = \frac{\sum\limits^{^n}_{i=1} (x_i - \bar{\mu})^2}{n}	 $$

$$ z = \frac{x_i - \bar{\mu}}{σ}	 $$

In [124]:
def normalize(matrix: np.matrix) -> np.matrix:
    """
    Normalization Function.
    Returns z-score of each element in the input matrix.
    """    
    sample_size: np.array = np.sum(~np.isnan(matrix), axis=1)
    sample_size[sample_size == 0] = 1    
    averages: np.array = np.nansum(matrix, axis=1) / sample_size
    averages = averages.reshape(averages.shape[0], 1)
    variance: np.matrix = np.nansum(np.power(matrix - averages, 2), axis=1) / sample_size
    variance = variance.reshape(variance.shape[0], 1)
    variance[variance == 0] = 1
    standard: np.matrix = np.power(variance, 0.5)
    normalized: np.matrix = (matrix - averages) / standard
    return normalized

x: np.matrix = np.matrix([ [1, 2, 3], [4, 5, 7] ])
y = normalize(x)
assert x.shape == y.shape
assert y[0].flat[0] == -1.224744871391589
assert y[0].flat[1] == 0
assert y[0].flat[2] == 1.224744871391589
assert y[1].flat[0] == -1.0690449676496974
assert y[1].flat[1] == -0.2672612419124242
assert y[1].flat[2] == 1.3363062095621223

## User-Based Collaborative Filtering

In [81]:
def collaborative_filtering(ratings: np.matrix) -> np.matrix:
    """
    Collaborative Filtering method.
    """
    
    # Start of Collaborative Filtering.
    print('INPUT', ratings)
    
    # Row sample size
    sample_size: np.array = np.sum(~np.isnan(ratings), axis=1)
    sample_size[sample_size == 0] = 1
    print('SIZE', sample_size)
    
    # Row Average
    averages: np.array = np.nansum(ratings, axis=1) / sample_size
    averages = averages.reshape(averages.shape[0], 1)
    print('AVERAGE', averages)

    # Row Variance
    variance: np.matrix = np.nansum(np.power(ratings - averages, 2), axis=1) / sample_size
    variance = variance.reshape(variance.shape[0], 1)
    variance[variance == 0] = 1
    print('VARIANCE', variance)
    
    # Mean Average Ratings
    adjusted_ratings: np.matrix = ratings - averages / variance
    print('ADJUSTED', adjusted_ratings)
    
    # End of Collaborative Filtering
    # print('OUTPUT', ratings)
    return ratings

collaborative_filtering(RATINGS)
print(RATINGS)

INPUT [[nan  5.  3. nan nan  0.]
 [ 5. nan  5. nan nan nan]
 [nan nan nan nan  1.  1.]
 [nan nan nan  3.  1. nan]
 [nan nan nan  2.  0.  0.]]
SIZE [3 2 2 2 3]
AVERAGE [[2.66666667]
 [5.        ]
 [1.        ]
 [2.        ]
 [0.66666667]]
VARIANCE [[4.22222222]
 [1.        ]
 [1.        ]
 [1.        ]
 [0.88888889]]
ADJUSTED [[        nan  4.36842105  2.36842105         nan         nan -0.63157895]
 [ 0.                 nan  0.                 nan         nan         nan]
 [        nan         nan         nan         nan  0.          0.        ]
 [        nan         nan         nan  1.         -1.                 nan]
 [        nan         nan         nan  1.25       -0.75       -0.75      ]]
[[nan  5.  3. nan nan  0.]
 [ 5. nan  5. nan nan nan]
 [nan nan nan nan  1.  1.]
 [nan nan nan  3.  1. nan]
 [nan nan nan  2.  0.  0.]]


## Memory-Based Collaborative Filtering