In [1]:
import numpy as np
from numpy import random as rand

In [2]:
def create_matrix(n_rows, n_columns):
    scores_array = rand.uniform(1,10,size=(n_rows, n_columns))
    return scores_array
 

In [114]:
# Test function
scores = create_matrix(20,20)

print(scores)

[[5.36265708 2.10703664 3.71312329 4.61481869 9.7196107 ]
 [8.17963001 3.4619645  1.84202186 6.18416895 4.35127878]
 [7.38951605 2.95639157 1.40550588 8.18909246 6.73713526]
 [6.01126905 8.42338164 7.94784608 5.78392251 4.33921289]
 [5.0805607  4.80334916 8.04343701 4.54649378 9.09480025]]


In [115]:
def calculate_scores_for_prediction(scores_array, percent):
    scores_for_prediction = scores_array.copy()
    number_of_zeros = int(np.round((1-percent)*scores_array.shape[0]*scores_array.shape[1]))
    choices = np.random.choice(scores_array.size, number_of_zeros, replace=False)
    scores_for_prediction.ravel()[choices] = 0 
    
    return scores_for_prediction

In [117]:
# Test function
scores_predict = calculate_scores_for_prediction(scores, 0.7)

print(scores_predict)

[[5.36265708 2.10703664 3.71312329 4.61481869 0.        ]
 [0.         3.4619645  1.84202186 6.18416895 0.        ]
 [7.38951605 2.95639157 1.40550588 0.         6.73713526]
 [6.01126905 0.         7.94784608 5.78392251 4.33921289]
 [5.0805607  4.80334916 8.04343701 4.54649378 9.09480025]]


In [118]:
def Jaccard_Similarity(x, y):
    intersection = len(set(x) & set(y))
    union = len(set(x) | set(y))
    return intersection/float(union)

In [119]:
# Test function
Jaccard_Similarity(scores_predict[:,0], scores_predict[:,4])

0.125

In [120]:
def Dice_Similarity(x,y):
    intersection = len(set(x) & set(y))
    return (2*intersection)/(len(x) + len(y))

In [121]:
# Test function
Dice_Similarity(scores_predict[:,0], scores_predict[:,4])

0.2

In [122]:
def Cosine_Similarity(x,y):
    dot_product = np.dot(x,y)
    x_norm = np.linalg.norm(x)
    y_norm = np.linalg.norm(y)
    return dot_product/(x_norm*y_norm)

In [123]:
# Test function
Cosine_Similarity(scores_predict[:,0], scores_predict[:,4])

0.8354461414340628

In [124]:
def Adjusted_Cosine_Similarity(x,y):
    x_mean = np.mean(x)
    x_adjusted = x - x_mean
    
    y_mean = np.mean(y)
    y_adjusted = y - y_mean
    
    return Cosine_Similarity(x_adjusted, y_adjusted)

In [125]:
# Test function
Adjusted_Cosine_Similarity(scores_predict[:,0], scores_predict[:,4])

0.5686346532793967

In [170]:
def predict_scores_in_matrix(scores_predict, scores_actual, method_prediction, method_calculation, k):
    mae = 0
    for idx, x in np.ndenumerate(scores_predict):
        if x == 0:
            predicted_score = predict_score_in_place(idx[0], idx[1], scores_predict, method_prediction, method_calculation, k)
            actual_score = scores_actual[idx[0],idx[1]]
            mae += np.absolute(predicted_score - actual_score)
    return mae            
            

In [171]:
predict_scores_in_matrix(scores_predict, method_prediction = 'Adjusted Cosine', method_calculation = 'Simple Average', k = 2)

TypeError: predict_scores_in_matrix() missing 1 required positional argument: 'scores_actual'

In [172]:
def predict_score_in_place(n_row, n_col, score_matrix, method_prediction, method_calculation, k):
    similarities = []
    for j in range(len(score_matrix[n_row])):
        current_column = j
        if j == n_col:
            continue
        else:
            vector_01 = []
            vector_02 = []
            for i in range(len(score_matrix[:,current_column])):
                if score_matrix[i,current_column] != 0 and score_matrix[i,n_col] != 0:
                    vector_01.append(score_matrix[i,current_column])
                    vector_02.append(score_matrix[i,n_col])
            # Make some changes
            similarity = choose_comparision_method(method_prediction, vector_01, vector_02)
            weight = Adjusted_Cosine_Similarity(vector_01, vector_02)
            similarities.append((similarity, current_column, weight))
    return calculate_score(score_matrix, similarities, n_row, k, method_calculation)

In [173]:
def choose_comparision_method(name_of_method, vector_01, vector_02):
    if name_of_method == 'Jaccard':
        return Jaccard_Similarity(vector_01, vector_02)
    elif name_of_method == 'Dice':
        return Dice_Similarity(vector_01, vector_02)
    elif name_of_method == 'Cosine':
        return Cosine_Similarity(vector_01, vector_02)
    elif name_of_method == 'Adjusted Cosine':
        return Adjusted_Cosine_Similarity(vector_01, vector_02)

In [174]:
def calculate_score(score_matrix, similarity_matrix, row, k, method):
    similarity_matrix.sort(key=lambda x:x[0], reverse=True)
    sum_scores = 0
    if method == 'Simple Average':
        for i in range(k):
            column = similarity_matrix[i][1]
            score = score_matrix[row][column]
            sum_scores += score
        return sum_scores/k
    elif method == 'Weighted Average':
        sum_weights = 0;
        for i in range(k):
            column = similarity_matrix[i][1]
            weight = similarity_matrix[i][2]
            score = score_matrix[row][column]
            weighted_score = weight*score
            sum_scores += weighted_score
            sum_weights += weight
        return sum_scores/sum_weights

In [175]:
def item_to_item_collaborative_filtering(n_rows, n_cols, percent, method_prediction, method_calculation, k, n_iters):
    scores_matrix = create_matrix(n_rows, n_cols)
    for i in range(n_iters):
        scores_to_predict = calculate_scores_for_prediction(scores_matrix, percent)
        mean_absolute_error = predict_scores_in_matrix(scores_to_predict, scores_matrix,method_prediction,method_calculation, k)
        print("For iteration ", i+1, " the mae is: ", mean_absolute_error)

In [180]:
item_to_item_collaborative_filtering(50,50,0.9,'Cosine','Simple Average',20,10)

For iteration  1  the mae is:  569.1543771252826
For iteration  2  the mae is:  553.3976244444865
For iteration  3  the mae is:  590.784631040791
For iteration  4  the mae is:  599.82398794429
For iteration  5  the mae is:  577.9388722902141
For iteration  6  the mae is:  552.0555605718326
For iteration  7  the mae is:  560.6918832404845
For iteration  8  the mae is:  605.7102871059817
For iteration  9  the mae is:  550.5181262540964
For iteration  10  the mae is:  590.5085876055665
