In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

In [2]:
def mse(predictions, targets):
    diff = predictions - targets
    squared_diff = np.square(diff)
    mean_squared_diff = np.mean(squared_diff)
    return mean_squared_diff

In [3]:
def cosine_similarity(vector1, vector2):
    dot_product = np.dot(vector1, vector2)
    norm1 = np.linalg.norm(vector1)
    norm2 = np.linalg.norm(vector2)
    return dot_product / (norm1 * norm2)

In [4]:
def pearson_similarity(vector1, vector2):
    mean1 = np.mean(vector1)
    mean2 = np.mean(vector2)
    centered1 = vector1 - mean1
    centered2 = vector2 - mean2
    numerator = np.dot(centered1, centered2)
    denominator = np.linalg.norm(centered1) * np.linalg.norm(centered2)
    if denominator == 0:
        return 0
    return numerator / denominator

In [5]:
def user_based_knn(matrix, k, similarity_measure):
    num_users = matrix.shape[0]
    predictions = np.zeros_like(matrix)

    for i in range(num_users):
        similarities = []

        for j in range(num_users):
            if i != j:
                similarity = similarity_measure(matrix[i], matrix[j])
                similarities.append((j, similarity))

        similarities.sort(key=lambda x: x[1], reverse=True)
        neighbors = similarities[:k]

        for item in range(matrix.shape[1]):
            rating_sum = 0.0
            weight_sum = 0.0

            for neighbor in neighbors:
                neighbor_index = neighbor[0]
                similarity = neighbor[1]

                rating_sum += similarity * matrix[neighbor_index, item]
                weight_sum += similarity

            if weight_sum != 0:
                predictions[i, item] = rating_sum / weight_sum

    return predictions

In [6]:
np.random.seed(42)
num_users = 4242
num_items = 135
ratings = np.random.randint(1, 6, size=(num_users, num_items))

In [7]:
k_values = range(1, 7)
results = []

for k in k_values:
    
    predictions_cosine = user_based_knn(ratings, k, cosine_similarity)
    rmse_cosine = np.sqrt(mse(ratings, predictions_cosine))
        
    predictions_pearson = user_based_knn(ratings, k, pearson_similarity)
    rmse_pearson = np.sqrt(mse(ratings, predictions_pearson))
    
    results.append([k, rmse_cosine, rmse_pearson])


In [8]:
df_results = pd.DataFrame(results, columns=['K', 'RMSE (Cossine Similarity)', 'RMSE (Pearson Similarity)'])
print(df_results)

   K  RMSE (Cossine Similarity)  RMSE (Pearson Similarity)
0  1                   1.666012                   1.667589
1  2                   1.459345                   1.473817
2  3                   1.376565                   1.394930
3  4                   1.331280                   1.349565
4  5                   1.304557                   1.320162
5  6                   1.287045                   1.300491


In [9]:
df_results.to_csv('resul.csv')