In [125]:
import numpy as np
from scipy.spatial.distance import euclidean
from tqdm import tqdm
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt

In [126]:

def read_input():
    with open("kinopoisk_input.txt", "r") as input:
        n, m, q = map(int, input.readline().split())
        matrix = []
        for _ in range(n):
            matrix.append(list(map(int, input.readline().split())))
        queries = []
        for _ in range(q):
            queries.append(input.readline().split())
    return n, m, q, np.array(matrix), queries

In [127]:
n, m, q, matrix, queries = read_input()
print(n, m, q, len(matrix), len(matrix[0]), len(queries))

1500 2100 1000 1500 2100 1000


In [128]:
def build_user_similarity_matrix(matrix, n):
    # create user similarity matrix 
    user_similarity_matrix = np.zeros((n, n))
    for i in tqdm(range(n), desc="Building user similarity matrix"):
        for j in range(i + 1, n):
            user_similarity_matrix[i, j] = np.linalg.norm(matrix[i] - matrix[j])
            user_similarity_matrix[j, i] = user_similarity_matrix[i][j]
    return user_similarity_matrix

def build_item_similarity_matrix(matrix, m):
    # create item similarity matrix
    matrix_t = matrix.T
    item_similarity_matrix = np.zeros((m, m))
    for i in tqdm(range(m), desc="Building item similarity matrix"):
        for j in range(i + 1, m):
            item_similarity_matrix[i, j] = np.linalg.norm(matrix_t[i] - matrix_t[j])
            item_similarity_matrix[j, i] = item_similarity_matrix[i][j]
    return item_similarity_matrix

user_similarity_matrix = build_user_similarity_matrix(matrix, n)
items_similarity_matrix = build_item_similarity_matrix(matrix, m)

Building user similarity matrix: 100%|██████████| 1500/1500 [00:04<00:00, 374.54it/s] 
Building item similarity matrix: 100%|██████████| 2100/2100 [00:11<00:00, 179.60it/s]


In [132]:
def find_closest_items(item_similarity_matrix, item_index, k=1):
    closest_items = np.argsort(item_similarity_matrix[item_index])[1]
    return closest_items
def find_closest_user(user_similarity_matrix, user_index, k=1):
    closest_users = np.argsort(user_similarity_matrix[user_index])[1]
    return closest_users

In [143]:
print("Closest items to item 0:", find_closest_items(items_similarity_matrix, 5))
print(items_similarity_matrix[5][:10])
matrix_t = matrix.T
log = []
for l in range(0, m):
    log.append(np.linalg.norm(matrix_t[5] - matrix_t[l]))
log.sort()
print(log)

Closest items to item 0: 3
[50.74445783 50.90186637 50.36864104  8.66025404  9.48683298  0.
 47.40253158 47.5604878  47.41307836 47.81213235]
[0.0, 8.660254037844387, 9.486832980505138, 20.024984394500787, 20.09975124224178, 20.518284528683193, 21.307275752662516, 21.354156504062622, 21.42428528562855, 22.181073012818835, 22.22611077089287, 22.38302928559939, 22.516660498395403, 22.693611435820433, 23.323807579381203, 23.515952032609693, 23.558437978779494, 23.643180835073778, 23.958297101421877, 23.979157616563597, 24.041630560342615, 24.041630560342615, 24.08318915758459, 24.08318915758459, 24.124676163629637, 24.124676163629637, 24.20743687382041, 24.20743687382041, 24.228082879171435, 24.310491562286437, 24.331050121192877, 24.331050121192877, 24.392621835300936, 24.454038521274967, 24.474476501040833, 24.49489742783178, 24.55605831561735, 24.677925358506133, 24.71841418861655, 24.779023386727733, 24.899799195977465, 24.939927826679853, 25.019992006393608, 25.019992006393608, 25.01