In [2]:
import numpy as np
import pandas as pd


def loadData():
    users = {'Alice': {'A': 5, 'B': 3, 'C': 4, 'D': 4},
             'user1': {'A': 3, 'B': 1, 'C': 2, 'D': 3, 'E': 3},
             'user2': {'A': 4, 'B': 3, 'C': 4, 'D': 3, 'E': 5},
             'user3': {'A': 3, 'B': 3, 'C': 1, 'D': 5, 'E': 4},
             'user4': {'A': 1, 'B': 5, 'C': 5, 'D': 2, 'E': 1}
             }
    return users

In [3]:
user_data = loadData()
similarity_matrix = pd.DataFrame(
    np.identity(len(user_data)),
    index=user_data.keys(),
    columns=user_data.keys(),
)
for u1, item1 in user_data.items():
    for u2, item2 in user_data.items():
        if u1 == u2: continue
        vec1, vec2 = [], []
        for item, rating1 in item1.items():
            rating2 = item2.get(item, -1)
            if rating2 == -1: continue
            vec1.append(rating1)
            vec2.append(rating2)
        similarity_matrix[u1][u2] = np.corrcoef(vec1, vec2)[0][1]
        print(np.corrcoef(vec1, vec2))
        print('---')
print(similarity_matrix)

[[1.         0.85280287]
 [0.85280287 1.        ]]
---
[[1.         0.70710678]
 [0.70710678 1.        ]]
---
[[1. 0.]
 [0. 1.]]
---
[[ 1.         -0.79211803]
 [-0.79211803  1.        ]]
---
[[1.         0.85280287]
 [0.85280287 1.        ]]
---
[[1.         0.46770717]
 [0.46770717 1.        ]]
---
[[1.         0.48995593]
 [0.48995593 1.        ]]
---
[[ 1.        -0.9001488]
 [-0.9001488  1.       ]]
---
[[1.         0.70710678]
 [0.70710678 1.        ]]
---
[[1.         0.46770717]
 [0.46770717 1.        ]]
---
[[ 1.         -0.16116459]
 [-0.16116459  1.        ]]
---
[[ 1.         -0.46656947]
 [-0.46656947  1.        ]]
---
[[1. 0.]
 [0. 1.]]
---
[[1.         0.48995593]
 [0.48995593 1.        ]]
---
[[ 1.         -0.16116459]
 [-0.16116459  1.        ]]
---
[[ 1.        -0.6415029]
 [-0.6415029  1.       ]]
---
[[ 1.         -0.79211803]
 [-0.79211803  1.        ]]
---
[[ 1.        -0.9001488]
 [-0.9001488  1.       ]]
---
[[ 1.         -0.46656947]
 [-0.46656947  1.        ]]

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  similarity_matrix[u1][u2] = np.corrcoef(vec1, vec2)[0][1]


In [4]:
similarity_matrix

Unnamed: 0,Alice,user1,user2,user3,user4
Alice,1.0,0.852803,0.707107,0.0,-0.792118
user1,0.852803,1.0,0.467707,0.489956,-0.900149
user2,0.707107,0.467707,1.0,-0.161165,-0.466569
user3,0.0,0.489956,-0.161165,1.0,-0.641503
user4,-0.792118,-0.900149,-0.466569,-0.641503,1.0


In [5]:
target_user = 'Alice'
similarity_matrix[target_user]

Alice    1.000000
user1    0.852803
user2    0.707107
user3    0.000000
user4   -0.792118
Name: Alice, dtype: float64

In [6]:
num = 2
sim_users = similarity_matrix[target_user].sort_values(ascending=False)[1: num + 1].index.tolist()
sim_users

['user1', 'user2']

In [10]:
weighted_scores = 0.
corr_value_sum = 0.
target_item = 'E'
for user in sim_users:
    corr_value = similarity_matrix[target_user][user]
    user_mean_rating = np.mean(list(user_data[user].values()))
    
    weighted_scores += corr_value * (user_data[user][target_item] -user_mean_rating)
    corr_value_sum += corr_value
target_user_mean_rating = np.mean(list(user_data[target_user].values()))
target_item_pred = target_user_mean_rating + weighted_scores / corr_value_sum
print(target_item_pred)

4.871979899370592
