In [1]:
import numpy as np
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import pearsonr

In [3]:
# Example ratings that two users gave to the same five items
user1, user2 = (
    np.array(item_ratings)
    for item_ratings in ([4, 5, 2, 3, 4], [5, 3, 2, 4, 5])
)

In [5]:
# 1. Cosine similarity.
# SciPy's `cosine` returns a *distance*, so the similarity is its complement (1 - distance).
cosine_similarity = 1 - cosine(u=user1, v=user2)
print(f"Cosine Similarity: {cosine_similarity:.4f}")

Cosine Similarity: 0.9548


In [9]:
# 2. Pearson correlation similarity.
# `pearsonr` returns (correlation coefficient, p-value); only the coefficient is needed here.
# Index the result instead of unpacking into `_`: at notebook top level, assigning to `_`
# overwrites IPython's "last output" variable, which IPython then stops updating.
pearson_corr = pearsonr(user1, user2)[0]
print(f"Pearson Correlation Similarity: {pearson_corr:.4f}")

Pearson Correlation Similarity: 0.4372


In [13]:
# 3. Euclidean distance, converted to a similarity score.
euclidean_distance = euclidean(u=user1, v=user2)
# A distance d >= 0 is mapped to 1 / (1 + d), which lies in (0, 1];
# identical vectors (d == 0) score exactly 1.
euclidean_similarity = 1 / (1 + euclidean_distance)
print(f"Euclidean Distance Similarity: {euclidean_similarity:.4f}")

Euclidean Distance Similarity: 0.2743


In [21]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import pearsonr

In [31]:
# User-item rating matrix: one row per user, one column per movie.
users = ["Raju", "John", "Ramya", "Kishore"]

ratings = np.array(
    [
        [5, 3, 4, 4, 2],  # Raju
        [3, 1, 2, 3, 3],  # John
        [4, 3, 4, 5, 1],  # Ramya
        [2, 2, 1, 2, 4],  # Kishore
    ]
)

# Label the rows with user names and the columns with movie titles.
df = pd.DataFrame(
    data=ratings,
    index=users,
    columns=["Bahubali", "Mufasa", "Interstellar", "RRR", "Mrs"],
)
df

Unnamed: 0,Bahubali,Mufasa,Interstellar,RRR,Mrs
Raju,5,3,4,4,2
John,3,1,2,3,3
Ramya,4,3,4,5,1
Kishore,2,2,1,2,4


In [33]:
# Pairwise user-user similarity over a user-item rating table
def compute_similarity(df):
    """Compute three similarity scores for every unordered pair of rows in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per user and one column per item, holding numeric ratings.
        The index supplies the user labels reported in the result.

    Returns
    -------
    pandas.DataFrame
        One row per pair of users (each pair once, in row order) with the
        columns "User 1", "User 2", "Cosine Similarity",
        "Pearson Correlation" and "Euclidean Similarity".
        The frame has no rows when ``df`` has fewer than two users.

    Notes
    -----
    * Cosine similarity is ``1 - cosine distance``.
    * Euclidean similarity is ``1 / (1 + distance)``, which maps a distance
      in [0, inf) to a score in (0, 1].
    * Pearson correlation is undefined for a constant rating row; SciPy
      reports NaN (with a warning) in that case.
    """
    # Take the labels from the frame itself rather than from a notebook-level
    # list, so the function is correct for any DataFrame passed in.
    labels = list(df.index)
    vectors = df.to_numpy()
    num_users = len(labels)
    similarity_results = []

    for i in range(num_users):
        for j in range(i + 1, num_users):  # j > i: each unordered pair once, no self-pairs
            first, second = vectors[i], vectors[j]

            # SciPy's `cosine` is a distance; its complement is the similarity.
            cos_sim = 1 - cosine(first, second)

            # `pearsonr` returns (coefficient, p-value); keep the coefficient.
            pearson_sim = pearsonr(first, second)[0]

            # Turn the distance into a bounded similarity score.
            euc_sim = 1 / (1 + euclidean(first, second))

            similarity_results.append([labels[i], labels[j], cos_sim, pearson_sim, euc_sim])

    return pd.DataFrame(similarity_results, columns=["User 1", "User 2", "Cosine Similarity", "Pearson Correlation", "Euclidean Similarity"])

# Score every pair of users in the rating table
similarity_df = compute_similarity(df=df)

# Leave the frame as the cell's last expression so the notebook renders it as a table
similarity_df

Unnamed: 0,User 1,User 2,Cosine Similarity,Pearson Correlation,Euclidean Similarity
0,Raju,John,0.92967,0.196116,0.210897
1,Raju,Ramya,0.978337,0.838557,0.366025
2,Raju,Kishore,0.799014,-0.720577,0.16139
3,John,Ramya,0.885465,0.03686,0.195194
4,John,Kishore,0.919145,0.408248,0.309017
5,Ramya,Kishore,0.725961,-0.812605,0.150221
