In [6]:
import numpy as np
import pandas as pd

def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    The value is ``dot(a, b) / (||a|| * ||b||)``.

    Returns ``np.nan`` when either vector has zero norm, because the
    similarity is undefined for a vector with no direction.  NaN entries
    in the inputs propagate, so the result is NaN in that case as well.
    """
    # Compute the dot product first so mismatched shapes still raise here.
    dot_product = np.dot(a, b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)

    # Guard explicitly instead of relying on a 0/0 division (RuntimeWarning).
    if norm_a == 0 or norm_b == 0:
        return np.nan

    return dot_product / (norm_a * norm_b)

def jaccard_similarity(a, b):
    """Return the Jaccard similarity of two binary feature vectors.

    Each element is interpreted by truthiness: the result is the number of
    positions set in both inputs divided by the number of positions set in
    at least one of them.

    Returns ``np.nan`` when no position is set in either input, since the
    ratio is undefined for an empty union.

    Note: NaN is truthy under ``np.logical_and`` / ``np.logical_or``, so a
    NaN entry is counted as a set position, not as a missing value.
    """
    union_size = np.sum(np.logical_or(a, b))

    # Guard explicitly instead of relying on a 0/0 division (RuntimeWarning).
    if union_size == 0:
        return np.nan

    intersection_size = np.sum(np.logical_and(a, b))
    return intersection_size / union_size


def adj_cos(a, b):
    """Return the mean-centered (adjusted) cosine similarity of ``a`` and ``b``.

    Both inputs are converted to ``pandas.Series`` and aligned on their
    index; only entries where both have a non-null value are used.  Each
    side is centered on its own mean over those shared entries, which makes
    the result the Pearson correlation of the shared values.

    Returns:
        ``0`` if either input is empty.
        ``np.nan`` if the inputs share no non-null entries, or if either
        centered vector has zero norm (constant values), since the
        similarity is undefined in those cases.
        Otherwise the similarity, clamped to ``[-1, 1]``.
    """
    a = pd.Series(a)
    b = pd.Series(b)

    # An empty input carries no information; this case is reported as 0.
    if a.empty or b.empty:
        return 0

    # Align on index and keep only the rows where both sides have a value.
    common = pd.concat([a, b], axis=1).dropna()
    if common.empty:
        return np.nan

    # Read the aligned values directly from the joined frame.
    a_common = common.iloc[:, 0]
    b_common = common.iloc[:, 1]

    # Center each side on its own mean over the shared entries.
    a_diff = a_common - a_common.mean()
    b_diff = b_common - b_common.mean()

    norm_a = np.linalg.norm(a_diff)
    norm_b = np.linalg.norm(b_diff)

    # A constant vector has no variation, so the similarity is undefined.
    if norm_a == 0 or norm_b == 0:
        return np.nan

    similarity = np.dot(a_diff, b_diff) / (norm_a * norm_b)

    # Floating-point rounding can push the ratio slightly outside [-1, 1].
    return np.clip(similarity, -1.0, 1.0)

# Toy user profiles encoded as binary feature vectors.
# A position could stand for: likes sports, likes music, likes travel, is active, has photo, etc.
user1 = np.array([1, 0, 1, 1, 1])  # reference user
user2 = np.array([1, 0, 1, 1, 1])  # same features as user1
user3 = np.array([0, 1, 0, 0, 0])  # opposite features to user1
user4 = np.array([1, 0, 1, np.nan, np.nan])  # matches user1 where known, last two missing

# Compare user1 against each other profile, one metric at a time.
cos_sim_1_2, cos_sim_1_3, cos_sim_1_4 = (
    cosine_similarity(user1, other) for other in (user2, user3, user4)
)

jac_sim_1_2, jac_sim_1_3, jac_sim_1_4 = (
    jaccard_similarity(user1, other) for other in (user2, user3, user4)
)

adj_cos_1_2, adj_cos_1_3, adj_cos_1_4 = (
    adj_cos(user1, other) for other in (user2, user3, user4)
)

# Report the results grouped by metric.
print(f"Cosine Similarity between User 1 and User 2: {cos_sim_1_2:.4f}")
print(f"Cosine Similarity between User 1 and User 3: {cos_sim_1_3:.4f}")
print(f"Cosine Similarity between User 1 and User 4: {cos_sim_1_4:.4f}")

print(f"\nJaccard Similarity between User 1 and User 2: {jac_sim_1_2:.4f}")
print(f"Jaccard Similarity between User 1 and User 3: {jac_sim_1_3:.4f}")
print(f"Jaccard Similarity between User 1 and User 4: {jac_sim_1_4:.4f}")

print(f"\nAdjusted Cosine Similarity between User 1 and User 2: {adj_cos_1_2:.4f}")
print(f"Adjusted Cosine Similarity between User 1 and User 3: {adj_cos_1_3:.4f}")
print(f"Adjusted Cosine Similarity between User 1 and User 4: {adj_cos_1_4:.4f}")




Cosine Similarity between User 1 and User 2: 1.0000
Cosine Similarity between User 1 and User 3: 0.0000
Cosine Similarity between User 1 and User 4: nan

Jaccard Similarity between User 1 and User 2: 1.0000
Jaccard Similarity between User 1 and User 3: 0.0000
Jaccard Similarity between User 1 and User 4: 1.0000

Adjusted Cosine Similarity between User 1 and User 2: 1.0000
Adjusted Cosine Similarity between User 1 and User 3: -1.0000
Adjusted Cosine Similarity between User 1 and User 4: 1.0000
