In [1]:
import numpy as np
import pandas as pd

# Define number of users and items
num_users = 5
num_items = 6

# Generate random data: user-item interaction (ratings or binary interactions)
# Using random integers for ratings, can also use binary (0 or 1) for interaction
np.random.seed(42)  # For reproducibility
interaction_data = np.random.randint(1, 6, size=(num_users, num_items))  # Ratings between 1 and 5

# Create DataFrame for better visualization
interaction_df = pd.DataFrame(interaction_data, columns=[f"Item_{i+1}" for i in range(num_items)],
                               index=[f"User_{i+1}" for i in range(num_users)])

print("User-Item Interaction Data:")
print(interaction_df)


User-Item Interaction Data:
        Item_1  Item_2  Item_3  Item_4  Item_5  Item_6
User_1       4       5       3       5       5       2
User_2       3       3       3       5       4       3
User_3       5       2       4       2       4       5
User_4       1       4       2       5       4       1
User_5       1       3       3       2       4       4


In [2]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate user-user similarity matrix
user_similarity = cosine_similarity(interaction_df)

# Convert the similarity matrix to a DataFrame for better readability
user_similarity_df = pd.DataFrame(user_similarity, columns=interaction_df.index, index=interaction_df.index)

print("User-User Similarity Matrix:")
print(user_similarity_df)


User-User Similarity Matrix:
          User_1    User_2    User_3    User_4    User_5
User_1  1.000000  0.972204  0.847571  0.951270  0.872662
User_2  0.972204  1.000000  0.888925  0.933249  0.906620
User_3  0.847571  0.888925  1.000000  0.690577  0.895443
User_4  0.951270  0.933249  0.690577  1.000000  0.832424
User_5  0.872662  0.906620  0.895443  0.832424  1.000000


In [3]:
# Calculate item-item similarity matrix (transpose of the user-item matrix)
item_similarity = cosine_similarity(interaction_df.T)

# Convert the similarity matrix to a DataFrame for better readability
item_similarity_df = pd.DataFrame(item_similarity, columns=interaction_df.columns, index=interaction_df.columns)

print("Item-Item Similarity Matrix:")
print(item_similarity_df)


Item-Item Similarity Matrix:
          Item_1    Item_2    Item_3    Item_4    Item_5    Item_6
Item_1  1.000000  0.803685  0.930480  0.791521  0.881972  0.878850
Item_2  0.803685  1.000000  0.900486  0.968030  0.974894  0.764471
Item_3  0.930480  0.900486  1.000000  0.864581  0.974084  0.963753
Item_4  0.791521  0.968030  0.864581  1.000000  0.942434  0.710429
Item_5  0.881972  0.974894  0.974084  0.942434  1.000000  0.886167
Item_6  0.878850  0.764471  0.963753  0.710429  0.886167  1.000000


In [6]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import time

# Function to calculate user-user similarity efficiently (O(m * n') time)
def calculate_user_user_similarity(interaction_matrix, n_prime):
    m = interaction_matrix.shape[0]  # number of users
    n = interaction_matrix.shape[1]  # number of items

    # Initialize similarity matrix with shape m x m (space complexity O(m²))
    user_similarity_matrix = np.zeros((m, m))

    # Iterate through each pair of users (O(m * m) pairs)
    for i in range(m):
        for j in range(i, m):  # We only calculate for upper triangle, symmetry
            # Find common non-zero ratings between user i and user j
            common_items = np.where((interaction_matrix[i, :] > 0) & (interaction_matrix[j, :] > 0))[0]
            if len(common_items) >= n_prime:  # Check if the number of common ratings is at least n'
                user_similarity_matrix[i, j] = cosine_similarity(
                    interaction_matrix[i, common_items].reshape(1, -1),
                    interaction_matrix[j, common_items].reshape(1, -1)
                )[0][0]
                user_similarity_matrix[j, i] = user_similarity_matrix[i, j]  # Symmetric similarity

    return user_similarity_matrix


# Function to calculate item-item similarity efficiently (O(n * m') time)
def calculate_item_item_similarity(interaction_matrix, m_prime):
    m = interaction_matrix.shape[0]  # number of users
    n = interaction_matrix.shape[1]  # number of items

    # Initialize similarity matrix with shape n x n (space complexity O(n²))
    item_similarity_matrix = np.zeros((n, n))

    # Iterate through each pair of items (O(n * n) pairs)
    for i in range(n):
        for j in range(i, n):  # We only calculate for upper triangle, symmetry
            # Find common users who have rated both item i and item j
            common_users = np.where((interaction_matrix[:, i] > 0) & (interaction_matrix[:, j] > 0))[0]
            if len(common_users) >= m_prime:  # Check if the number of common users is at least m'
                item_similarity_matrix[i, j] = cosine_similarity(
                    interaction_matrix[common_users, i].reshape(1, -1).T,
                    interaction_matrix[common_users, j].reshape(1, -1).T
                )[0][0]
                item_similarity_matrix[j, i] = item_similarity_matrix[i, j]  # Symmetric similarity

    return item_similarity_matrix

# Time and Space Complexity Calculation
def calculate_time_and_space_complexity(interaction_matrix, n_prime, m_prime):
    m, n = interaction_matrix.shape

    # Calculate User-User Similarity
    start_time = time.time()
    user_similarity = calculate_user_user_similarity(interaction_matrix, n_prime)
    user_similarity_time = time.time() - start_time
    user_similarity_space = m * m  # Space for the user-user similarity matrix

    # Calculate Item-Item Similarity
    start_time = time.time()
    item_similarity = calculate_item_item_similarity(interaction_matrix, m_prime)
    item_similarity_time = time.time() - start_time
    item_similarity_space = n * n  # Space for the item-item similarity matrix

    # Calculate total space complexity (including interaction matrix)
    total_space = m * n + user_similarity_space + item_similarity_space

    return {
        "user_similarity_time": user_similarity_time,
        "item_similarity_time": item_similarity_time,
        "user_similarity_space": user_similarity_space,
        "item_similarity_space": item_similarity_space,
        "total_space": total_space
    }

# Generate random interaction data (for testing)
np.random.seed(42)
m = 5  # number of users
n = 6  # number of items
interaction_data = np.random.randint(1, 6, size=(m, n))  # Random ratings between 1 and 5

# Create the interaction matrix
interaction_matrix = np.array(interaction_data)

# Set thresholds for common ratings and common users/items
n_prime = 3  # Minimum number of common ratings to compute similarity
m_prime = 3  # Minimum number of common users to compute similarity

# Calculate Time and Space Complexity
complexity = calculate_time_and_space_complexity(interaction_matrix, n_prime, m_prime)

print("Time and Space Complexity Metrics:")
print(f"User-User Similarity Time: {complexity['user_similarity_time']} seconds")
print(f"Item-Item Similarity Time: {complexity['item_similarity_time']} seconds")
print(f"User-User Similarity Space: {complexity['user_similarity_space']} units")
print(f"Item-Item Similarity Space: {complexity['item_similarity_space']} units")
print(f"Total Space: {complexity['total_space']} units")


Time and Space Complexity Metrics:
User-User Similarity Time: 0.008985757827758789 seconds
Item-Item Similarity Time: 0.011535167694091797 seconds
User-User Similarity Space: 25 units
Item-Item Similarity Space: 36 units
Total Space: 91 units


In [7]:
import numpy as np

# Function to calculate sparsity of a matrix
def calculate_sparsity(matrix):
    # Total number of elements in the matrix
    total_elements = matrix.size

    # Number of zero entries in the matrix
    zero_entries = np.count_nonzero(matrix == 0)

    # Sparsity is the ratio of zero entries to total elements
    sparsity = zero_entries / total_elements

    return sparsity, zero_entries, total_elements

# Example Usage
np.random.seed(42)
m = 5  # number of users
n = 6  # number of items

# Generate a random user-item interaction matrix with values between 0 and 5
interaction_matrix = np.random.randint(0, 6, size=(m, n))

# Simulate sparsity by setting some values to 0
interaction_matrix[0, 1] = 0
interaction_matrix[2, 3] = 0
interaction_matrix[4, 5] = 0

print("User-Item Interaction Matrix:")
print(interaction_matrix)

# Calculate the sparsity of the interaction matrix
sparsity, zero_entries, total_elements = calculate_sparsity(interaction_matrix)

print("\nSparsity Information:")
print(f"Sparsity: {sparsity:.4f}")
print(f"Zero Entries: {zero_entries}")
print(f"Total Elements: {total_elements}")


User-Item Interaction Matrix:
[[3 0 2 4 4 1]
 [2 2 2 4 3 2]
 [5 4 1 0 5 5]
 [1 3 4 0 3 1]
 [5 4 3 0 0 0]]

Sparsity Information:
Sparsity: 0.2000
Zero Entries: 6
Total Elements: 30
