In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import time


In [5]:
data = {
    'user': ['User1', 'User2', 'User3', 'User1', 'User2', 'User4', 'User3', 'User5', 'User4'],
    'item': ['ItemA', 'ItemB', 'ItemC', 'ItemB', 'ItemC', 'ItemA', 'ItemC', 'ItemA', 'ItemB'],
    'rating': [5, 4, 3, 4, 2, 5, 1, 4, 3]
}
data = pd.DataFrame (data)
data

Unnamed: 0,user,item,rating
0,User1,ItemA,5
1,User2,ItemB,4
2,User3,ItemC,3
3,User1,ItemB,4
4,User2,ItemC,2
5,User4,ItemA,5
6,User3,ItemC,1
7,User5,ItemA,4
8,User4,ItemB,3


In [6]:
df_grouped = df.groupby(['user', 'item'], as_index=False)['rating'].sum()

user_item_matrix = df_grouped.pivot(index='user', columns='item', values='rating').fillna(0)
print("\nUser-Item Matrix:")
print(user_item_matrix)



User-Item Matrix:
item   ItemA  ItemB  ItemC
user                      
User1    5.0    4.0    0.0
User2    0.0    4.0    2.0
User3    0.0    0.0    4.0
User4    5.0    3.0    0.0
User5    4.0    0.0    0.0


In [7]:
def user_based_cf(user_item_matrix):
    user_similarity = cosine_similarity(user_item_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
    return user_similarity_df

def item_based_cf(user_item_matrix):
    item_similarity = cosine_similarity(user_item_matrix.T)  # Transpose to calculate item-item similarity
    item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)
    return item_similarity_df

def measure_time_complexity():
    # User-based CF
    start_time = time.time()
    user_similarity_df = user_based_cf(user_item_matrix)
    user_based_time = time.time() - start_time

    # Item-based CF
    start_time = time.time()
    item_similarity_df = item_based_cf(user_item_matrix)
    item_based_time = time.time() - start_time

    print(f"\nTime taken for User-based CF: {user_based_time:.6f} seconds")
    print(f"Time taken for Item-based CF: {item_based_time:.6f} seconds")


In [9]:

# Print Time and Space Complexity for both methods
def complexity_analysis(user_item_matrix):
    N = user_item_matrix.shape[0]  # Number of users
    M = user_item_matrix.shape[1]  # Number of items

    # User-based CF Time and Space Complexity
    user_based_time_complexity = f"O(N^2 * M) = O({N}^2 * {M})"
    user_based_space_complexity = f"O(N^2) = O({N}^2)"

    # Item-based CF Time and Space Complexity
    item_based_time_complexity = f"O(M^2 * N) = O({M}^2 * {N})"
    item_based_space_complexity = f"O(M^2) = O({M}^2)"

    print("\nComplexity Analysis:")
    print(f"User-based CF Time Complexity: {user_based_time_complexity}")
    print(f"User-based CF Space Complexity: {user_based_space_complexity}")
    print(f"Item-based CF Time Complexity: {item_based_time_complexity}")
    print(f"Item-based CF Space Complexity: {item_based_space_complexity}")

# Run the complete code
if __name__ == "__main__":
    # Show the DataFrame and User-Item matrix
    print("\n--- Step 1: DataFrame and User-Item Matrix ---")
    print(data)

    # Perform Time and Space Complexity Analysis
    complexity_analysis(user_item_matrix)

    # Measure Time Complexity for both User-based and Item-based CF
    measure_time_complexity()

    # Show similarity matrices
    print("\n--- User Similarity Matrix (User-based CF) ---")
    print(user_based_cf(user_item_matrix))

    print("\n--- Item Similarity Matrix (Item-based CF) ---")
    print(item_based_cf(user_item_matrix))


--- Step 1: DataFrame and User-Item Matrix ---
    user   item  rating
0  User1  ItemA       5
1  User2  ItemB       4
2  User3  ItemC       3
3  User1  ItemB       4
4  User2  ItemC       2
5  User4  ItemA       5
6  User3  ItemC       1
7  User5  ItemA       4
8  User4  ItemB       3

Complexity Analysis:
User-based CF Time Complexity: O(N^2 * M) = O(5^2 * 3)
User-based CF Space Complexity: O(N^2) = O(5^2)
Item-based CF Time Complexity: O(M^2 * N) = O(3^2 * 5)
Item-based CF Space Complexity: O(M^2) = O(3^2)

Time taken for User-based CF: 0.000000 seconds
Time taken for Item-based CF: 0.000000 seconds

--- User Similarity Matrix (User-based CF) ---
user      User1     User2     User3     User4     User5
user                                                   
User1  1.000000  0.558744  0.000000  0.990992  0.780869
User2  0.558744  1.000000  0.447214  0.460179  0.000000
User3  0.000000  0.447214  1.000000  0.000000  0.000000
User4  0.990992  0.460179  0.000000  1.000000  0.857493
User5