In [21]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import time

# Step 1: Build the toy ratings dataset from (user, item, rating) records.
# Charlie rates Book3 twice on purpose, so the aggregation step has work to do.
rating_records = [
    ('Alice', 'Book1', 4),
    ('Bob', 'Book2', 5),
    ('Charlie', 'Book3', 2),
    ('Alice', 'Book2', 3),
    ('Bob', 'Book3', 1),
    ('David', 'Book1', 5),
    ('Charlie', 'Book3', 1),
    ('Eve', 'Book1', 4),
    ('David', 'Book2', 3),
]
# Transpose the records into the column-oriented mapping pandas consumes.
data = dict(zip(('user', 'item', 'rating'), map(list, zip(*rating_records))))

df = pd.DataFrame(data)
print("=== Original DataFrame ===")
print(df)

# Step 2: Collapse repeated (user, item) pairs into one row by summing their
# ratings; pivot() below requires each (user, item) pair to be unique.
rating_totals = df.groupby(['user', 'item'])['rating'].sum()
df_grouped = rating_totals.reset_index()

# Reshape to one row per user and one column per item. Pairs with no rating
# come out as NaN and are replaced by 0.
wide_ratings = df_grouped.pivot(index='user', columns='item', values='rating')
user_item_matrix = wide_ratings.fillna(0)
print("\n=== User-Item Matrix ===")
print(user_item_matrix)

# Step 3: Collaborative Filtering Similarity Computation
def compute_similarity(matrix, axis=0):
    """
    Compute a labelled pairwise cosine-similarity matrix.

    Parameters
    ----------
    matrix : pandas.DataFrame
        Ratings matrix, expected to have users as rows and items as columns.
    axis : int, default 0
        - axis=0: user-based similarity (compare rows)
        - axis=1: item-based similarity (compare columns)

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are both the compared labels
        (the row labels for axis=0, the column labels for axis=1).

    Raises
    ------
    ValueError
        If ``axis`` is neither 0 nor 1. Without this check any other value
        would silently be treated as user-based similarity.
    """
    if axis not in (0, 1):
        raise ValueError(
            f"axis must be 0 (user-based) or 1 (item-based), got {axis!r}"
        )

    # cosine_similarity compares the rows of its input, so item-based
    # similarity is obtained by transposing items into rows first.
    if axis == 1:
        matrix = matrix.T

    similarity = cosine_similarity(matrix)
    return pd.DataFrame(similarity, index=matrix.index, columns=matrix.index)

# Step 4: Runtime Measurement (wall-clock time of each similarity computation)
def measure_time_complexity(user_item_matrix):
    """
    Time the user-based and item-based similarity computations once each.

    Despite the name, this reports elapsed wall-clock time for a single run
    of each computation, not an asymptotic complexity. The timings are
    printed; the similarity matrices are returned.

    Parameters
    ----------
    user_item_matrix : pandas.DataFrame
        Ratings matrix passed unchanged to ``compute_similarity``.

    Returns
    -------
    tuple
        ``(user_similarity_df, item_similarity_df)`` as returned by
        ``compute_similarity`` with ``axis=0`` and ``axis=1`` respectively.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available; time.time() can be too coarse for intervals this short and
    # may jump if the system clock is adjusted.

    # User-based CF
    start_time = time.perf_counter()
    user_similarity_df = compute_similarity(user_item_matrix, axis=0)
    user_based_time = time.perf_counter() - start_time

    # Item-based CF
    start_time = time.perf_counter()
    item_similarity_df = compute_similarity(user_item_matrix, axis=1)
    item_based_time = time.perf_counter() - start_time

    print("\n=== Time Complexity ===")
    print(f"User-based CF Time: {user_based_time:.6f} seconds")
    print(f"Item-based CF Time: {item_based_time:.6f} seconds")

    return user_similarity_df, item_similarity_df

# Step 5: Complexity Analysis
def complexity_analysis(user_item_matrix):
    """
    Print the nominal time and space complexity of both CF variants.

    The first dimension of ``user_item_matrix.shape`` is reported as the
    number of users (N) and the second as the number of items (M). Nothing is
    measured here; the big-O expressions are simply filled in with those two
    sizes. Returns None.
    """
    dims = user_item_matrix.shape
    n_users = dims[0]
    n_items = dims[1]

    report_lines = [
        "\n=== Complexity Analysis ===",
        f"User-based CF Time Complexity: O(N^2 * M) = O({n_users}^2 * {n_items})",
        f"User-based CF Space Complexity: O(N^2) = O({n_users}^2)",
        f"Item-based CF Time Complexity: O(M^2 * N) = O({n_items}^2 * {n_users})",
        f"Item-based CF Space Complexity: O(M^2) = O({n_items}^2)",
    ]
    print("\n".join(report_lines))

# Step 6: Execute the entire pipeline
# Step 6: Run the whole pipeline when executed as a script
if __name__ == "__main__":
    # Show the aggregated (user, item, rating) table
    print("\n=== Step 1: DataFrame ===")
    print(df_grouped)

    # Report the nominal complexity for this matrix's dimensions
    complexity_analysis(user_item_matrix)

    # Time both similarity computations and keep their results
    user_similarity_df, item_similarity_df = measure_time_complexity(user_item_matrix)

    # Display each similarity matrix under its own heading
    for section_title, similarity_frame in (
        ("\n=== User Similarity Matrix (User-based CF) ===", user_similarity_df),
        ("\n=== Item Similarity Matrix (Item-based CF) ===", item_similarity_df),
    ):
        print(section_title)
        print(similarity_frame)


=== Original DataFrame ===
      user   item  rating
0    Alice  Book1       4
1      Bob  Book2       5
2  Charlie  Book3       2
3    Alice  Book2       3
4      Bob  Book3       1
5    David  Book1       5
6  Charlie  Book3       1
7      Eve  Book1       4
8    David  Book2       3

=== User-Item Matrix ===
item     Book1  Book2  Book3
user                        
Alice      4.0    3.0    0.0
Bob        0.0    5.0    1.0
Charlie    0.0    0.0    3.0
David      5.0    3.0    0.0
Eve        4.0    0.0    0.0

=== Step 1: DataFrame ===
      user   item  rating
0    Alice  Book1       4
1    Alice  Book2       3
2      Bob  Book2       5
3      Bob  Book3       1
4  Charlie  Book3       3
5    David  Book1       5
6    David  Book2       3
7      Eve  Book1       4

=== Complexity Analysis ===
User-based CF Time Complexity: O(N^2 * M) = O(5^2 * 3)
User-based CF Space Complexity: O(N^2) = O(5^2)
Item-based CF Time Complexity: O(M^2 * N) = O(3^2 * 5)
Item-based CF Space Complexity: O(M^