<a href="https://colab.research.google.com/github/AhmedNabil30/week8/blob/main/Aly_Maher_221101789_lRS_Lab_complexity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import time

# Step 1: Create and process the data
def create_user_item_matrix(data, agg="sum"):
    """Build a user-item rating matrix from long-format rating records.

    Parameters
    ----------
    data : dict or DataFrame-like
        Anything ``pd.DataFrame`` accepts that yields the columns
        ``'user'``, ``'item'`` and ``'rating'``.
    agg : str or callable, default "sum"
        How several ratings for the same (user, item) pair are combined.
        The default keeps the historical behaviour (ratings are added up).
        Note that summing can push a value outside the original rating
        scale; pass ``"mean"``, ``"max"`` or ``"last"`` when that matters.

    Returns
    -------
    pd.DataFrame
        One row per user, one column per item; pairs with no rating are 0.

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``data``.
    """
    df = pd.DataFrame(data)
    print("Original DataFrame:")
    print(df)

    # Fail with a readable message instead of a bare column name from groupby.
    missing = [col for col in ("user", "item", "rating") if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Collapse duplicate (user, item) rows first: pivot() refuses duplicates.
    df_grouped = df.groupby(['user', 'item'])['rating'].agg(agg).reset_index()

    # Pivot the table to create the User-Item Matrix; unrated pairs become 0.
    user_item_matrix = df_grouped.pivot(index='user', columns='item', values='rating').fillna(0)
    print("\nUser-Item Matrix:")
    print(user_item_matrix)
    return user_item_matrix

# Step 2: Collaborative Filtering
def compute_user_similarity(user_item_matrix):
    """Return the user-user cosine similarity as a labelled DataFrame.

    Each row of ``user_item_matrix`` is compared with every other row; the
    result carries the matrix's index on both axes.
    """
    user_labels = user_item_matrix.index
    return pd.DataFrame(
        cosine_similarity(user_item_matrix),
        index=user_labels,
        columns=user_labels,
    )

def compute_item_similarity(user_item_matrix):
    """Return the item-item cosine similarity as a labelled DataFrame.

    The matrix is transposed so that each item becomes a row before the
    pairwise comparison; the result carries the item labels on both axes.
    """
    item_labels = user_item_matrix.columns
    items_as_rows = user_item_matrix.T
    return pd.DataFrame(
        cosine_similarity(items_as_rows),
        index=item_labels,
        columns=item_labels,
    )

# Step 3: Measure Time Complexity
def measure_time_complexity(user_item_matrix):
    """Time one run of the user-based and item-based similarity computations.

    Each similarity routine is called exactly once on ``user_item_matrix``;
    the elapsed time of each call is printed in seconds.

    Parameters
    ----------
    user_item_matrix : pd.DataFrame
        Matrix handed unchanged to ``compute_user_similarity`` and
        ``compute_item_similarity``.

    Returns
    -------
    tuple
        ``(user_similarity, item_similarity)`` as returned by the two routines.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, which makes it the right tool for short intervals;
    # time.time() follows the system clock and may jump or be too coarse.
    start_time = time.perf_counter()
    user_similarity = compute_user_similarity(user_item_matrix)
    user_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    item_similarity = compute_item_similarity(user_item_matrix)
    item_time = time.perf_counter() - start_time

    print(f"\nTime taken for User-based CF: {user_time:.6f} seconds")
    print(f"Time taken for Item-based CF: {item_time:.6f} seconds")
    return user_similarity, item_similarity

# Step 4: Complexity Analysis
def analyze_complexity(user_item_matrix):
    """Print the big-O formulas for both CF variants with the matrix sizes filled in.

    The shape of ``user_item_matrix`` is read as (N users, M items) and
    substituted into fixed time/space formulas; nothing is measured here.
    """
    n_users, n_items = user_item_matrix.shape
    report_lines = (
        "\nComplexity Analysis:",
        f"User-based CF Time Complexity: O(N^2 * M) = O({n_users}^2 * {n_items})",
        f"User-based CF Space Complexity: O(N^2) = O({n_users}^2)",
        f"Item-based CF Time Complexity: O(M^2 * N) = O({n_items}^2 * {n_users})",
        f"Item-based CF Space Complexity: O(M^2) = O({n_items}^2)",
    )
    for line in report_lines:
        print(line)

# Main function to execute the workflow
def main():
    """Run the demo: build the matrix, print the complexity report, time and show both similarities."""
    # Long-format sample ratings; (User3, ItemC) appears twice on purpose
    # so the duplicate handling in create_user_item_matrix is exercised.
    users = ['User1', 'User2', 'User3', 'User1', 'User2', 'User4', 'User3', 'User5', 'User4']
    items = ['ItemA', 'ItemB', 'ItemC', 'ItemB', 'ItemC', 'ItemA', 'ItemC', 'ItemA', 'ItemB']
    ratings = [5, 4, 3, 4, 2, 5, 1, 4, 3]

    user_item_matrix = create_user_item_matrix(
        {'user': users, 'item': items, 'rating': ratings}
    )

    # Theoretical cost first, then the measured run.
    analyze_complexity(user_item_matrix)
    user_similarity, item_similarity = measure_time_complexity(user_item_matrix)

    sections = (
        ("\n--- User Similarity Matrix (User-based CF) ---", user_similarity),
        ("\n--- Item Similarity Matrix (Item-based CF) ---", item_similarity),
    )
    for heading, similarity in sections:
        print(heading)
        print(similarity)


if __name__ == "__main__":
    main()


Original DataFrame:
    user   item  rating
0  User1  ItemA       5
1  User2  ItemB       4
2  User3  ItemC       3
3  User1  ItemB       4
4  User2  ItemC       2
5  User4  ItemA       5
6  User3  ItemC       1
7  User5  ItemA       4
8  User4  ItemB       3

User-Item Matrix:
item   ItemA  ItemB  ItemC
user                      
User1    5.0    4.0    0.0
User2    0.0    4.0    2.0
User3    0.0    0.0    4.0
User4    5.0    3.0    0.0
User5    4.0    0.0    0.0

Complexity Analysis:
User-based CF Time Complexity: O(N^2 * M) = O(5^2 * 3)
User-based CF Space Complexity: O(N^2) = O(5^2)
Item-based CF Time Complexity: O(M^2 * N) = O(3^2 * 5)
Item-based CF Space Complexity: O(M^2) = O(3^2)

Time taken for User-based CF: 0.003962 seconds
Time taken for Item-based CF: 0.001658 seconds

--- User Similarity Matrix (User-based CF) ---
user      User1     User2     User3     User4     User5
user                                                   
User1  1.000000  0.558744  0.000000  0.990992  0.

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import time

# Step 1: Create User-Item DataFrame with a New Matrix
def create_user_item_df():
    """Build, print and return the fixed 5-user x 5-item sample rating table.

    Columns are the items ``ItemA``..``ItemE``; rows use the default integer
    index, which is given the name ``'User'``.
    """
    ratings_by_item = {
        'ItemA': [4, 0, 3, 5, 0],
        'ItemB': [0, 5, 0, 2, 1],
        'ItemC': [2, 4, 0, 0, 3],
        'ItemD': [0, 0, 4, 1, 5],
        'ItemE': [5, 0, 1, 0, 4],
    }
    user_item_df = pd.DataFrame(ratings_by_item).rename_axis('User')

    print("User-Item DataFrame:")
    print(user_item_df)
    return user_item_df

# Step 2: Compute User Similarity Matrix
def compute_user_similarity(user_item_df):
    """Compute, time and print the user-user cosine similarity.

    Only the ``cosine_similarity`` call is timed; wrapping the result in a
    DataFrame and printing are excluded from the measurement.

    Parameters
    ----------
    user_item_df : pd.DataFrame
        Rating table with one row per user and one column per item.

    Returns
    -------
    pd.DataFrame
        Similarity matrix labelled with ``user_item_df.index`` on both axes.
    """
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # so sub-millisecond intervals are reported reliably.
    start_time = time.perf_counter()
    user_similarity = cosine_similarity(user_item_df)
    end_time = time.perf_counter()

    # Convert to DataFrame for readability
    user_similarity_df = pd.DataFrame(user_similarity, index=user_item_df.index, columns=user_item_df.index)
    computation_time = end_time - start_time

    print("\nUser Similarity DataFrame:")
    print(user_similarity_df)
    print(f"\nTime to compute user similarity: {computation_time:.6f} seconds")
    print(f"Time Complexity: O({user_item_df.shape[0]}^2 * {user_item_df.shape[1]})")
    print(f"Space Complexity: O({user_item_df.shape[0]}^2)")
    return user_similarity_df

# Step 3: Compute Item Similarity Matrix
def compute_item_similarity(user_item_df):
    """Compute, time and print the item-item cosine similarity.

    The table is transposed so items become rows. The transpose and the
    ``cosine_similarity`` call are timed together; wrapping the result in a
    DataFrame and printing are excluded from the measurement.

    Parameters
    ----------
    user_item_df : pd.DataFrame
        Rating table with one row per user and one column per item.

    Returns
    -------
    pd.DataFrame
        Similarity matrix labelled with ``user_item_df.columns`` on both axes.
    """
    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # so sub-millisecond intervals are reported reliably.
    start_time = time.perf_counter()
    item_similarity = cosine_similarity(user_item_df.T)  # Transpose for item similarity
    end_time = time.perf_counter()

    # Convert to DataFrame for readability
    item_similarity_df = pd.DataFrame(item_similarity, index=user_item_df.columns, columns=user_item_df.columns)
    computation_time = end_time - start_time

    print("\nItem Similarity DataFrame:")
    print(item_similarity_df)
    print(f"\nTime to compute item similarity: {computation_time:.6f} seconds")
    print(f"Time Complexity: O({user_item_df.shape[1]}^2 * {user_item_df.shape[0]})")
    print(f"Space Complexity: O({user_item_df.shape[1]}^2)")
    return item_similarity_df

# Main function to execute the workflow
def main():
    """Build the sample rating table, then report user and item similarity in that order."""
    ratings = create_user_item_df()

    # Each routine prints its own matrix, timing and complexity summary;
    # the returned DataFrames are not needed here.
    for report_similarity in (compute_user_similarity, compute_item_similarity):
        report_similarity(ratings)


if __name__ == "__main__":
    main()


User-Item DataFrame:
      ItemA  ItemB  ItemC  ItemD  ItemE
User                                   
0         4      0      2      0      5
1         0      5      4      0      0
2         3      0      0      4      1
3         5      2      0      1      0
4         0      1      3      5      4

User Similarity DataFrame:
User         0         1         2         3         4
User                                                  
0     1.000000  0.186248  0.497000  0.544331  0.542728
1     0.186248  1.000000  0.000000  0.285133  0.371768
2     0.497000  0.000000  1.000000  0.680309  0.659082
3     0.544331  0.285133  0.680309  1.000000  0.178958
4     0.542728  0.371768  0.659082  0.178958  1.000000

Time to compute user similarity: 0.000879 seconds
Time Complexity: O(5^2 * 5)
Space Complexity: O(5^2)

Item Similarity DataFrame:
          ItemA     ItemB     ItemC     ItemD     ItemE
ItemA  1.000000  0.258199  0.210090  0.370970  0.501901
ItemB  0.258199  1.000000  0.779773  0.197