In [1]:
!pip install scipy



# Collaborative Filtering with Singular Value Decomposition (SVD)

Singular value decomposition (SVD) is a linear algebra technique used in collaborative filtering to build recommender systems. SVD is based on dimensionality reduction and can help to capture similarities between users and items. It can also reveal information about the structure of a matrix, such as hidden connections between people and items. For example, in a movie recommendation system, SVD could reveal a hidden connection between people and books based on genre or author

## Loading Data: This code reads the CSV files into DataFrames.

In [3]:
import pandas as pd

# Load datasets
customers = pd.read_csv('data/olist_customers_dataset.csv')
geolocations = pd.read_csv('data/olist_geolocation_dataset.csv')
order_items = pd.read_csv('data/olist_order_items_dataset.csv')
payments = pd.read_csv('data/olist_order_payments_dataset.csv')
reviews = pd.read_csv('data/olist_order_reviews_dataset.csv')
orders = pd.read_csv('data/olist_orders_dataset.csv')
products = pd.read_csv('data/olist_products_dataset.csv')
sellers = pd.read_csv('data/olist_sellers_dataset.csv')
product_category_translation = pd.read_csv('data/product_category_name_translation.csv')

## Merging Data: 

This code merges the DataFrames to create a comprehensive dataset (orders_products_df).


In [8]:
# Recreate the merged orders_products_df
orders_customers_df = orders.merge(customers, on='customer_id')
orders_items_df = orders_customers_df.merge(order_items, on='order_id')
orders_reviews_df = orders_items_df.merge(reviews, on='order_id', how='left')
orders_products_df = orders_reviews_df.merge(products, on='product_id')

# Create the user-item interaction matrix
user_item_matrix = orders_products_df.pivot_table(index='customer_unique_id', columns='product_id', values='order_id', aggfunc='count', fill_value=0)

  user_item_matrix = orders_products_df.pivot_table(index='customer_unique_id', columns='product_id', values='order_id', aggfunc='count', fill_value=0)


ModuleNotFoundError: No module named 'ace_tools'

## Creating User-Item Matrix: 

It creates a matrix where each row represents a customer and each column represents a product, with values indicating the number of interactions (e.g., purchases).


In [9]:
user_item_matrix.shape

(95420, 32951)

In [10]:
user_item_matrix.head()

product_id,00066f42aeeb9f3007548bb9d3f33c38,00088930e925c41fd95ebfe695fd2655,0009406fd7479715e4bef61dd91f2462,000b8f95fcb9e0096488278317764d19,000d9be29b5207b54e86aa1b1ac54872,0011c512eb256aa0dbbb544d8dffcf6e,00126f27c813603687e6ce486d909d01,001795ec6f1b187d37335e1c4704762e,001b237c0e9bb435f2e54071129237e9,001b72dfd63e9833e8c02742adf472e3,...,ffef256879dbadcab7e77950f4f4a195,fff0a542c3c62682f23305214eaeaa24,fff1059cd247279f3726b7696c66e44e,fff28f91211774864a1000f918ed00cc,fff515ea94dbf35d54d256b3e39f0fea,fff6177642830a9a94a0f2cba5e476d1,fff81cc3158d2725c0655ab9ba0f712c,fff9553ac224cec9d15d49f5a263411f,fffdb2d0ec8d6a61f0a0a0db3f25b441,fffe9eeff12fcbd74a2f2b007dde0c58
customer_unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0000366f3b9a7992bf8c76cfdf3221e2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0000b849f77a49e4a4ce2b2a4ca5be3f,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0000f46a3911fa3c0805444483337064,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0000f6ccb0745a6a4b88665a16c9f078,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0004aac84e0df4da2b147fca70cf8255,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Collaborative Filtering: 

SVD is applied to the matrix to decompose it, predict ratings, and generate product recommendations for users.


In [12]:
type(user_item_matrix)

pandas.core.frame.DataFrame

In [15]:
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

import numpy as np

# Convert the user-item interaction matrix to a sparse matrix
user_item_sparse_matrix = csr_matrix(user_item_matrix.values)

# Perform Singular Value Decomposition (SVD)
U, sigma, Vt = svds(user_item_sparse_matrix, k=50)
sigma = np.diag(sigma)

# Predict the user-item interaction matrix
predicted_ratings = np.dot(np.dot(U, sigma), Vt)
predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=user_item_matrix.columns, index=user_item_matrix.index)

ValueError: `A` must be of floating or complex floating data type.

We had some trouble implementing the SVDs, we are still working on it.

In [None]:

# Function to recommend products for a given user
def recommend_products(user_id, num_recommendations=5):
    user_row_number = user_item_matrix.index.get_loc(user_id)
    sorted_user_ratings = predicted_ratings_df.iloc[user_row_number].sort_values(ascending=False)
    recommended_products = sorted_user_ratings.head(num_recommendations).index
    return recommended_products

# Example: Recommend products for a specific user
example_user_id = user_item_matrix.index[0]
recommended_products = recommend_products(example_user_id)
print(f"Recommended products for user {example_user_id}: {recommended_products}")