In [1]:
import pandas as pd

# Load the data
orders = pd.read_csv('olist_orders_dataset.csv', usecols=['order_id', 'customer_id'])
order_items = pd.read_csv('olist_order_items_dataset.csv', usecols=['order_id', 'product_id', 'order_item_id'])
customers = pd.read_csv('olist_customers_dataset.csv', usecols=['customer_id', 'customer_unique_id', 'customer_zip_code_prefix'])

# Reduce the dataset size by sampling
sampled_orders = orders.sample(frac=0.05, random_state=42)  # Reduce to 5% of the dataset
sampled_order_items = order_items[order_items['order_id'].isin(sampled_orders['order_id'])]
sampled_customers = customers[customers['customer_id'].isin(sampled_orders['customer_id'])]

# Merge the sampled data
orders_items_customers = pd.merge(sampled_order_items, sampled_orders, on='order_id')
orders_items_customers = pd.merge(orders_items_customers, sampled_customers, on='customer_id')

# Save the sampled dataset to work with
orders_items_customers.to_csv('sampled_orders_items_customers.csv', index=False)
sampled_customers.to_csv('sampled_customers.csv', index=False)


In [2]:
from surprise import SVD, Dataset, Reader

# Prepare the data
def prepare_data(orders_items_customers):
    # Use only the required columns to build the matrix
    customer_product_matrix = orders_items_customers.pivot_table(
        index='customer_unique_id',
        columns='product_id',
        values='order_item_id',
        aggfunc='count',
        fill_value=0
    )
    return customer_product_matrix

# Train-test split and build dataset
def train_test_split_data(data):
    reader = Reader(rating_scale=(0, 1))
    df = data.stack().reset_index(name='purchase')
    df = df[df['purchase'] > 0]  # Remove zeros
    data = Dataset.load_from_df(df[['customer_unique_id', 'product_id', 'purchase']], reader)
    trainset = data.build_full_trainset()
    return trainset

# Collaborative Filtering with SVD
def collaborative_filtering(trainset):
    algo = SVD()
    algo.fit(trainset)
    return algo

# Generate recommendations
def generate_recommendations(algo, customer_product_matrix):
    recommendations = {}
    for customer in customer_product_matrix.index:
        customer_recs = []
        for product in customer_product_matrix.columns:
            pred = algo.predict(customer, product).est
            customer_recs.append((product, pred))
        customer_recs.sort(key=lambda x: x[1], reverse=True)
        recommendations[customer] = customer_recs[:5]  # Top 5 recommendations
    return recommendations

# Grouping by Location
def group_by_location(customers, recommendations):
    location_groups = customers.groupby('customer_zip_code_prefix')
    location_recommendations = {}

    for location, group in location_groups:
        location_recs = {}
        for customer in group['customer_unique_id']:
            if customer in recommendations:
                location_recs[customer] = recommendations[customer]
        location_recommendations[location] = location_recs
    return location_recommendations

# Main function to execute the code
def main():
    # Load the sampled data
    orders_items_customers = pd.read_csv('sampled_orders_items_customers.csv')
    customers = pd.read_csv('sampled_customers.csv')

    # Proceed with the existing logic
    customer_product_matrix = prepare_data(orders_items_customers)
    trainset = train_test_split_data(customer_product_matrix)
    algo = collaborative_filtering(trainset)
    recommendations = generate_recommendations(algo, customer_product_matrix)
    location_recommendations = group_by_location(customers, recommendations)

    print("Location-based Recommendations (sample):")
    for location, recs in list(location_recommendations.items())[:3]:
        print(f"Location: {location}, Recommendations: {recs}")

if __name__ == "__main__":
    main()


Location-based Recommendations (sample):
Location: 1007, Recommendations: {'aa1c9c908a46198cf8b8fdc5c8037688': [('001b72dfd63e9833e8c02742adf472e3', 1), ('0030026a6ddb3b2d1d4bc225b4b4c4da', 1), ('003a31970fea14fffe92ac856b8a9b97', 1), ('006baa9a5b8f95895f15273a35bc2664', 1), ('008cff0e5792219fae03e570f980b330', 1)]}
Location: 1008, Recommendations: {'964eb1b98713e4527eb11bce9a0ac1fe': [('001b72dfd63e9833e8c02742adf472e3', 1), ('002159fe700ed3521f46cfcf6e941c76', 1), ('003a31970fea14fffe92ac856b8a9b97', 1), ('006baa9a5b8f95895f15273a35bc2664', 1), ('008cff0e5792219fae03e570f980b330', 1)]}
Location: 1011, Recommendations: {'35052a34c8cddade15549afde5580e83': [('001b72dfd63e9833e8c02742adf472e3', 1), ('002159fe700ed3521f46cfcf6e941c76', 1), ('0030026a6ddb3b2d1d4bc225b4b4c4da', 1), ('003a31970fea14fffe92ac856b8a9b97', 1), ('006baa9a5b8f95895f15273a35bc2664', 1)]}
