In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [42]:

# Load datasets
customers = pd.read_csv('customers.csv', index_col='customer_id')
products = pd.read_csv('products.csv', index_col='product_id')
ratings = pd.read_csv('ratings.csv', index_col=False)
orders = pd.read_csv('orders.csv', index_col='order_id')

products


Unnamed: 0_level_0,product_name,price,category
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Ektorp Sofa,694,Sofas & Armchairs
2,Poäng Armchair,569,Sofas & Armchairs
3,Klippan Loveseat,639,Sofas & Armchairs
4,Malm Bed Frame,202,Beds
5,Hemnes Daybed,845,Beds
6,Brimnes Bed Storage,773,Beds
7,Lack Side Table,717,Tables & Desks
8,Melltorp Dining Table,369,Tables & Desks
9,Micke Desk,19,Tables & Desks
10,Kallax Shelving Unit,733,Storage Solutions


In [57]:
# Merge ratings with products
product_ratings = pd.merge(ratings, products, on='product_id', how='inner')
# print(product_ratings)


In [58]:
# Merge orders with product_ratings
order_details = pd.merge(orders, product_ratings, on=['customer_id', 'product_id'], how='inner')

# Convert order_date to datetime
order_details['order_date'] = pd.to_datetime(order_details['order_date'])


In [59]:
# Filter orders for the last month
last_month_orders = order_details[order_details['order_date'].dt.month != (pd.Timestamp.now() - pd.DateOffset(months=1)).month]

# Calculate revenue for each product
last_month_orders['revenue'] = last_month_orders['price'] * last_month_orders['quantity']

# Aggregate revenue and units sold for each product
product_performance = last_month_orders.groupby(['product_id', 'product_name']).agg(
    revenue=('revenue', 'sum'),
    units_sold=('quantity', 'sum')
).reset_index()

# Find top performing products in terms of revenue
top_products_revenue = product_performance.sort_values(by='revenue', ascending=False).head(10)

# Find top performing products in terms of units sold
top_products_units_sold = product_performance.sort_values(by='units_sold', ascending=False).head(10)

# Identify top clients for the last month
top_clients = last_month_orders.groupby(['customer_id']).agg(
    total_spent=('revenue', 'sum')
).reset_index().sort_values(by='total_spent', ascending=False).head(10)


In [60]:
# Display results

top_products_revenue_f = pd.DataFrame(top_products_revenue)
print("Top performing products in terms of revenue:")
top_products_revenue_f


Top performing products in terms of revenue:


Unnamed: 0,product_id,product_name,revenue,units_sold
4,6,Brimnes Bed Storage,8503,11
23,27,Ivar Cabinet,8244,12
37,44,Koppang Dresser,7650,10
10,12,Raskog Trolley,7640,10
27,31,Nockeby Sofa,7083,9
39,46,Valje Wall Cabinet,6690,10
38,45,Hektar Work Lamp,6456,8
33,40,Lommarp Bookcase,6020,7
1,3,Klippan Loveseat,5751,9
3,5,Hemnes Daybed,5070,6


In [40]:
print("\nTop performing products in terms of units sold:")
top_products_units_sold_f = pd.DataFrame(top_products_units_sold)
top_products_units_sold_f




Top performing products in terms of units sold:


Unnamed: 0,product_id,product_name,revenue,units_sold
26,30,Strandmon Wing Chair,1148,14
23,27,Ivar Cabinet,8244,12
4,6,Brimnes Bed Storage,8503,11
37,44,Koppang Dresser,7650,10
39,46,Valje Wall Cabinet,6690,10
10,12,Raskog Trolley,7640,10
27,31,Nockeby Sofa,7083,9
1,3,Klippan Loveseat,5751,9
20,22,Nordli Chest Drawers,4488,8
12,14,Ingolf Bar Stool,4872,8


In [62]:
print("\nTop clients for the last month:")

top_clients_f = pd.DataFrame(top_clients)
top_clients_f


Top clients for the last month:


Unnamed: 0,customer_id,total_spent
30,47,6918
25,39,6112
4,8,5536
35,54,5209
49,84,5127
3,7,4921
51,86,4630
48,83,4408
24,38,4388
42,69,4278
