In [1]:
import os

import pandas as pd

from IPython.display import display
from recommender import RecommendationEngine
from utils import get_translation_dict

In [2]:
# Loading necessary csvs into Pandas
# The dataset location can be overridden with the OLIST_DATA_DIR environment
# variable so the notebook is runnable on machines other than the author's;
# the original absolute path is kept as the fallback default.
data_folder = os.environ.get(
    "OLIST_DATA_DIR",
    "/Users/alex/Workspace/Datasets/OlistEcommercePublicDataset",
)

# Fail early with an actionable message instead of a bare read_csv error.
if not os.path.isdir(data_folder):
    raise FileNotFoundError(
        f"Olist dataset folder not found: {data_folder!r}. "
        "Set the OLIST_DATA_DIR environment variable to its location."
    )

orders_df = pd.read_csv(os.path.join(data_folder, "olist_orders_dataset.csv"))
reviews_df = pd.read_csv(os.path.join(data_folder, "olist_order_reviews_dataset.csv"))
products_df = pd.read_csv(os.path.join(data_folder, "olist_products_dataset.csv"))
order_items_df = pd.read_csv(os.path.join(data_folder, "olist_order_items_dataset.csv"))
customer_df = pd.read_csv(os.path.join(data_folder, "olist_customers_dataset.csv"))
cat_name_translation = pd.read_csv(
    os.path.join(data_folder, "product_category_name_translation.csv")
)

# Frames whose object columns are converted to the "string" dtype in the next
# cell (the category translation table is intentionally not part of this list).
dfs = [orders_df, reviews_df, products_df, order_items_df, customer_df]


In [3]:
# Cast every object-dtype column of the loaded frames (this includes the ID
# columns) to pandas' "string" dtype. The frames in `dfs` are modified in place.
for df in dfs:
    for column, ctype in df.dtypes.items():
        if ctype == "object":
            df[column] = df[column].astype("string")

# order_id -> customer_unique_id, resolved through the per-order customer_id.
unique_id_df = orders_df[["order_id", "customer_id"]].merge(
    customer_df[["customer_id", "customer_unique_id"]],
    on="customer_id",
    how="inner",
)

# One row per order item: (order_id, customer_id, product_id).
product_and_order_id_df = orders_df[["order_id", "customer_id"]].merge(
    order_items_df[["order_id", "product_id"]],
    on="order_id",
    how="inner",
)

# Attach the unique customer id to every order item.
user_product_order_id_df = unique_id_df.merge(
    product_and_order_id_df,
    on=["order_id", "customer_id"],
    how="inner",
)

# Add the review score of the order, then drop the join keys so only
# customer_unique_id, product_id and review_score remain.
final_df = user_product_order_id_df.merge(
    reviews_df[["order_id", "review_score"]],
    on="order_id",
    how="inner",
).drop(columns=["customer_id", "order_id"])


In [4]:
### Filtering dataframes so we have a smaller subset
# Subset sizes, named so they can be tuned in one place.
N_TOP_PRODUCTS = 500
N_TOP_CUSTOMERS = 1000

data = final_df.copy()

# Number of rows per product. The column is selected before the transform so
# only that one column is counted instead of every column of the frame.
data["count"] = data.groupby("product_id")["customer_unique_id"].transform("count")

# Ids of the N_TOP_PRODUCTS products with the highest row count
product_id = (
    data.drop_duplicates("product_id")
    .sort_values("count", ascending=False)
    .iloc[:N_TOP_PRODUCTS]["product_id"]
)

# Keep only the rows that belong to those products
data = data[data["product_id"].isin(product_id)].reset_index(drop=True)

# Overwrite "count" with the number of rows per customer, computed on the
# already product-filtered data.
data["count"] = data.groupby("customer_unique_id")["product_id"].transform("count")

# Ids of the N_TOP_CUSTOMERS customers with the highest row count
customer_id = (
    data.drop_duplicates("customer_unique_id")
    .sort_values("count", ascending=False)
    .iloc[:N_TOP_CUSTOMERS]["customer_unique_id"]
)

# Keep only the rows that belong to those customers; "count" now holds the
# per-customer row count.
data = data[data["customer_unique_id"].isin(customer_id)].reset_index(drop=True)

data

Unnamed: 0,customer_unique_id,product_id,review_score,count
0,7973a6ba9c81ecaeb3d628c33c7c7c48,7c1bd920dbdf22470b68bde975dd3ccf,5,3
1,831a032a3327e2b8325faf9d37953870,216bb0e0cd43ffd832e0973d35e0377e,5,6
2,831a032a3327e2b8325faf9d37953870,216bb0e0cd43ffd832e0973d35e0377e,5,6
3,831a032a3327e2b8325faf9d37953870,216bb0e0cd43ffd832e0973d35e0377e,5,6
4,831a032a3327e2b8325faf9d37953870,216bb0e0cd43ffd832e0973d35e0377e,5,6
...,...,...,...,...
3096,5bc738a48bdb9ca1f798d56de2cce84c,53759a2ecddad2bb87a079a1f1519f73,5,5
3097,5bc738a48bdb9ca1f798d56de2cce84c,53759a2ecddad2bb87a079a1f1519f73,5,5
3098,5bc738a48bdb9ca1f798d56de2cce84c,53759a2ecddad2bb87a079a1f1519f73,5,5
3099,5097a5312c8b157bb7be58ae360ef43c,d1c427060a0f73f6b889a5c7c61f2ac4,2,2


In [5]:
# NOTE: no user-item pivot is built in this notebook; the long-format `data`
# frame (customer_unique_id, product_id, review_score, count) is handed to the
# engine as-is.

# Translation dictionary for product category names [Portuguese -> English]
translate_dict = get_translation_dict(cat_name_translation)

# Set up our custom recommendation engine from the filtered ratings, the
# product and order-item tables, and the category-name translations.
recommendationengine = RecommendationEngine(
    data,
    products_df,
    order_items_df,
    translate_dict,
)

In [6]:
customer_idx = 0

# First pass: recommendations with the engine's default arguments.
# Second pass: the same request with cluster=True.
# After each pass the customer's bought items are shown for comparison.
for extra_kwargs in ({}, {"cluster": True}):
    display(
        recommendationengine.get_recommendation(
            customer_idx=customer_idx, nr_of_items=2, **extra_kwargs
        )
    )
    display(
        recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2)
    )

Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,5.0,150.470238
1,6a2909ac21d16b721e4795e7e8ff3e68,sports_leisure,4.998252,24.203103


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9


Clustering using custommer: 0064a1b5f5cddd047c987b988a90c8c1


Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,5.0,150.470238
1,389d119b48cf3043d311335e499d9c6b,garden_tools,4.97459,54.695383


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9


In [7]:
customer_idx = 200

# First pass: recommendations with the engine's default arguments.
# Second pass: the same request with cluster=True.
# After each pass the customer's bought items are shown for comparison.
for extra_kwargs in ({}, {"cluster": True}):
    display(
        recommendationengine.get_recommendation(
            customer_idx=customer_idx, nr_of_items=2, **extra_kwargs
        )
    )
    display(
        recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2)
    )

Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,4.99943,150.470238
1,b38b25d838ae0b8385e8cc68b9017644,health_beauty,4.998256,160.28125


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9
1,389d119b48cf3043d311335e499d9c6b,5.0,garden_tools,49.9


Clustering using custommer: 36cfec707344b75d20e6c7ef583c3b8c


Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,4.986168,150.470238
1,a19b6951c75da43aad691622dd2f6abe,furniture_decor,4.956619,35.448


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9
1,389d119b48cf3043d311335e499d9c6b,5.0,garden_tools,49.9


In [8]:
customer_idx = 300

# First pass: recommendations with the engine's default arguments.
# Second pass: the same request with cluster=True.
# After each pass the customer's bought items are shown for comparison.
for extra_kwargs in ({}, {"cluster": True}):
    display(
        recommendationengine.get_recommendation(
            customer_idx=customer_idx, nr_of_items=2, **extra_kwargs
        )
    )
    display(
        recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2)
    )

Unnamed: 0,product_id,product_category_name,score,price
0,3cb39171fd36c50097f2dedbbe0dfe6e,auto,4.997568,29.624884
1,89b121bee266dcd25688a1ba72eefb61,computers_accessories,4.997568,79.9


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,1.0,garden_tools,49.9


Clustering using custommer: 4ea07e24e59ceae33aeb18024919b4d8


Unnamed: 0,product_id,product_category_name,score,price
1,389d119b48cf3043d311335e499d9c6b,garden_tools,4.669016,54.695383
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,4.665743,150.470238


Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,1.0,garden_tools,49.9


In [13]:
customer_idx = 570

# First pass: recommendations with the engine's default arguments.
# Second pass: the same request with cluster=True.
# After each pass the customer's bought items are shown for comparison.
for extra_kwargs in ({}, {"cluster": True}):
    display(
        recommendationengine.get_recommendation(
            customer_idx=customer_idx, nr_of_items=2, **extra_kwargs
        )
    )
    display(
        recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2)
    )

Unnamed: 0,product_id,product_category_name,score,price
0,67bd616e1ba0d3d3e8545f3113b0140d,health_beauty,4.993852,15.030541
1,fe6a9515d655fa7936b8a7c841039f34,cool_stuff,4.993852,250.4


Unnamed: 0,product_id,rating,product_category_name,price
0,e53e557d5a159f5aa2c5e995dfdf244b,2.0,computers_accessories,77.9
1,36f60d45225e60c7da4558b070ce4b60,1.5,computers_accessories,88.0


Clustering using custommer: 94aea9aae532453c72b6afbfe0eaec10


Unnamed: 0,product_id,product_category_name,score,price
1,ee57070aa3b24a06fdd0e02efd2d757d,computers_accessories,4.562635,73.5796
0,3f14d740544f37ece8a9e7bc8349797e,computers_accessories,3.418762,84.956374


Unnamed: 0,product_id,rating,product_category_name,price
0,e53e557d5a159f5aa2c5e995dfdf244b,2.0,computers_accessories,77.9
1,36f60d45225e60c7da4558b070ce4b60,1.5,computers_accessories,88.0
