In [20]:
import os

import pandas as pd

from IPython.display import display, Markdown
from recommender import RecommendationEngine
from utils import *

In [21]:
# Loading necessary csvs into Pandas
data_folder = "/Users/alex/Workspace/Datasets/OlistEcommercePublicDataset"

df_names = [
    "orders_df",
    "reviews_df",
    "products_df",
    "order_items_df",
    "customer_df",
    "cat_name_translation",
]
df_files = [
    "olist_orders_dataset.csv",
    "olist_order_reviews_dataset.csv",
    "olist_products_dataset.csv",
    "olist_order_items_dataset.csv",
    "olist_customers_dataset.csv",
    "product_category_name_translation.csv",
]

# Loading dataframes
dataframes = preprocess_dataframes(read_dataframes(df_names, df_files, data_folder))

# Filtering final dataframe by most active users and bought items
final_df = filter_dataframe(
    join_dataframes(dataframes), item_number=500, user_number=1000
)

# Loading translation dictionary [Portugues -> English]
translate_dict = get_translation_dict(dataframes["cat_name_translation"])

# Initializing our custom recommendation engine
recommendationengine = RecommendationEngine(
    final_df, dataframes["products_df"], dataframes["order_items_df"], translate_dict
)


In [24]:
customer_idx = 0

display(Markdown("### Bought Items"))
display(recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2))

display(Markdown("### Recommendation - Without clustering"))
display(recommendationengine.get_recommendation(customer_idx=customer_idx, nr_of_items=2))

display(Markdown("### Recommendation - With clustering"))
display(recommendationengine.get_recommendation(customer_idx=customer_idx, nr_of_items=2, cluster=True))

### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9


### Recommendation - Without clustering

(                         product_id      product_category_name     score  \
 0  349ce46a0e2e20054aa9d80c48af8816  construction_tools_lights  5.000000   
 1  6a2909ac21d16b721e4795e7e8ff3e68             sports_leisure  4.998252   
 
         price  
 0  150.470238  
 1   24.203103  ,
 '0064a1b5f5cddd047c987b988a90c8c1')

### Recommendation - With clustering

(                         product_id      product_category_name    score  \
 0  349ce46a0e2e20054aa9d80c48af8816  construction_tools_lights  5.00000   
 1  389d119b48cf3043d311335e499d9c6b               garden_tools  4.97459   
 
         price  
 0  150.470238  
 1   54.695383  ,
 '0064a1b5f5cddd047c987b988a90c8c1')

In [18]:
customer_idx = 200
display(Markdown("### Bought Items"))
display(recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2))

display(Markdown("### Recommendation - Without clustering"))
display(
    recommendationengine.get_recommendation(customer_idx=customer_idx, nr_of_items=2)
)

display(Markdown("### Recommendation - With clustering"))
display(
    recommendationengine.get_recommendation(
        customer_idx=customer_idx, nr_of_items=2, cluster=True
    )
)

### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9
1,389d119b48cf3043d311335e499d9c6b,5.0,garden_tools,49.9


### Recommendation - Without clustering

(                         product_id      product_category_name     score  \
 0  349ce46a0e2e20054aa9d80c48af8816  construction_tools_lights  4.999430   
 1  b38b25d838ae0b8385e8cc68b9017644              health_beauty  4.998256   
 
         price  
 0  150.470238  
 1  160.281250  ,
 '36cfec707344b75d20e6c7ef583c3b8c')

### Recommendation - With clustering

(                         product_id      product_category_name     score  \
 0  349ce46a0e2e20054aa9d80c48af8816  construction_tools_lights  4.986168   
 1  a19b6951c75da43aad691622dd2f6abe            furniture_decor  4.956619   
 
         price  
 0  150.470238  
 1   35.448000  ,
 '36cfec707344b75d20e6c7ef583c3b8c')

In [19]:
customer_idx = 630
display(Markdown("### Bought Items"))
display(recommendationengine.get_bought_items(customer_idx=customer_idx, nr_of_items=2))

display(Markdown("### Recommendation - Without clustering"))
display(
    recommendationengine.get_recommendation(customer_idx=customer_idx, nr_of_items=2)
)

display(Markdown("### Recommendation - With clustering"))
display(
    recommendationengine.get_recommendation(
        customer_idx=customer_idx, nr_of_items=2, cluster=True
    )
)

### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,36f60d45225e60c7da4558b070ce4b60,1.0,computers_accessories,88.0
1,e53e557d5a159f5aa2c5e995dfdf244b,1.0,computers_accessories,77.9


### Recommendation - Without clustering

(                         product_id product_category_name     score  \
 0  67bd616e1ba0d3d3e8545f3113b0140d         health_beauty  4.994034   
 1  fe6a9515d655fa7936b8a7c841039f34            cool_stuff  4.994034   
 
         price  
 0   15.030541  
 1  250.400000  ,
 'a4669a28dd126c93ca64ddf49a15e496')

### Recommendation - With clustering

(                         product_id  product_category_name     score  \
 1  ee57070aa3b24a06fdd0e02efd2d757d  computers_accessories  4.297065   
 0  3f14d740544f37ece8a9e7bc8349797e  computers_accessories  3.305885   
 
        price  
 1  73.579600  
 0  84.956374  ,
 'a4669a28dd126c93ca64ddf49a15e496')