In [1]:
import os

import pandas as pd

from IPython.display import display, Markdown
from recommender import RecommendationEngine
from utils import *


In [2]:
# Location of the Olist e-commerce CSV files.
# The OLIST_DATA_DIR environment variable overrides the location so the
# notebook can run on another machine without editing this cell; the original
# local path is kept as the fallback so existing setups keep working.
data_folder = os.environ.get(
    "OLIST_DATA_DIR",
    "/Users/alex/Workspace/Datasets/OlistEcommercePublicDataset",
)

# One mapping of dataframe name -> CSV file, so a name and its file cannot
# drift out of alignment the way two parallel lists can.
dataset_files = {
    "orders_df": "olist_orders_dataset.csv",
    "reviews_df": "olist_order_reviews_dataset.csv",
    "products_df": "olist_products_dataset.csv",
    "order_items_df": "olist_order_items_dataset.csv",
    "customer_df": "olist_customers_dataset.csv",
    "cat_name_translation": "product_category_name_translation.csv",
}
# The helpers take two parallel lists; dicts preserve insertion order, so
# these are element-for-element the same lists as before.
df_names = list(dataset_files)
df_files = list(dataset_files.values())

# Read the CSVs and run the project's preprocessing on them.
# `dataframes` is indexed by the names in df_names further down.
raw_dataframes = read_dataframes(df_names, df_files, data_folder)
dataframes = preprocess_dataframes(raw_dataframes)

# Join everything into one frame and keep only the most active users and the
# most bought items (limits: 500 items, 1000 users).
final_df = filter_dataframe(
    join_dataframes(dataframes), item_number=500, user_number=1000
)

# Category-name translation dictionary [Portuguese -> English]
translate_dict = get_translation_dict(dataframes["cat_name_translation"])

# Initialise the custom recommendation engine.
# NOTE(review): sim_method="itr" is passed through unchanged; its meaning is
# defined in recommender.RecommendationEngine — confirm there.
recommendationengine = RecommendationEngine(
    final_df,
    dataframes["products_df"],
    dataframes["order_items_df"],
    translate_dict,
    sim_method="itr",
)


In [3]:
# Demo for customer_idx = 0: show what the customer bought, then the engine's
# recommendations without and with clustering.
customer_idx = 0
# Keyword arguments shared by every engine call in this cell.
lookup_kwargs = {"customer_idx": customer_idx, "nr_of_items": 2}

display(Markdown("### Bought Items"))
bought_items = recommendationengine.get_bought_items(**lookup_kwargs)
display(bought_items)

# get_recommendation returns a (recommendations, user_id) pair.
recco_1, user_id = recommendationengine.get_recommendation(**lookup_kwargs)
display(Markdown("### Recommendation - Without clustering"), recco_1)

recco_2, user_id = recommendationengine.get_recommendation(
    **lookup_kwargs, cluster=True
)
display(Markdown("### Recommendation - With clustering"), recco_2)


### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9


### Recommendation - Without clustering

Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,5.0,150.470238
1,6a2909ac21d16b721e4795e7e8ff3e68,sports_leisure,4.933756,24.203103


### Recommendation - With clustering

Unnamed: 0,product_id,product_category_name,score,price
0,349ce46a0e2e20054aa9d80c48af8816,construction_tools_lights,5.0,150.470238
1,389d119b48cf3043d311335e499d9c6b,garden_tools,4.765495,54.695383


In [4]:
# Demo for customer_idx = 200: show what the customer bought, then the
# engine's recommendations without and with clustering.
customer_idx = 200
# Keyword arguments shared by every engine call in this cell.
lookup_kwargs = {"customer_idx": customer_idx, "nr_of_items": 2}

display(Markdown("### Bought Items"))
bought_items = recommendationengine.get_bought_items(**lookup_kwargs)
display(bought_items)

# get_recommendation returns a (recommendations, user_id) pair.
recco_1, user_id = recommendationengine.get_recommendation(**lookup_kwargs)
display(Markdown("### Recommendation - Without clustering"), recco_1)

recco_2, user_id = recommendationengine.get_recommendation(
    **lookup_kwargs, cluster=True
)
display(Markdown("### Recommendation - With clustering"), recco_2)


### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,368c6c730842d78016ad823897a372db,5.0,garden_tools,49.9
1,389d119b48cf3043d311335e499d9c6b,5.0,garden_tools,49.9


### Recommendation - Without clustering

Unnamed: 0,product_id,product_category_name,score,price
0,b38b25d838ae0b8385e8cc68b9017644,health_beauty,4.933821,160.28125
1,fe6a9515d655fa7936b8a7c841039f34,cool_stuff,4.92366,250.4


### Recommendation - With clustering

Unnamed: 0,product_id,product_category_name,score,price
0,a19b6951c75da43aad691622dd2f6abe,furniture_decor,4.630976,35.448
1,b38b25d838ae0b8385e8cc68b9017644,health_beauty,4.630976,160.28125


In [5]:
# Demo for customer_idx = 630: show what the customer bought, then the
# engine's recommendations without and with clustering.
customer_idx = 630
# Keyword arguments shared by every engine call in this cell.
lookup_kwargs = {"customer_idx": customer_idx, "nr_of_items": 2}

display(Markdown("### Bought Items"))
bought_items = recommendationengine.get_bought_items(**lookup_kwargs)
display(bought_items)

# get_recommendation returns a (recommendations, user_id) pair.
recco_1, user_id = recommendationengine.get_recommendation(**lookup_kwargs)
display(Markdown("### Recommendation - Without clustering"), recco_1)

recco_2, user_id = recommendationengine.get_recommendation(
    **lookup_kwargs, cluster=True
)
display(Markdown("### Recommendation - With clustering"), recco_2)


### Bought Items

Unnamed: 0,product_id,rating,product_category_name,price
0,36f60d45225e60c7da4558b070ce4b60,1.0,computers_accessories,88.0
1,e53e557d5a159f5aa2c5e995dfdf244b,1.0,computers_accessories,77.9


### Recommendation - Without clustering

Unnamed: 0,product_id,product_category_name,score,price
0,44fded21627553d1886d459384bbce06,electronics,4.877065,15.0
1,7d854ab97c64ef9df2bbfaf332765786,cool_stuff,4.877065,61.566667


### Recommendation - With clustering

Unnamed: 0,product_id,product_category_name,score,price
1,ee57070aa3b24a06fdd0e02efd2d757d,computers_accessories,2.809408,73.5796
0,3f14d740544f37ece8a9e7bc8349797e,computers_accessories,0.345044,84.956374


In [6]:
# Show the latest recommendations with their scores doubled.
# `assign` builds new frames instead of writing into recco_1 / recco_2, so
# re-running this cell does not double the scores a second time, and the
# frames returned by the engine are left untouched (which also avoids a
# possible SettingWithCopyWarning if they are slices of a larger frame).
recco_1_doubled = recco_1.assign(score=recco_1["score"] * 2)
recco_2_doubled = recco_2.assign(score=recco_2["score"] * 2)

display(recco_1_doubled)
display(recco_2_doubled)

Unnamed: 0,product_id,product_category_name,score,price
0,44fded21627553d1886d459384bbce06,electronics,9.754129,15.0
1,7d854ab97c64ef9df2bbfaf332765786,cool_stuff,9.754129,61.566667


Unnamed: 0,product_id,product_category_name,score,price
1,ee57070aa3b24a06fdd0e02efd2d757d,computers_accessories,5.618816,73.5796
0,3f14d740544f37ece8a9e7bc8349797e,computers_accessories,0.690087,84.956374
