In [1]:
import os

import pandas as pd

from IPython.display import display
from recommender import RecommendationEngine
from utils import *

In [2]:
# Load the Olist e-commerce CSVs into pandas and build the recommender.
#
# The dataset location can be overridden with the OLIST_DATA_DIR environment
# variable so the notebook runs on machines other than the author's; the
# original local path is kept as the fallback for backward compatibility.
data_folder = os.environ.get(
    "OLIST_DATA_DIR", "/Users/alex/Workspace/Datasets/OlistEcommercePublicDataset"
)

# Keys under which each table is later looked up (e.g. dataframes["products_df"]).
df_names = [
    "orders_df",
    "reviews_df",
    "products_df",
    "order_items_df",
    "customer_df",
    "cat_name_translation",
]
# CSV file names inside `data_folder`, listed in the same order as `df_names`.
df_files = [
    "olist_orders_dataset.csv",
    "olist_order_reviews_dataset.csv",
    "olist_products_dataset.csv",
    "olist_order_items_dataset.csv",
    "olist_customers_dataset.csv",
    "product_category_name_translation.csv",
]

# The two lists are parallel; catch an accidental mismatch when one is edited.
# NOTE(review): presumably read_dataframes pairs them by position -- confirm in utils.
assert len(df_names) == len(df_files), "df_names and df_files must have the same length"

# Loading dataframes (read, then preprocess; the result is indexed by the names above)
dataframes = preprocess_dataframes(read_dataframes(df_names, df_files, data_folder))

# Filtering final dataframe by most active users and bought items
# NOTE(review): item_number / user_number look like the number of items and
# users kept by the filter -- confirm against utils.filter_dataframe.
final_df = filter_dataframe(
    join_dataframes(dataframes), item_number=500, user_number=1000
)

# Loading translation dictionary [Portuguese -> English]
translate_dict = get_translation_dict(dataframes["cat_name_translation"])

# Initializing our custom recommendation engine
recommendationengine = RecommendationEngine(
    final_df, dataframes["products_df"], dataframes["order_items_df"], translate_dict
)

In [6]:
# Preview the first rows of the engine's pivot table. In the saved output
# below, rows are indexed by customer_unique_id, columns are product_id
# values, and every cell that is displayed is empty (missing).
recommendationengine.pivot_df.head()

product_id,0152f69b6cf919bcdaf117aa8c43e5a2,017692475c1c954ff597feda05131d73,0502d1a36be75bd36b452f31c6ed264a,054515fd15bc1a2029f10de97ffa9120,060c17562f97e5bb60bc0dfa4dd5b3f2,060cb19345d90064d1015407193c233d,06edb72f1e0c64b14c5b79353f7abea3,08574b074924071f4e201e151b152b4e,086351823300e0339f6955b27998c186,0a4093a4af429dc0a9334300e5c13ae5,...,f646af315b0f3a597f69213537ca2199,f71973c922ccaab05514a36a8bc741b8,f7a17d2c51d9df89a4f1711c4ac17f33,f7f59e6186e10983a061ac7bdb3494d6,f908d3bf313a1308bfb2a46ea2685347,fb55982be901439613a95940feefd9ee,fb7a100ec8c7b34f60cec22b1a9a10e0,fbc1488c1a1e72ba175f53ab29a248e8,fe077ec80df6b4ee60bb4498d5ab1962,fe6a9515d655fa7936b8a7c841039f34
customer_unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0064a1b5f5cddd047c987b988a90c8c1,,,,,,,,,,,...,,,,,,,,,,
0096c18412349537abc45e350581cbbc,,,,,,,,,,,...,,,,,,,,,,
00da34ebf22b87f6c2f53dc069f237df,,,,,,,,,,,...,,,,,,,,,,
015b72d1a4ec2bfcf75518ada117c62b,,,,,,,,,,,...,,,,,,,,,,
01626676a41ff29901f7b42bc8e09dd8,,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Preview the first rows of the products table (product_id, category name,
# and size/weight columns). Note that two column names are spelled "lenght"
# in the data itself (product_name_lenght, product_description_lenght).
dataframes['products_df'].head()

Unnamed: 0,product_id,product_category_name,product_name_lenght,product_description_lenght,product_photos_qty,product_weight_g,product_length_cm,product_height_cm,product_width_cm
0,1e9e8ef04dbcff4541ed26657ea517e5,perfumaria,40.0,287.0,1.0,225.0,16.0,10.0,14.0
1,3aa071139cb16b67ca9e5dea641aaa2f,artes,44.0,276.0,1.0,1000.0,30.0,18.0,20.0
2,96bd76ec8810374ed1b65e291975717f,esporte_lazer,46.0,250.0,1.0,154.0,18.0,9.0,15.0
3,cef67bcfe19066a932b7673e239eb23d,bebes,27.0,261.0,1.0,371.0,26.0,4.0,26.0
4,9dc1a7de274444849c219cff195d0b71,utilidades_domesticas,37.0,402.0,4.0,625.0,20.0,17.0,13.0


In [9]:
# Preview the first rows of the customers table. It carries both customer_id
# and customer_unique_id; the pivot table shown earlier is indexed by
# customer_unique_id.
dataframes['customer_df'].head()

Unnamed: 0,customer_id,customer_unique_id,customer_zip_code_prefix,customer_city,customer_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,14409,franca,SP
1,18955e83d337fd6b2def6b18a428ac77,290c77bc529b7ac935b93aa66c333dc3,9790,sao bernardo do campo,SP
2,4e7b3e00288586ebd08712fdd0374a03,060e732b5b29e8181a18229c7b0b2b5e,1151,sao paulo,SP
3,b2b6027bc5c5109e529d4dc6358b12c3,259dac757896d24d7702b9acbbff3f3c,8775,mogi das cruzes,SP
4,4f2d8ab171c80ec8364f7c12e35b23ad,345ecd01c38d18a9036ed96c73b8d066,13056,campinas,SP
