# Home & Living Growth Strategy Report

**Olist E-commerce Data Analysis**

This report presents the data analysis behind a growth strategy for Olist's Home & Living category, covering seller revenue concentration, SKU breadth, category mix, and customer retention.

## 0. Environment Setup

In [1]:
# Import libraries: standard library first, then third-party packages
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Suppress every warning category for the rest of the session so that
# library notices do not clutter the rendered report.
warnings.filterwarnings('ignore')

print("✅ Libraries Loaded")

✅ Libraries Loaded


## 1. Load Data

In [2]:
# The project root is taken as the parent of the current working directory,
# i.e. the kernel is expected to be running from the notebooks/ folder.
PROJECT_ROOT = Path.cwd().parents[0]
merged_csv_path = PROJECT_ROOT.joinpath("data", "preprocessed", "preprocess_all_merged.csv")

# NOTE(review): the loaded frame shows an 'Unnamed: 0' column, which looks like a
# saved index — consider index_col=0 if it carries no information.
df = pd.read_csv(merged_csv_path)
df.head()

Unnamed: 0,order_id,order_item_id,product_id,seller_id,shipping_limit_date,price,freight_value,customer_id,order_status,order_purchase_timestamp,...,customer_state,geolocation_zip_code_prefix,geolocation_lat,geolocation_lng,geolocation_city,geolocation_state,count,seller_zip_code_prefix,seller_city,seller_state
0,00010242fe8c5a6d1ba2dd792cb16214,1,4244733e06e7ecb4970a6e2683c13e61,48436dade18ac8b2bce089ec2a041202,2017-09-19 09:45:35,58.9,13.29,3ce436f183e68e07877b285a838db11a,delivered,2017-09-13 08:59:02,...,RJ,28013.0,-21.762775,-41.309633,campos dos goytacazes,RJ,149.0,27277,volta redonda,SP
1,00018f77f2f0320c557190d7a144bdd3,1,e5f2d52b802189ee658865ca93d83a8f,dd7ddc04e1b6c2c614352b383efe2d36,2017-05-03 11:05:13,239.9,19.93,f6dd3ec061db4e3987629fe6b26e5cce,delivered,2017-04-26 10:53:06,...,SP,15775.0,-20.220527,-50.903424,santa fe do sul,SP,367.0,3471,sao paulo,SP
2,000229ec398224ef6ca0657da4fc703e,1,c777355d18b72b67abbeef9df44fd0fd,5b51032eddd242adc84c38acab88f23d,2018-01-18 14:48:30,199.0,17.87,6489ae5e4333f3693df5ad4372dab6d3,delivered,2018-01-14 14:33:31,...,MG,35661.0,-19.870305,-44.593326,para de minas,MG,224.0,37564,borda da mata,MG
3,00024acbcdf0a6daa1e931b038114c75,1,7634da152a4610f1595efa32f14722fc,9d7a1d34a5052409006425275ba1c2b4,2018-08-15 10:10:18,12.99,12.79,d4eb9395c8c0431ee92fce09860c5a06,delivered,2018-08-08 10:00:35,...,SP,12952.0,-23.089925,-46.611654,atibaia,SP,27.0,14403,franca,SP
4,00042b26cf59d7ce69dfabb4e55b4fd9,1,ac6c3623068f30de03045865e4e10089,df560393f3a51e74553ab94004ba5c87,2017-02-13 13:57:51,199.9,18.14,58dbd0b2d70206bf40e62cd34e84d795,delivered,2017-02-04 13:57:51,...,SP,13226.0,-23.243402,-46.827614,varzea paulista,SP,4.0,87900,loanda,PR


## 2. Data Preprocessing

In [3]:
# Industry mapping + base metrics
# Use the English category column when the merged data has one; otherwise fall
# back to the original Portuguese column.
if 'product_category_name_english' in df.columns:
    category_col = 'product_category_name_english'
else:
    category_col = 'product_category_name'

# Industry -> raw category names. Within each industry the English names come
# first, followed by the Portuguese (Olist original) names. A name that is the
# same in both languages is listed once.
# NOTE(review): the Portuguese Home & Living list contains three furniture
# categories with no English counterpart here, so the Home & Living population
# may depend on which category column is present — confirm this is intended.
industry_categories = {
    'Home & Living': [
        'bed_bath_table', 'furniture_decor', 'office_furniture',
        'furniture_living_room', 'housewares', 'home_construction',
        'garden_tools',
        'cama_mesa_banho', 'moveis_decoracao', 'moveis_escritorio',
        'moveis_sala', 'moveis_quarto', 'moveis_colchao_e_estofado',
        'moveis_cozinha_area_de_servico_jantar_e_jardim',
        'utilidades_domesticas', 'casa_construcao', 'ferramentas_jardim',
    ],
    'Tech & Electronics': [
        'computers_accessories', 'pcs', 'telephony', 'home_appliances',
        'small_appliances', 'air_conditioning', 'electronics',
        'consoles_games',
        'informatica_acessorios', 'telefonia', 'eletrodomesticos',
        'eletroportateis', 'ar_condicionado', 'eletronicos',
    ],
    'Health & Beauty': [
        'perfumery', 'health_beauty',
        'perfumaria', 'beleza_saude',
    ],
    'Sports & Leisure': [
        'sports_leisure', 'musical_instruments', 'art',
        'esporte_lazer', 'instrumentos_musicais', 'arte',
    ],
    'Fashion & Accessories': [
        'watches_gifts', 'luggage_accessories', 'fashion_bags_accessories',
        'fashion_shoes',
        'relogios_presentes', 'malas_acessorios',
        'fashion_bolsas_e_acessorios', 'fashion_calcados',
    ],
    'Kids & Toys': [
        'baby', 'toys',
        'bebes', 'brinquedos',
    ],
    'Automotive': [
        'auto',
        'automotivo',
    ],
    'Life Goods': [
        'stationery', 'books_general_interest', 'cool_stuff',
        'papelaria', 'livros_tecnicos',
    ],
    'Pet & Agro': [
        'pet_shop', 'agro_industry_and_commerce',
        'agro_industria_e_comercio',
    ],
    'Construction & Safety': [
        'construction_tools_safety', 'signaling_and_security',
        'construcao_ferramentas_seguranca', 'sinalizacao_e_seguranca',
    ],
}

# Flatten to the category -> industry lookup used by Series.map below.
industry_map = {
    category: industry
    for industry, categories in industry_categories.items()
    for category in categories
}

# Portuguese Home & Living category name -> display label used in charts.
category_label_map = dict([
    ('cama_mesa_banho', 'bed_bath_table'),
    ('moveis_decoracao', 'furniture_decor'),
    ('moveis_escritorio', 'office_furniture'),
    ('moveis_sala', 'furniture_living_room'),
    ('utilidades_domesticas', 'housewares'),
    ('casa_construcao', 'home_construction'),
    ('ferramentas_jardim', 'garden_tools'),
    ('moveis_quarto', 'furniture_bedroom'),
    ('moveis_colchao_e_estofado', 'mattress_upholstery'),
    ('moveis_cozinha_area_de_servico_jantar_e_jardim', 'kitchen_dining_garden'),
])

raw_category = df[category_col]

# Categories missing from the lookup are labelled 'Other'.
df['industry'] = raw_category.map(industry_map).fillna('Other')
# Item-level GMV = item price plus its freight charge.
df['gmv_item'] = df['price'] + df['freight_value']
# Display label: mapped name where one exists, otherwise the raw category name.
df['category_display'] = raw_category.map(category_label_map).fillna(raw_category)

# Shared chart palette and figure size.
MAIN_COLOR, ACCENT_COLOR = '#3e84df', '#ffa743'
FIG_WIDTH, FIG_HEIGHT = 1200, 675

print('Industry mapping and GMV calculation complete')



Industry mapping and GMV calculation complete


## 3. Home & Living Visualizations


In [4]:
# Seller revenue concentration (Pareto) for Home & Living
# Keep only Home & Living rows, then rank sellers by total GMV, largest first.
df_home = df.loc[df['industry'] == 'Home & Living'].copy()
seller_gmv = (
    df_home.groupby('seller_id')['gmv_item']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
# After reset_index the positional index is 0..n-1, so rank is index + 1.
seller_gmv['rank'] = seller_gmv.index + 1
total_gmv = seller_gmv['gmv_item'].sum()
seller_gmv['cumsum_share'] = seller_gmv['gmv_item'].cumsum() / total_gmv

# First seller (by rank) at which the cumulative share reaches 50%.
reached_half = (seller_gmv['cumsum_share'] >= 0.5).to_numpy()
half_idx = seller_gmv.index[reached_half][0]
half_rank = int(seller_gmv.at[half_idx, 'rank'])
half_share = float(seller_gmv.at[half_idx, 'cumsum_share'])

cumulative_line = go.Scatter(
    x=seller_gmv['rank'],
    y=seller_gmv['cumsum_share'],
    mode='lines',
    line={'color': MAIN_COLOR, 'width': 3},
    name='Cumulative Revenue',
)
half_marker = go.Scatter(
    x=[half_rank],
    y=[half_share],
    mode='markers',
    marker={'color': ACCENT_COLOR, 'size': 10},
    name='50% Revenue',
)

fig = go.Figure()
fig.add_trace(cumulative_line)
# Dashed guides crossing at the 50% point.
fig.add_hline(y=0.5, line_dash='dash', line_color=ACCENT_COLOR)
fig.add_vline(x=half_rank, line_dash='dash', line_color=ACCENT_COLOR)
fig.add_trace(half_marker)
fig.update_layout(
    title='Home & Living Seller Revenue Concentration (Pareto)',
    xaxis_title='Seller Rank',
    yaxis_title='Cumulative Revenue Share',
    yaxis_tickformat='.0%',
    showlegend=True,
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
)
fig.show()
print(f'50% revenue is reached by top {half_rank:,} sellers.')


50% revenue is reached by top 38 sellers.


In [5]:
# SKU breadth: top 30 sellers by GMV vs. all remaining Home & Living sellers
# seller_gmv is already sorted by GMV descending, so the first 30 rows are the top 30.
top30_ids = seller_gmv['seller_id'].iloc[:30]
# Distinct products sold per seller.
seller_skus = df_home.groupby('seller_id')['product_id'].nunique()

top30_skus = seller_skus.loc[top30_ids]
other_skus = seller_skus[~seller_skus.index.isin(top30_ids)]

sku_stats = pd.DataFrame({
    'Metric': ['Avg SKU', 'Median SKU'],
    'Top 30 Sellers': top30_skus.agg(['mean', 'median']).to_numpy(),
    'Other Sellers': other_skus.agg(['mean', 'median']).to_numpy(),
})

# One bar series per seller group; "Other" is drawn first, then the top 30.
fig = go.Figure()
for group_name, bar_color in (('Other Sellers', MAIN_COLOR), ('Top 30 Sellers', ACCENT_COLOR)):
    fig.add_trace(go.Bar(
        x=sku_stats['Metric'],
        y=sku_stats[group_name],
        name=group_name,
        marker_color=bar_color,
    ))
fig.update_layout(
    title='SKU Comparison (Top 30 vs Other Sellers)',
    yaxis_title='Number of SKUs',
    barmode='group',
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
)
fig.show()


In [6]:
# Median seller revenue by SKU-count range (Home & Living)
# Per seller: number of distinct products and total GMV.
seller_stats = (
    df_home.groupby('seller_id')
    .agg(sku_count=('product_id', 'nunique'), revenue=('gmv_item', 'sum'))
    .reset_index()
)

# Bin edges pair with the labels below; the last edge is a large sentinel so the
# final bin is open-ended ("17+").
labels = ['1-2', '3-4', '5-6', '7-9', '10-16', '17+']
bins = [0, 2, 4, 6, 9, 16, 10**9]
seller_stats['sku_bin'] = pd.cut(seller_stats['sku_count'], bins=bins, labels=labels)

# reindex(labels) fixes the row order and keeps a row for every bin.
bin_summary = (
    seller_stats.groupby('sku_bin')['revenue']
    .median()
    .reindex(labels)
    .reset_index()
)
# Rows follow `labels`, so the last row is the '17+' bin.
top_bin_median = bin_summary['revenue'].iloc[-1]

median_line = go.Scatter(
    x=bin_summary['sku_bin'],
    y=bin_summary['revenue'],
    mode='lines+markers',
    line={'color': MAIN_COLOR, 'width': 3},
    marker={'size': 8},
)
# Accent marker drawn on top of the line to highlight the widest-catalogue bin.
top_bin_marker = go.Scatter(
    x=['17+'],
    y=[top_bin_median],
    mode='markers',
    marker={'color': ACCENT_COLOR, 'size': 12},
    name='17+ SKU',
)

fig = go.Figure()
fig.add_trace(median_line)
fig.add_trace(top_bin_marker)
fig.update_layout(
    title='Median Revenue by SKU Range (Home & Living)',
    xaxis_title='SKU Range',
    yaxis_title='Median Revenue',
    showlegend=False,
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
)
fig.show()


In [7]:
# Category concentration (Home & Living): the title reports the GMV share of the
# top 3 categories; the bars show order counts for the 10 largest categories by GMV.
by_category = df_home.groupby('category_display')
cat_rev = by_category['gmv_item'].sum().reset_index()
cat_orders = by_category['order_id'].nunique().reset_index(name='order_count')
cat_summary = (
    pd.merge(cat_rev, cat_orders, on='category_display')
    .sort_values('gmv_item', ascending=False)
)

# Top 3 categories by GMV and their share of total Home & Living GMV.
top3 = cat_summary.iloc[:3]
top3_share = top3['gmv_item'].sum() / cat_summary['gmv_item'].sum()

# Ascending sort so the largest bar ends up at the top of the horizontal chart.
plot_data = cat_summary.iloc[:10].sort_values('order_count')
is_top3 = plot_data['category_display'].isin(top3['category_display'])
colors = is_top3.map({True: ACCENT_COLOR, False: MAIN_COLOR}).tolist()

category_bars = go.Bar(
    x=plot_data['order_count'],
    y=plot_data['category_display'],
    orientation='h',
    marker={'color': colors},
)
fig = go.Figure(category_bars)
fig.update_layout(
    title=f'Home & Living: Top 3 Categories = {top3_share:.0%} of Revenue',
    xaxis_title='Number of Orders',
    yaxis_title='Category',
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
)
fig.show()


In [8]:
# Overall retention (Home & Living)
# Distinct Home & Living orders per unique customer; a customer counts as
# "Returning" when they have two or more.
cust_orders = df_home.groupby('customer_unique_id')['order_id'].nunique()
retention_rate = cust_orders.ge(2).mean()

# Donut chart: first-time vs. returning share of customers.
retention_donut = go.Pie(
    labels=['First-time', 'Returning'],
    values=[1 - retention_rate, retention_rate],
    hole=0.6,
    marker={'colors': [MAIN_COLOR, ACCENT_COLOR]},
)
fig = go.Figure(data=[retention_donut])
fig.update_layout(
    title='Home & Living Retention (Overall)',
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
)
# Print the returning share in the middle of the donut hole.
fig.add_annotation(
    text=f"{retention_rate:.2%}",
    x=0.5,
    y=0.5,
    showarrow=False,
    font={'size': 32, 'color': ACCENT_COLOR},
)
fig.show()


In [9]:
# Purchase-step retention (Home & Living)
#
# Uses the same population as the rest of this section (industry == 'Home & Living').
# The previous version re-filtered on a hand-written category list whose names
# ('home_utilities', 'home_comfort') are not part of this notebook's category
# mappings, and it overwrote df_home with that narrower frame, so results of the
# earlier cells changed if they were re-run afterwards. Re-deriving df_home with
# the section's own definition keeps this cell idempotent and consistent.
# pandas / plotly are already imported in the setup cell.
df_home = df[df['industry'] == 'Home & Living'].copy()

# Distinct Home & Living orders per unique customer.
cust_orders = (
    df_home
    .groupby('customer_unique_id')['order_id']
    .nunique()
)

# Number of customers who placed at least k distinct orders, for k = 1..4.
n1 = (cust_orders >= 1).sum()
n2 = (cust_orders >= 2).sum()
n3 = (cust_orders >= 3).sum()
n4 = (cust_orders >= 4).sum()


def safe_div(num, den):
    """Return ``num / den``, or 0 when ``den`` is zero.

    Guards the step ratios below so that a step nobody reached yields 0
    instead of a division-by-zero result.
    """
    return num / den if den else 0


# Share of customers at step k who went on to place order k + 1.
ret_1_2 = safe_div(n2, n1)
ret_2_3 = safe_div(n3, n2)
ret_3_4 = safe_div(n4, n3)

# Reuse the notebook palette; MAIN_BLUE is kept as an alias of MAIN_COLOR.
MAIN_BLUE = MAIN_COLOR

step_labels = ['1st → 2nd order', '2nd → 3rd order', '3rd → 4th order']
step_rates = [ret_1_2, ret_2_3, ret_3_4]

# Bar chart with the rate printed above each bar.
fig = go.Figure(data=[
    go.Bar(
        x=step_labels,
        y=step_rates,
        text=[f'{rate:.1%}' for rate in step_rates],
        textposition='outside',
        marker=dict(color=MAIN_BLUE),
        textfont=dict(color=MAIN_BLUE)
    )
])

fig.update_layout(
    title='Home & Living Purchase-Step Retention',
    xaxis_title='Purchase Step',
    yaxis=dict(title='Share Placing Next Order', tickformat='.0%'),
    width=FIG_WIDTH,
    height=FIG_HEIGHT,
    showlegend=False
)

fig.show()

