In [1]:
import pandas as pd

# Carregar o dataset
df = pd.read_csv('../data/commerce_dataset_clean.csv', sep=';')

# Vamos verificar as primeiras linhas do dataset para entender sua estrutura
df.head()


Unnamed: 0,invoice_id,branch,city,customer_type,gender,product_line,unit_price,quantity,vat,total,...,payment_method,cogs,gross_margin_pct,gross_income,rating,time_of_day,day_name,month_name,month,quarter
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,...,Ewallet,522.83,4.7619,26.1415,9.1,afternoon,Saturday,January,1,1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,...,Cash,76.4,4.7619,3.82,9.6,morning,Friday,March,3,1
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,...,Credit card,324.31,4.7619,16.2155,7.4,afternoon,Sunday,March,3,1
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,...,Ewallet,465.76,4.7619,23.288,8.4,evening,Sunday,January,1,1
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,...,Ewallet,604.17,4.7619,30.2085,5.3,morning,Friday,February,2,1


In [2]:
# Cálculos das métricas de desempenho de vendas
metrics = {}

# Total de vendas no período
metrics['total_sales'] = df['total'].sum()

# Número total de produtos vendidos
metrics['total_products_sold'] = df['quantity'].sum()

# Média de preço unitário de linha de produtos
average_unit_price_per_product_line = df.groupby('product_line')['unit_price'].mean()

# Linha de produto mais vendido (em termos de quantidade)
most_sold_product_line = df.groupby('product_line')['quantity'].sum().idxmax()

# As 5 linhas de produtos mais bem avaliados (média de rating mais alta)
top_rated_product_lines = df.groupby('product_line')['rating'].mean().sort_values(ascending=False).head(5)

# Loja com o maior volume de vendas
top_sales_branch = df.groupby('branch')['total'].sum().idxmax()

# Método de pagamento mais popular por loja e mês (considerando a frequência de uso)
popular_payment_method = df.groupby(['branch', 'month_name'])['payment_method'].agg(lambda x:x.value_counts().idxmax())

# As 3 linhas de produtos com mais quantidades vendidas por gênero do cliente
top_products_by_gender = df.groupby(['gender', 'product_line'])['quantity'].sum().groupby(level=0, group_keys=False).nlargest(3)

# Produto mais lucrativo (maior receita gross_income) por filial (branch)
most_profitable_product_by_branch = df.groupby(['branch', 'product_line'])['gross_income'].sum().groupby(level=0, group_keys=False).idxmax()

# Produto mais lucrativo (maior receita gross_income) por quarter
most_profitable_product_by_quarter = df.groupby(['quarter', 'product_line'])['gross_income'].sum().groupby(level=0, group_keys=False).idxmax()

# Período do dia em que ocorre o maior número de vendas
most_sales_time_of_day = df['time_of_day'].value_counts().idxmax()

# Preparar a análise detalhada por quarter, região e categoria de produto para visualização posterior

# Armazenar resultados para visualização
metrics_results = {
    "average_unit_price_per_product_line": average_unit_price_per_product_line,
    "most_sold_product_line": most_sold_product_line,
    "top_rated_product_lines": top_rated_product_lines,
    "top_sales_branch": top_sales_branch,
    "popular_payment_method": popular_payment_method,
    "top_products_by_gender": top_products_by_gender,
    "most_profitable_product_by_branch": most_profitable_product_by_branch,
    "most_profitable_product_by_quarter": most_profitable_product_by_quarter,
    "most_sales_time_of_day": most_sales_time_of_day,
}

metrics_results


{'average_unit_price_per_product_line': product_line
 Electronic accessories    53.551588
 Fashion accessories       57.153652
 Food and beverages        56.008851
 Health and beauty         54.854474
 Home and lifestyle        55.316937
 Sports and travel         56.993253
 Name: unit_price, dtype: float64,
 'most_sold_product_line': 'Electronic accessories',
 'top_rated_product_lines': product_line
 Food and beverages        7.113218
 Fashion accessories       7.029213
 Health and beauty         7.003289
 Electronic accessories    6.924706
 Sports and travel         6.916265
 Name: rating, dtype: float64,
 'top_sales_branch': 'C',
 'popular_payment_method': branch  month_name
 A       February          Ewallet
         January           Ewallet
         March             Ewallet
 B       February             Cash
         January       Credit card
         March             Ewallet
 C       February             Cash
         January              Cash
         March                Cas