In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the customer purchases CSV into a DataFrame.
raw_csv_path = './data/customer_purchases.csv'
df_raw = pd.read_csv(raw_csv_path)

In [14]:
# Report the dimensions of the loaded frame (the message is in Portuguese:
# "<rows> linhas e <cols> colunas").
n_rows, n_cols = df_raw.shape
print(f'{n_rows} linhas e {n_cols} colunas')

1000 lihas e 5 colunas


In [5]:
# Convert the purchase_date column to pandas datetimes so that date
# arithmetic and the .dt accessor can be used on it.
parsed_purchase_dates = pd.to_datetime(df_raw['purchase_date'])
df_raw['purchase_date'] = parsed_purchase_dates

In [6]:
# Work on an independent (deep) copy so columns added to df1 do not
# appear on df_raw.
df1 = df_raw.copy(deep=True)

In [7]:
# 1. Compute per-customer metrics
def _most_frequent(values):
    """Return the most frequent value in ``values``, or None if there is none.

    ``Series.mode()`` is evaluated once per group; when several values tie,
    the first entry of the mode result is used.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


# One row per customer_id: total and mean of `amount`, the count of
# non-null `purchase_id` values, and the most frequent `category`.
customer_metrics = (
    df1.groupby('customer_id')
    .agg(
        total_spent=('amount', 'sum'),
        avg_purchase_value=('amount', 'mean'),
        num_purchases=('purchase_id', 'count'),
        most_frequent_category=('category', _most_frequent),
    )
    .reset_index()
)

In [8]:
# Pick the five customers at each extreme of total spend.
ranking_column = 'total_spent'
top_5_customers = customer_metrics.nlargest(n=5, columns=ranking_column)
bottom_5_customers = customer_metrics.nsmallest(n=5, columns=ranking_column)

In [9]:
# Tag every purchase with its monthly period ('M') for grouping by month.
purchase_month = df1['purchase_date'].dt.to_period('M')
df1['month'] = purchase_month

In [10]:
# Aggregate `amount` per month: the sum gives total sales, the mean gives
# the average purchase value. `month` is restored as a regular column.
purchases_by_month = df1.groupby('month')
monthly_trends = purchases_by_month.agg(
    total_sales=pd.NamedAgg(column='amount', aggfunc='sum'),
    avg_purchase_value=pd.NamedAgg(column='amount', aggfunc='mean'),
)
monthly_trends = monthly_trends.reset_index()

In [11]:
# The reference point is the most recent purchase in the data, not today.
latest_date = df1['purchase_date'].max()
three_months_ago = latest_date - pd.DateOffset(months=3)

# Split purchases at the cutoff; a customer may appear on both sides.
bought_on_or_before_cutoff = df1['purchase_date'] <= three_months_ago
bought_after_cutoff = df1['purchase_date'] > three_months_ago

inactive_customers = df1.loc[bought_on_or_before_cutoff, 'customer_id'].unique()
active_customers = df1.loc[bought_after_cutoff, 'customer_id'].unique()

# Keep only customers whose purchases all fall on or before the cutoff.
customers_no_recent_purchases = set(inactive_customers).difference(active_customers)

In [12]:
# Print each result under its heading, in a fixed order. Headings after the
# first start with a newline so the sections are separated by a blank line.
report_sections = [
    ("Customer Metrics:", customer_metrics.head()),
    ("\nTop 5 customers - total amount:", top_5_customers),
    ("\nBottom 5 customers - total amount:", bottom_5_customers),
    ("\nPurchase Monthly Trends:", monthly_trends),
    ("\nCustomers without purchases in the last 3 months:", customers_no_recent_purchases),
]

for heading, content in report_sections:
    print(heading)
    print(content)

Customer Metrics:
  customer_id  total_spent  avg_purchase_value  num_purchases  \
0    CUST_001      1550.75           96.921875             16   
1    CUST_002      1313.75          101.057692             13   
2    CUST_003      1323.92          110.326667             12   
3    CUST_004      1102.69          100.244545             11   
4    CUST_005      1199.27          109.024545             11   

  most_frequent_category  
0          Home & Garden  
1            Electronics  
2               Clothing  
3            Electronics  
4            Electronics  

Top 5 customers - total amount:
   customer_id  total_spent  avg_purchase_value  num_purchases  \
61    CUST_062      2102.22          110.643158             19   
89    CUST_090      2040.24          102.012000             20   
98    CUST_099      2000.90          111.161111             18   
91    CUST_092      1779.33           93.648947             19   
32    CUST_033      1775.29           98.627222             18   


In [13]:
# Persist df1 as a pickle. The context manager guarantees the file handle is
# flushed and closed, even if pickling raises.
with open('./data/customer_purchases.pkl', 'wb') as pickle_file:
    pickle.dump(df1, pickle_file)