In [1]:
import pandas as pd

In [3]:
transactions = pd.read_csv("../Transformación de Datos/Datasets/transactions.csv")
customers = pd.read_csv("../Transformación de Datos/Datasets/customers.csv")

In [5]:
transactions["event_ts"] = pd.to_datetime(transactions["timestamp"])

## Ordenar Valores

In [9]:
transactions.sort_values(["discount_pct", "event_ts"])

Unnamed: 0,transaction_id,customer_id,product_id,store_id,timestamp,ts_str,ts_tz,quantity,amount,amount_str,discount_pct,discount_str,payment_method,status,device,channel,hi_value_flag,event_ts
247,248,238,13,11,2024-01-02 07:31:39,2024-01-02T07:31:39,2024-01-02T07:31:39-05:00,5,1004.37,"$1,004.37",0,0%,cash,completed,ios,mobile,1,2024-01-02 07:31:39
689,690,74,61,12,2024-01-03 04:51:30,2024/01/03 04:51,2024-01-03T04:51:30-05:00,1,31.61,$31.61,0,0%,card,refunded,android,mobile,0,2024-01-03 04:51:30
1313,1314,223,20,8,2024-01-03 08:12:44,03/01/2024 08:12,2024-01-03T08:12:44-05:00,4,177.30,$177.30,0,0%,cash,completed,ios,mobile,0,2024-01-03 08:12:44
1331,1332,212,72,19,2024-01-03 16:44:48,2024-01-03 16:44:48,2024-01-03T16:44:48-05:00,3,391.87,$391.87,0,0%,transfer,completed,web,desktop,0,2024-01-03 16:44:48
1133,1134,166,59,10,2024-01-07 08:26:16,2024-01-07 08:26:16,2024-01-07T08:26:16-05:00,4,330.44,$330.44,0,0%,wallet,completed,web,desktop,0,2024-01-07 08:26:16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1271,1272,133,71,6,2025-07-15 11:30:25,2025-07-15T11:30:25,2025-07-15T11:30:25-05:00,1,72.52,$72.52,25,25%,transfer,pending,android,mobile,0,2025-07-15 11:30:25
707,708,87,13,1,2025-07-18 20:47:31,2025-07-18T20:47:31,2025-07-18T20:47:31-05:00,2,375.64,$375.64,25,25%,cash,pending,web,desktop,0,2025-07-18 20:47:31
124,125,281,73,20,2025-07-20 06:57:32,2025-07-20 06:57:32,2025-07-20T06:57:32-05:00,3,549.67,$549.67,25,25%,card,completed,web,desktop,0,2025-07-20 06:57:32
98,99,30,42,5,2025-07-20 22:54:04,20/07/2025 22:54,2025-07-20T22:54:04-05:00,2,393.42,$393.42,25,25%,card,pending,ios,mobile,0,2025-07-20 22:54:04


## Rolling por tamaño fijo de observaciones

In [14]:
transactions["amt_roll_last3_mean"] = (
    transactions
    .groupby("customer_id")["amount"]
    .rolling(3, min_periods=1) # 3: last 3 transactions, 1: at least 1 transaction
    .mean()
    .reset_index(level=0, drop=True)
)
transactions["amt_roll_last3_mean"]

0        770.880000
1        954.780000
2        711.320000
3       2887.960000
4          2.820000
           ...     
1495     203.950000
1496     424.960000
1497     190.736667
1498     600.756667
1499     570.766667
Name: amt_roll_last3_mean, Length: 1500, dtype: float64

## Ranking dentro de la partición

In [17]:
transactions["rank_amount_desc"] = (
    transactions
    .groupby("customer_id")["amount"]
    .rank(ascending=False, method="dense")
)

transactions["rank_amount_desc"].sort_values(ascending=True)

1499     1.0
881      1.0
297      1.0
294      1.0
891      1.0
        ... 
476     12.0
609     12.0
1272    13.0
444     14.0
32      15.0
Name: rank_amount_desc, Length: 1500, dtype: float64

## Expanding (Acumulados)

In [20]:
g = transactions.groupby("customer_id")["amount"]

# Cálculo de la suma acumulada
transactions["amt_cumsum"] = g.cumsum()

# Cálculo de la media acumulada
transactions["amt_cummean"] = g.expanding().mean().reset_index(level=0, drop=True)

## Ventana Exponencial

In [21]:
transactions["amt_ewm_span5"] = (
    transactions
    .groupby("customer_id")["amount"]
    .transform(lambda x: x.ewm(span=5, adjust=False).mean())
)

transactions["amt_ewm_span5"]

0        770.880000
1        954.780000
2        711.320000
3       2887.960000
4          2.820000
           ...     
1495     201.251481
1496     312.530370
1497     654.884321
1498     725.678519
1499     617.205185
Name: amt_ewm_span5, Length: 1500, dtype: float64

## GROUP BY ... HAVING

In [25]:
agg = transactions.groupby("customer_id", as_index=False).agg(
    total_spend=("amount","sum"),
    n_tx=("transaction_id","count")
)

agg.query("total_spend > 1000 and n_tx >= 5")

Unnamed: 0,customer_id,total_spend,n_tx
2,3,3270.74,6
3,4,5941.90,9
7,8,5268.95,11
8,9,2724.64,5
9,10,4965.64,8
...,...,...,...
290,292,2380.23,5
291,293,3804.37,9
292,294,1713.92,5
294,296,4651.28,6


In [27]:
transactions.groupby("customer_id").filter(lambda x: x["amount"].sum() > 1000)

Unnamed: 0,transaction_id,customer_id,product_id,store_id,timestamp,ts_str,ts_tz,quantity,amount,amount_str,...,status,device,channel,hi_value_flag,event_ts,amt_roll_last3_mean,amt_cumsum,amt_cummean,rank_amount_desc,amt_ewm_span5
0,1,258,35,2,2025-05-11 15:46:46,2025-05-11 15:46:46,2025-05-11T15:46:46-05:00,4,770.88,$770.88,...,completed,ios,mobile,1,2025-05-11 15:46:46,770.880000,770.88,770.8800,2.0,770.880000
1,2,208,62,16,2024-01-20 10:14:03,2024/01/20 10:14,2024-01-20T10:14:03-05:00,4,954.78,$954.78,...,pending,web,desktop,1,2024-01-20 10:14:03,954.780000,954.78,954.7800,1.0,954.780000
2,3,115,5,20,2024-07-09 05:06:54,09/07/2024 05:06,2024-07-09T05:06:54-05:00,4,711.32,$711.32,...,completed,android,mobile,1,2024-07-09 05:06:54,711.320000,711.32,711.3200,3.0,711.320000
3,4,288,33,16,2024-04-18 09:10:31,2024-04-18 09:10:31,2024-04-18T09:10:31-05:00,4,2887.96,"$2,887.96",...,completed,web,desktop,1,2024-04-18 09:10:31,2887.960000,2887.96,2887.9600,1.0,2887.960000
4,5,66,2,9,2025-06-09 14:35:33,2025-06-09T14:35:33,2025-06-09T14:35:33-05:00,1,2.82,$2.82,...,completed,ios,mobile,0,2025-06-09 14:35:33,2.820000,2.82,2.8200,5.0,2.820000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,1496,136,37,13,2025-03-05 13:22:13,05-Mar-2025 13:22:13,2025-03-05T13:22:13-05:00,1,163.03,$163.03,...,completed,android,mobile,0,2025-03-05 13:22:13,203.950000,1291.94,258.3880,3.0,201.251481
1496,1497,17,56,18,2024-12-01 10:13:45,01/12/2024 10:13,2024-12-01T10:13:45-05:00,2,390.84,$390.84,...,completed,android,mobile,0,2024-12-01 10:13:45,424.960000,1365.98,341.4950,2.0,312.530370
1497,1498,88,75,9,2025-06-11 22:25:49,2025-06-11T22:25:49,2025-06-11T22:25:49-05:00,1,379.73,$379.73,...,completed,android,mobile,0,2025-06-11 22:25:49,190.736667,5039.85,1007.9700,3.0,654.884321
1498,1499,209,52,20,2025-05-28 03:49:01,28-May-2025 03:49:01,2025-05-28T03:49:01-05:00,1,226.08,$226.08,...,completed,android,mobile,0,2025-05-28 03:49:01,600.756667,2991.62,747.9050,4.0,725.678519
