**Imports**

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import json
import simplejson
from pathlib import Path
from typing import List, Dict, Tuple
from sklearn.linear_model import LinearRegression

**Code**

In [7]:
df = pd.DataFrame({
    "user_id" : [1, 1, 2, 2, 2, 3],
    "event_time" : [
        "2024-01-01 10:00:00",
        "2024-01-01 10:00:00",
        "2024-01-02 11:00:00",
        "2024-01-02 11:05:00",
        "2024-01-02 11:05:00",
        "2024-01-03"
    ],
    "clicked_ad" : [1, 1, 0, 0, 0, 1]
})

print("=== Original ===")
print(df)
print("=== Duplicated ===")
df[df.duplicated()]

=== Original ===
   user_id           event_time  clicked_ad
0        1  2024-01-01 10:00:00           1
1        1  2024-01-01 10:00:00           1
2        2  2024-01-02 11:00:00           0
3        2  2024-01-02 11:05:00           0
4        2  2024-01-02 11:05:00           0
5        3           2024-01-03           1
=== Duplicated ===


Unnamed: 0,user_id,event_time,clicked_ad
1,1,2024-01-01 10:00:00,1
4,2,2024-01-02 11:05:00,0


In [31]:
rows = [
    {"user" : "U1", "day" : "2024-01-01", "product" : "A", "clicked" : 1},
    {"user" : "U1", "day" : "2024-01-01", "product" : "A", "clicked" : 1},
    {"user" : "U1", "day" : "2024-01-01", "product" : "B", "clicked" : 0},
    {"user" : "U2", "day" : "2024-01-02", "product" : "A", "clicked" : 1},
]

df = pd.DataFrame(rows)

aggperuser = (
    df.groupby("user")
       .agg(
           event_count=("product", "count"),
           ever_clicked=("clicked", "max")
       )
       .reset_index()
)

print("Remove exact duplicates")
print(df[df.duplicated()])
print("Define uniqueness rule and deduplicate by subject")
print(df.duplicated(subset = ["user", "day", "product"]))
print("Aggregate per user: event_count, ever_clicked")
print(aggperuser)

Remove exact duplicates
  user         day product  clicked
1   U1  2024-01-01       A        1
Define uniqueness rule and deduplicate by subject
0    False
1     True
2    False
3    False
dtype: bool
Aggregate per user: event_count, ever_clicked
  user  event_count  ever_clicked
0   U1            3             1
1   U2            1             1
