In [5]:
# Shared imports: pandas and numpy
import pandas as pd
import numpy as np







In [1]:
def generate_event_features(train_df, events_df):
    """Attach offer-level event aggregates to ``train_df``.

    The raw event log is renamed (``id2`` -> ``customer_id``, ``id3`` ->
    ``offer_id``, ``id4`` -> ``event_timestamp``, ``id6`` -> ``event_type``),
    aggregated per ``offer_id`` and left-joined onto ``train_df``.

    Columns added to the result (one value per ``offer_id``):

    * ``offer_event_count``  - number of non-null ``event_type`` rows
    * ``unique_event_types`` - number of distinct ``event_type`` values
    * ``avg_event_hour``     - mean hour-of-day of ``event_timestamp``
    * ``earliest_event`` / ``latest_event`` - min / max ``event_timestamp``
    * ``event_span_days``    - whole days between earliest and latest event
    * one column per distinct ``event_type`` holding its row count

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame to enrich. Must contain an ``offer_id`` column. The event key is
        cast to ``str`` before the join, so ``train_df['offer_id']`` should hold
        string values as well, otherwise the keys will not match.
    events_df : pandas.DataFrame
        Event log with either the raw ``id*`` column names or the already
        renamed ones. It is NOT modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        A new frame with every row of ``train_df`` (left join) plus the
        offer-level feature columns; offers without events get NaN/NaT.

    Raises
    ------
    KeyError
        If ``events_df`` lacks one of the columns needed for aggregation, or
        ``train_df`` has no ``offer_id`` column.
    """
    import pandas as pd

    # `display` is only injected as a global inside IPython/Jupyter. Import it
    # explicitly and fall back to `print` so the function also works when it is
    # called from a plain Python process.
    try:
        from IPython.display import display
    except ImportError:
        display = print

    # Rename into a NEW frame instead of `inplace=True`: the caller's event log
    # keeps its original columns and dtypes, so re-running the cell or reusing
    # `events_df` elsewhere is unaffected by this call.
    events = events_df.rename(columns={
        'id2': 'customer_id',
        'id3': 'offer_id',
        'id4': 'event_timestamp',
        'id6': 'event_type'
    })

    required_columns = ('offer_id', 'event_timestamp', 'event_type')
    missing_columns = [col for col in required_columns if col not in events.columns]
    if missing_columns:
        raise KeyError(
            f"events_df is missing required column(s): {missing_columns}"
        )

    events = events.assign(
        event_timestamp=pd.to_datetime(events['event_timestamp']),
        offer_id=events['offer_id'].astype(str),
    )
    # Precompute the hour once (vectorized) so the aggregation can use the
    # built-in 'mean' instead of a per-group Python lambda.
    events['event_hour'] = events['event_timestamp'].dt.hour

    print("[✓] Preprocessed events.")

    # Aggregate per offer_id first so the later join onto train_df is
    # one-row-per-offer and cannot duplicate training rows.
    event_agg = events.groupby('offer_id').agg(
        offer_event_count=('event_type', 'count'),
        unique_event_types=('event_type', 'nunique'),
        avg_event_hour=('event_hour', 'mean'),
        earliest_event=('event_timestamp', 'min'),
        latest_event=('event_timestamp', 'max'),
    ).reset_index()

    event_agg['event_span_days'] = (
        event_agg['latest_event'] - event_agg['earliest_event']
    ).dt.days

    # Per-offer counts of each event type (one column per distinct event_type).
    event_type_counts = events.pivot_table(
        index='offer_id',
        columns='event_type',
        aggfunc='size',
        fill_value=0
    ).reset_index()

    # Combine all offer-level features into a single frame keyed by offer_id.
    event_features = event_agg.merge(event_type_counts, on='offer_id', how='left')

    # Left join keeps every training row; offers with no events get NaN/NaT.
    train_df = train_df.merge(event_features, on='offer_id', how='left')

    print("[✓] Merged offer-level event features into train_df.")
    display(train_df.head(20))

    return train_df
