In [1]:
# Imports: pandas/numpy for data handling, scikit-learn for preprocessing, modelling and metrics
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer









In [None]:
# Cell 1: Rename columns and engineer features

def generate_offer_features(train_df, offers_df):
    """Rename raw columns, engineer offer features and join them onto the interactions.

    Neither input frame is modified: all work happens on renamed copies, so the
    function can be re-run on the same raw frames (e.g. once for train and once
    for test) without changing what earlier cells loaded.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Interaction-level data. Raw columns ``id1``-``id5`` and ``y`` are renamed
        when present (missing ones are ignored by ``rename``). After renaming,
        ``offer_id`` and ``customer_id`` must exist.
    offers_df : pandas.DataFrame
        Offer metadata. After renaming it must contain ``offer_id``,
        ``offer_start``, ``offer_end`` and ``offer_discount_value``.

    Returns
    -------
    pandas.DataFrame
        Left join of the renamed interactions with the renamed offers on
        ``offer_id``, including the engineered columns ``offer_duration_days``
        and ``discount_per_day``. Interactions without a matching offer keep
        their row and get missing values in the offer columns.

    Raises
    ------
    KeyError
        If a required column (see above) is missing after renaming.
    """
    train_column_names = {
        'id1': 'interaction_id',
        'id2': 'customer_id',
        'id3': 'offer_id',
        'id4': 'impression_timestamp',
        'id5': 'impression_date',
        'y': 'click',
    }
    offer_column_names = {
        'id3': 'offer_id',
        'id9': 'offer_type',
        'f375': 'offer_code_type',
        'f376': 'offer_discount_value',
        'f377': 'offer_flag',
        'id10': 'offer_group',
        'id11': 'offer_metadata_flag',
        'f378': 'offer_extra_flag',
        'id12': 'offer_start',
        'id13': 'offer_end',
    }

    # rename() without inplace returns new frames, leaving the caller's data untouched.
    interactions = train_df.rename(columns=train_column_names)
    offers = offers_df.rename(columns=offer_column_names)

    # Per the original author's note, offer_id is stored as str in the offers data
    # but as int32 in train/test; cast the keys to str on both sides so the join
    # compares like with like.
    interactions = interactions.astype({'offer_id': str, 'customer_id': str})

    # Normalise the join key and make sure the date fields are datetimes.
    offers = offers.assign(
        offer_id=offers['offer_id'].astype(str),
        offer_start=pd.to_datetime(offers['offer_start']),
        offer_end=pd.to_datetime(offers['offer_end']),
    )

    # Feature 1: offer duration in whole days.
    duration_days = (offers['offer_end'] - offers['offer_start']).dt.days

    # Feature 2: discount per day. A zero-day duration is treated as one day to
    # avoid dividing by zero, so such offers keep their full discount value.
    offers = offers.assign(
        offer_duration_days=duration_days,
        discount_per_day=offers['offer_discount_value'] / duration_days.replace(0, 1),
    )

    # Attach the offer attributes and engineered features to every interaction.
    merged = interactions.merge(offers, on='offer_id', how='left')

    print("[✔️] Offers merged and features engineered")
    return merged
