In [None]:
import pandas as pd

In [None]:
# Load the training split. Later cells read the columns `activation`,
# `customer`, `merchant` and `ind_recommended` from this frame.
df = pd.read_csv(filepath_or_buffer='processed/train_dataset.csv')

### Preparing dataset

In [None]:
# Columns that are never fed to a model: the target plus the two identifiers.
# NOTE: `ind_recommended` is *not* in this list, so it remains a feature column
# in every matrix below (constant 1 in the treatment arm, constant 0 in control).
non_feature_cols = ['activation', 'customer', 'merchant']

# Full-population features / target.
X = df.drop(columns=non_feature_cols)
y = df['activation']

# Split rows by arm: recommended (treatment) vs. not recommended (control).
treatment_df = df.loc[df['ind_recommended'].eq(1)]
control_df = df.loc[df['ind_recommended'].eq(0)]

# Per-arm features / target used to fit the two T-learner outcome models.
X_treatment, y_treatment = treatment_df.drop(columns=non_feature_cols), treatment_df['activation']
X_control, y_control = control_df.drop(columns=non_feature_cols), control_df['activation']

### T-Learner Model

In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Fixed seed so the fitted trees, and every score derived from them, are
# identical on each Restart & Run All.
RANDOM_STATE = 42

# T-learner: one outcome model per arm, each fitted only on that arm's rows.
model_treatment = GradientBoostingRegressor(random_state=RANDOM_STATE)
model_control = GradientBoostingRegressor(random_state=RANDOM_STATE)

# Train the models
model_treatment.fit(X_treatment, y_treatment)
model_control.fit(X_control, y_control)

### Testing and Evaluation

In [None]:
test_df = pd.read_csv('processed/test_dataset.csv')

# Identifier, label and treatment-indicator columns are set aside for the
# output/evaluation frame; everything else stays in `test_df` as features.
held_out_cols = ['activation', 'ind_recommended', 'customer', 'merchant']
activation, ind_recommended, customer, merchant = (
    test_df[col] for col in held_out_cols
)

# Rebind instead of dropping in place; the resulting frame is the same.
test_df = test_df.drop(columns=held_out_cols)

In [None]:
# Assume all treated.
# The treatment model was fitted on `X_treatment`, whose treatment-indicator
# column is called `ind_recommended` (it was never dropped from the training
# features). Re-create that column under the same name and select columns in
# the fitted order, so that feature names and positions match exactly instead
# of appending a differently named `treatment` column at the end.
test_df_treated = test_df.assign(ind_recommended=1)[X_treatment.columns]
y_test_treated = model_treatment.predict(test_df_treated)

In [None]:
# Assume all controlled.
# The control model was fitted on `X_control`, whose treatment-indicator column
# is called `ind_recommended` (it was never dropped from the training
# features). Re-create that column under the same name and select columns in
# the fitted order, so that feature names and positions match exactly instead
# of appending a differently named `treatment` column at the end.
test_df_control = test_df.assign(ind_recommended=0)[X_control.columns]
y_test_control = model_control.predict(test_df_control)

In [None]:
# Uplift score per test row: predicted outcome if treated minus predicted
# outcome if not treated.
y_pred = np.subtract(y_test_treated, y_test_control)

In [None]:
# Columns carried over unchanged from the test file.
observed_cols = {
    'customer': customer,
    'merchant': merchant,
    'activation': activation,
    'ind_recommended': ind_recommended,
}

# Columns produced by the two T-learner models.
predicted_cols = {
    'predicted_activation_treated': y_test_treated,
    'predicted_activation_control': y_test_control,
    'predicted_score': y_pred,
}

# One row per test row; column order is observed columns, then predictions.
output = pd.DataFrame({**observed_cols, **predicted_cols})

#### Evaluation on Incremental Activation Score

In [None]:
import pandas as pd

### Scoring function for participating teams :
def incr_act_top10(input_df: pd.DataFrame,
                   pred_col: str,
                   top: int = 10,
                   cm_key='customer',
                   treated_col='ind_recommended',
                   actual_col='activation'):
    '''
    Return the incremental activation score for the AMEX Singapore Hackathon 2024.

    For every customer, rows are ranked by `pred_col` (highest first, ties broken
    by row order) and only the `top` best-ranked rows are kept. The score is the
    mean of `actual_col` over kept rows with `treated_col == 1` minus the mean
    over kept rows with `treated_col == 0`.

    The caller's frame is not modified.

    input_df : pandas DataFrame containing cm_key, treated_col, actual_col and pred_col
    pred_col : name of your prediction score variable
    top : number of best-ranked rows kept per customer (defaults to 10)
    cm_key : customer unique ID (do not change)
    treated_col : indicator variable whether a merchant was recommended
    actual_col : whether a CM had transacted at a given merchant (target variable)

    Returns - incremental activation (float)

    Raises - ValueError if the kept rows contain no treated (== 1) or no
             control (== 0) rows, since the difference is then undefined.
    '''
    numeric_cols = [treated_col, actual_col, pred_col]

    # Work on a private copy of only the needed columns, so neither the type
    # coercion nor the ranking leaks back into the caller's DataFrame.
    scored = input_df[[cm_key] + numeric_cols].copy()

    # Correct variable types; values that cannot be parsed become NaN.
    scored[numeric_cols] = scored[numeric_cols].apply(pd.to_numeric, errors='coerce')

    # Rank within each customer; rows with a NaN score get a NaN rank and are
    # therefore excluded by the comparison below.
    rank_per_cm = scored.groupby(cm_key)[pred_col].rank(method='first', ascending=False)
    top_rows = scored.loc[rank_per_cm <= top]

    # Mean activation per arm among the kept rows.
    arm_means = top_rows.groupby(treated_col)[actual_col].mean()

    def _arm_mean(flag):
        # Boolean match on the index works whether the indicator is int or float.
        match = arm_means[arm_means.index == flag]
        if match.empty:
            raise ValueError(
                f"no rows with {treated_col} == {flag} among the top {top} rows per {cm_key}"
            )
        return float(match.iloc[0])

    return _arm_mean(1) - _arm_mean(0)

In [None]:
# Incremental activation when keeping the top-k ranked merchants per customer,
# for k = 3 .. 10.
scores = [
    incr_act_top10(input_df=output, pred_col='predicted_score', top=k)
    for k in range(3, 11)
]
print(scores)

[0.0005609977420587375, 0.0004979716185112642, 0.0005762222237150022, 0.0004157592609769752, 0.00045293650095893853, 0.00046450275661957287, 0.0004434716002071192, 0.0004478634986726871]
