## Import Libraries

In [1]:
import sys

# Make the parent directory importable so the `src` package resolves when the
# notebook is run from its own folder. The membership check keeps the cell
# idempotent: re-running it does not keep appending duplicate sys.path entries.
if '..' not in sys.path:
    sys.path.append('..')

from src.data_processing import load_data, CreditRiskFeatureEngineer
from src.rmf_target import RFMBasedTargetBuilder

# Shared instances reused by the feature-engineering and proxy-target cells.
feature_engineer = CreditRiskFeatureEngineer()
rfm_builder = RFMBasedTargetBuilder()



In [2]:
# Load the raw transactions CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
df = load_data('../data/raw/data.csv')
df.head(5)

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,CurrencyCode,CountryCode,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
0,TransactionId_76871,BatchId_36123,AccountId_3957,SubscriptionId_887,CustomerId_4406,UGX,256,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2018-11-15T02:18:49Z,2,0
1,TransactionId_73770,BatchId_15642,AccountId_4841,SubscriptionId_3829,CustomerId_4406,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2018-11-15T02:19:08Z,2,0
2,TransactionId_26203,BatchId_53941,AccountId_4229,SubscriptionId_222,CustomerId_4683,UGX,256,ProviderId_6,ProductId_1,airtime,ChannelId_3,500.0,500,2018-11-15T02:44:21Z,2,0
3,TransactionId_380,BatchId_102363,AccountId_648,SubscriptionId_2185,CustomerId_988,UGX,256,ProviderId_1,ProductId_21,utility_bill,ChannelId_3,20000.0,21800,2018-11-15T03:32:55Z,2,0
4,TransactionId_28195,BatchId_38780,AccountId_4841,SubscriptionId_3829,CustomerId_988,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-644.0,644,2018-11-15T03:34:21Z,2,0


## Feature Engineering

In [3]:
# Fit the feature engineer on the raw transactions and transform them in one step.
# fit_transform returns a pair: X_features (not inspected in this cell) and
# features_df, whose preview shows one row per CustomerId.
# NOTE(review): std_amount is empty in the preview for customers with
# transaction_count == 1 — confirm missing values are handled before modelling.
X_features, features_df = feature_engineer.fit_transform(df)

features_df.head()

Unnamed: 0,CustomerId,total_amount,avg_amount,transaction_count,std_amount,transaction_hour,transaction_day,transaction_month,transaction_year,ProductCategory,ChannelId,ProviderId,PricingStrategy
0,CustomerId_1,-10000.0,-10000.0,1,,16.0,21.0,11.0,2018.0,airtime,ChannelId_2,ProviderId_4,4
1,CustomerId_10,-10000.0,-10000.0,1,,16.0,21.0,11.0,2018.0,airtime,ChannelId_2,ProviderId_4,4
2,CustomerId_1001,20000.0,4000.0,5,6558.963333,7.8,16.0,11.0,2018.0,financial_services,ChannelId_3,ProviderId_5,2
3,CustomerId_1002,4225.0,384.090909,11,560.498966,13.454545,14.727273,8.636364,2018.272727,airtime,ChannelId_3,ProviderId_5,4
4,CustomerId_1003,20000.0,3333.333333,6,6030.478146,14.333333,1.0,2.0,2019.0,financial_services,ChannelId_3,ProviderId_5,2


## Proxy Target Engineering

In [4]:
# Build the proxy target in two steps: compute RFM values from the raw
# transactions, then assign the is_high_risk label from them.
# NOTE(review): presumably RFM = Recency/Frequency/Monetary — confirm in src/rmf_target.py.
rfm_df = rfm_builder.compute_rfm(df)
target_df = rfm_builder.assign_high_risk_label(rfm_df)

target_df.head()

Unnamed: 0,CustomerId,is_high_risk
0,CustomerId_1,0
1,CustomerId_10,0
2,CustomerId_1001,0
3,CustomerId_1002,0
4,CustomerId_1003,0


## Final Dataset

In [5]:
# Attach the proxy target to the engineered features, keyed on CustomerId.
# validate="one_to_one" makes pandas raise MergeError if CustomerId is duplicated
# on either side, instead of silently multiplying rows in the merged frame.
final_dataset = features_df.merge(
    target_df,
    on="CustomerId",
    how="left",
    validate="one_to_one",
)

# A left join leaves is_high_risk as NaN for any customer absent from target_df;
# fail loudly here rather than passing unlabeled rows downstream.
assert final_dataset["is_high_risk"].notna().all(), "customers without a risk label after merge"

final_dataset.head()

Unnamed: 0,CustomerId,total_amount,avg_amount,transaction_count,std_amount,transaction_hour,transaction_day,transaction_month,transaction_year,ProductCategory,ChannelId,ProviderId,PricingStrategy,is_high_risk
0,CustomerId_1,-10000.0,-10000.0,1,,16.0,21.0,11.0,2018.0,airtime,ChannelId_2,ProviderId_4,4,0
1,CustomerId_10,-10000.0,-10000.0,1,,16.0,21.0,11.0,2018.0,airtime,ChannelId_2,ProviderId_4,4,0
2,CustomerId_1001,20000.0,4000.0,5,6558.963333,7.8,16.0,11.0,2018.0,financial_services,ChannelId_3,ProviderId_5,2,0
3,CustomerId_1002,4225.0,384.090909,11,560.498966,13.454545,14.727273,8.636364,2018.272727,airtime,ChannelId_3,ProviderId_5,4,0
4,CustomerId_1003,20000.0,3333.333333,6,6030.478146,14.333333,1.0,2.0,2019.0,financial_services,ChannelId_3,ProviderId_5,2,0
