In [1]:
import pandas as pd

In [2]:
# Read the raw transaction data; the relative path is resolved against the
# current working directory.
csv_path = "../data/data.csv"
df = pd.read_csv(csv_path)

In [3]:
# List the column names of the loaded frame to see which fields are available.
df.columns

Index(['TransactionId', 'BatchId', 'AccountId', 'SubscriptionId', 'CustomerId',
       'CurrencyCode', 'CountryCode', 'ProviderId', 'ProductId',
       'ProductCategory', 'ChannelId', 'Amount', 'Value',
       'TransactionStartTime', 'PricingStrategy', 'FraudResult'],
      dtype='object')

In [5]:
# Build one row per customer summarising their transaction history.
# Named aggregation yields the flat, descriptive column names directly, so no
# MultiIndex flattening step is needed afterwards.
aggregated_features = (
    df.groupby('CustomerId')
    .agg(
        Amount_sum=('Amount', 'sum'),    # net amount across all transactions
        Amount_mean=('Amount', 'mean'),  # average transaction amount
        Amount_max=('Amount', 'max'),    # largest single amount
        # Sample standard deviation: NaN for customers with one transaction.
        Amount_std=('Amount', 'std'),
        # Number of non-null timestamps, i.e. the transaction count.
        TransactionStartTime_count=('TransactionStartTime', 'count'),
    )
    .reset_index()  # return CustomerId as a column without in-place mutation
)

aggregated_features

Unnamed: 0,CustomerId,Amount_sum,Amount_mean,Amount_max,Amount_std,TransactionStartTime_count
0,CustomerId_1,-10000.0,-10000.000000,-10000.0,,1
1,CustomerId_10,-10000.0,-10000.000000,-10000.0,,1
2,CustomerId_1001,20000.0,4000.000000,10000.0,6558.963333,5
3,CustomerId_1002,4225.0,384.090909,1500.0,560.498966,11
4,CustomerId_1003,20000.0,3333.333333,10000.0,6030.478146,6
...,...,...,...,...,...,...
3737,CustomerId_992,20000.0,3333.333333,10000.0,6088.240030,6
3738,CustomerId_993,20000.0,4000.000000,10000.0,6745.368782,5
3739,CustomerId_994,543873.0,5384.881188,90000.0,14800.656784,101
3740,CustomerId_996,139000.0,8176.470588,10000.0,4433.329648,17


In [6]:
import sys

# Make the project's helper modules importable from this notebook. The guard
# keeps sys.path free of duplicate entries when the cell is re-run.
if '../scripts' not in sys.path:
    sys.path.append('../scripts')

from feature_extraction import extract_transaction_features

# Derive time-based features from the transaction timestamp column. The helper
# is defined outside this notebook; the displayed result gains the columns
# 'Transaction Hour', 'Transaction Day', 'Transaction Month' and
# 'Transaction Year'.
# NOTE(review): `df` is rebound here, so re-running this cell feeds the
# already-augmented frame back into the helper — confirm the helper tolerates it.
df = extract_transaction_features(df, 'TransactionStartTime')
df

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,CurrencyCode,CountryCode,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Transaction Hour,Transaction Day,Transaction Month,Transaction Year
0,TransactionId_76871,BatchId_36123,AccountId_3957,SubscriptionId_887,CustomerId_4406,UGX,256,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2018-11-15 02:18:49+00:00,2,0,2,15,11,2018
1,TransactionId_73770,BatchId_15642,AccountId_4841,SubscriptionId_3829,CustomerId_4406,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2018-11-15 02:19:08+00:00,2,0,2,15,11,2018
2,TransactionId_26203,BatchId_53941,AccountId_4229,SubscriptionId_222,CustomerId_4683,UGX,256,ProviderId_6,ProductId_1,airtime,ChannelId_3,500.0,500,2018-11-15 02:44:21+00:00,2,0,2,15,11,2018
3,TransactionId_380,BatchId_102363,AccountId_648,SubscriptionId_2185,CustomerId_988,UGX,256,ProviderId_1,ProductId_21,utility_bill,ChannelId_3,20000.0,21800,2018-11-15 03:32:55+00:00,2,0,3,15,11,2018
4,TransactionId_28195,BatchId_38780,AccountId_4841,SubscriptionId_3829,CustomerId_988,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-644.0,644,2018-11-15 03:34:21+00:00,2,0,3,15,11,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95657,TransactionId_89881,BatchId_96668,AccountId_4841,SubscriptionId_3829,CustomerId_3078,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1000.0,1000,2019-02-13 09:54:09+00:00,2,0,9,13,2,2019
95658,TransactionId_91597,BatchId_3503,AccountId_3439,SubscriptionId_2643,CustomerId_3874,UGX,256,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2019-02-13 09:54:25+00:00,2,0,9,13,2,2019
95659,TransactionId_82501,BatchId_118602,AccountId_4841,SubscriptionId_3829,CustomerId_3874,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2019-02-13 09:54:35+00:00,2,0,9,13,2,2019
95660,TransactionId_136354,BatchId_70924,AccountId_1346,SubscriptionId_652,CustomerId_1709,UGX,256,ProviderId_6,ProductId_19,tv,ChannelId_3,3000.0,3000,2019-02-13 10:01:10+00:00,2,0,10,13,2,2019
