In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the dataset
df = pd.read_csv('mobile_money_transactions.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Ensure Timestamp is datetime type
print("Dataset loaded. Shape:", df.shape)

# Sort by Sender_ID and Timestamp for sequential analysis
df = df.sort_values(['Sender_ID', 'Timestamp']).reset_index(drop=True)


Dataset loaded. Shape: (1000, 8)


In [7]:
# 1. Transaction Frequency (count of transactions per sender in last hour)
# Use 'on' parameter with a different column to count occurrences
rolling_counts = df.groupby('Sender_ID').rolling('1h', on='Timestamp')['Amount'].count().reset_index()
df['Rolling_Count_1h'] = rolling_counts['Amount']

In [8]:
# 2. Time Delta (time between consecutive transactions for same sender)
df['Time_Delta'] = df.groupby('Sender_ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
# Flag rapid transactions (less than 60 seconds apart)
df['Rapid_Transaction'] = (df['Time_Delta'] < 60).astype(int)


In [9]:
# 3. Amount Statistics (per sender)
amount_stats = df.groupby('Sender_ID')['Amount'].agg(['mean', 'max', 'min']).reset_index()
amount_stats.columns = ['Sender_ID', 'Avg_Amount', 'Max_Amount', 'Min_Amount']
df = df.merge(amount_stats, on='Sender_ID', how='left')

In [10]:
# 4. Location Consistency (flag unrealistic location changes)
df['Prev_Location'] = df.groupby('Sender_ID')['Location'].shift(1)
df['Location_Change'] = (df['Location'] != df['Prev_Location']).astype(int) & (df['Time_Delta'] < 300)  # 5 mins
df['Location_Change'] = df['Location_Change'].fillna(0)  # Fill NaN for first transaction

In [11]:
# 5. Device Usage (count unique devices per sender)
device_counts = df.groupby('Sender_ID')['Device_ID'].nunique().reset_index()
device_counts.columns = ['Sender_ID', 'Unique_Devices']
df = df.merge(device_counts, on='Sender_ID', how='left')

In [12]:
# 6. Transaction Type Ratio (proportion of 'Send Money' per sender)
send_money_ratio = df.groupby('Sender_ID').apply(
    lambda x: (x['Transaction_Type'] == 'Send Money').mean()
).reset_index()
send_money_ratio.columns = ['Sender_ID', 'Send_Money_Ratio']
df = df.merge(send_money_ratio, on='Sender_ID', how='left')

  send_money_ratio = df.groupby('Sender_ID').apply(


In [13]:
# 7. Hour of Day (extract hour from timestamp)
df['Hour_of_Day'] = df['Timestamp'].dt.hour

# Drop temporary columns
df = df.drop(columns=['Prev_Location'])

In [14]:
# Preview the engineered features
print("\nDataset with Engineered Features Preview:")
print(df.head())
print("\nFeature Info:")
print(df.info())


Dataset with Engineered Features Preview:
            Timestamp   Sender_ID   Receiver_ID  Amount Location  \
0 2025-02-08 17:48:37    25474373  254756463482     910  Eldoret   
1 2025-03-28 11:00:00   254799416  254728125617    5138   Nakuru   
2 2025-03-01 22:05:16  2547108503  254798025182     351  Mombasa   
3 2025-03-03 12:11:54  2547116938  254712244038    7021  Mombasa   
4 2025-02-13 14:24:36  2547188623  254780511400    9141   Kisumu   

                              Device_ID Transaction_Type  Fraud_Label  \
0  52773608-4352-428d-8695-e2c17e1e941d         Withdraw            0   
1  717c7d92-6128-480d-9767-5925e58b69d4       Send Money            0   
2  91830e71-7057-4f07-ac63-0e84c0ec53d8        Buy Goods            0   
3  6a0263fc-d763-4cb8-90d6-b4e1f1ae0bac       Send Money            0   
4  d68b2c5c-7453-421f-a42a-4a7e8326b74d         Pay Bill            0   

   Rolling_Count_1h  Time_Delta  Rapid_Transaction  Avg_Amount  Max_Amount  \
0               1.0         0.0

In [15]:
# Save the enhanced dataset
df.to_csv('mobile_money_features.csv', index=False)
print("Enhanced dataset saved as 'mobile_money_features.csv'")

Enhanced dataset saved as 'mobile_money_features.csv'
