<a href="https://colab.research.google.com/github/AsmaaYassinDev/Behavioural-Anomaly-Detection-for-ATO-Fraud/blob/main/Supervised_Fraud_Model_(RandomForest)_Improved.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Make Google Drive available to this Colab runtime under /content/drive.
# This call triggers Colab's interactive authorisation flow the first time.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [7]:
# Supervised fraud model: join behavioural profiles onto the sampled
# transactions, engineer balance/type features, train a class-weighted
# Random Forest and report precision / recall / F1 plus feature importances.
#
# NOTE(review): `df_smart_sample` and `final_profiles` are not created in any
# cell visible here — presumably they come from earlier cells; confirm the
# notebook survives Restart & Run All.
import pandas as pd  # moved to the top: `pd` is needed by the very first merge
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

RANDOM_STATE = 42  # shared seed for the split and the forest
TEST_SIZE = 0.3    # fraction of rows held out for evaluation

print("\n--- Step 3: Merging Features with Transactions ---")
# First join: attach the profile row whose user_id matches the receiver (nameDest).
# NOTE(review): if `final_profiles` can hold several rows per user_id, these
# left joins would duplicate transactions — verify user_id is unique.
df_model_data = pd.merge(df_smart_sample, final_profiles, left_on='nameDest', right_on='user_id', how='left')
# Second join: attach the profile of the sender (nameOrig). The profile columns
# now exist on both sides, so the suffixes tag them as *_dest (from the first
# join) and *_orig (from this one).
df_model_data = pd.merge(df_model_data, final_profiles, left_on='nameOrig', right_on='user_id', how='left', suffixes=('_dest', '_orig'))

# Fill missing behavioral features: accounts without a profile row get 0.
behavioural_cols = [
    'dest_cash_out_ratio_dest',
    'dest_unique_senders_dest',
    'dest_cash_out_ratio_orig',
    'dest_unique_senders_orig',
]
df_model_data[behavioural_cols] = df_model_data[behavioural_cols].fillna(0)

# Add engineered features
df_model_data['balance_diff_orig'] = df_model_data['oldbalanceOrg'] - df_model_data['newbalanceOrig']
df_model_data['balance_diff_dest'] = df_model_data['newbalanceDest'] - df_model_data['oldbalanceDest']
# Integer code per transaction type; codes follow the sorted category order
# of the values present in this frame.
df_model_data['type_encoded'] = df_model_data['type'].astype('category').cat.codes

# Select features after Feature Selection
selected_features = [
    'amount',
    'type_encoded',
    'dest_cash_out_ratio_dest',
    'dest_unique_senders_dest',
    'dest_cash_out_ratio_orig',
    'dest_unique_senders_orig',
    'balance_diff_orig',
    'balance_diff_dest'
]

X = df_model_data[selected_features]
y = df_model_data['isFraud']

# Split data BEFORE scaling so that no statistic of the held-out rows leaks
# into preprocessing. The split uses the same seed, test fraction and
# stratification target as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, stratify=y, random_state=RANDOM_STATE
)

# Scale features: learn mean/std on the training rows only, then apply the
# same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that any later cell referencing `X_scaled` still finds it; it is
# now produced by the train-fitted scaler.
X_scaled = scaler.transform(X)
print(f"Training size: {len(X_train)}, Test size: {len(X_test)}")

# Train Random Forest
print("\n--- Training Random Forest Classifier ---")
model = RandomForestClassifier(
    random_state=RANDOM_STATE,
    class_weight='balanced',  # reweight classes inversely to their frequency
    n_jobs=-1                 # build trees in parallel on all available cores
)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate (positive class = isFraud == 1)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)  # rows: true label, columns: predicted label

print("\n--- Random Forest Results After Feature Selection ---")
print(f"Precision: {precision:.2%}")
print(f"Recall: {recall:.2%}")
print(f"F1-Score: {f1:.2%}")
print("Confusion Matrix:")
print(cm)

# Feature Importance: impurity-based importances, largest first.
importance = pd.Series(model.feature_importances_, index=selected_features).sort_values(ascending=False)
print("\n--- Feature Importance ---")
print(importance)



--- Step 3: Merging Features with Transactions ---
Training size: 392807, Test size: 168347

--- Training Random Forest Classifier ---

--- Random Forest Results After Feature Selection ---
Precision: 87.79%
Recall: 75.28%
F1-Score: 81.06%
Confusion Matrix:
[[165625    258]
 [   609   1855]]

--- Feature Importance ---
balance_diff_orig           0.449018
balance_diff_dest           0.157517
amount                      0.155614
type_encoded                0.147306
dest_unique_senders_dest    0.054703
dest_cash_out_ratio_orig    0.035736
dest_cash_out_ratio_dest    0.000093
dest_unique_senders_orig    0.000013
dtype: float64


In [None]:
# NOTE(review): this cell repeats the Drive mount performed by the first cell
# of the notebook and has no execution count — it looks like a leftover;
# confirm whether it is still needed.
from google.colab import drive

drive.mount(mountpoint='/content/drive')