In [7]:
!python --version


Python 3.12.2


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import warnings
warnings.filterwarnings("ignore")

In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
from sklearn.feature_selection import VarianceThreshold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

# Load dataset
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# Feature Engineering
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
df['Hour'] = df['Timestamp'].dt.hour
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')
df['Note'] = df['Note'].fillna('')
note_counts = df['Note'].value_counts().to_dict()
df['NoteFreq'] = df['Note'].map(note_counts)

# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_ohe = ohe.fit_transform(df[['Transaction Type']])
trans_type_df = pd.DataFrame(trans_type_ohe, columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop leaked/irrelevant columns
df.drop(columns=[
    'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
    'Sender UPI ID', 'Receiver UPI ID', 'Note',
    'Device Type', 'Device ID', 'Transaction Type'
], inplace=True)

# Label and feature separation
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Sanity check
assert not features.isnull().any().any(), "NaN detected"
assert np.isfinite(features.values).all(), "Inf/-Inf detected"

# Remove constant features
features = pd.DataFrame(VarianceThreshold(0.0).fit_transform(features))

# Scaling
features = features.clip(lower=-1e3, upper=1e3)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(features)

# Split data
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_scaled, labels, test_size=0.5, stratify=labels, random_state=42
)

# Only train on normal (genuine) transactions
X_train = X_train_full[y_train_full == 0]

# Autoencoder Model
input_dim = X_train.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')

# Train
early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
autoencoder.fit(X_train, X_train, 
                epochs=100, batch_size=256, 
                shuffle=True, verbose=1, 
                callbacks=[early_stop])

# Inference
reconstructions = autoencoder.predict(X_test)
mse = np.mean(np.power(X_test - reconstructions, 2), axis=1)

# Optimal Threshold via F1 Score
best_thresh, best_f1 = 0, 0
for t in np.linspace(min(mse), max(mse), 200):
    preds = (mse > t).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = t

print(f"\n🔍 Best Threshold (F1 Optimized): {best_thresh:.6f}, Best F1: {best_f1:.4f}")

# Final Prediction
y_pred = (mse > best_thresh).astype(int)

# Evaluation
print("\n📊 Evaluation Metrics:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, mse))


Epoch 1/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - loss: 0.2139
Epoch 2/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0792
Epoch 3/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0503
Epoch 4/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0252
Epoch 5/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.0136
Epoch 6/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0090
Epoch 7/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0062
Epoch 8/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0042
Epoch 9/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0024
Epoch 10/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s

In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
from sklearn.feature_selection import VarianceThreshold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

# Load dataset
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# Feature engineering
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
df['Hour'] = df['Timestamp'].dt.hour
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')
df['Note'] = df['Note'].fillna('')
note_counts = df['Note'].value_counts().to_dict()
df['NoteFreq'] = df['Note'].map(note_counts)

# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_ohe = ohe.fit_transform(df[['Transaction Type']])
trans_type_df = pd.DataFrame(trans_type_ohe, columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop irrelevant/leaking columns
df.drop(columns=[
    'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
    'Sender UPI ID', 'Receiver UPI ID', 'Note',
    'Device Type', 'Device ID', 'Transaction Type'
], inplace=True)

# Labels and features
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Clean features
assert not features.isnull().any().any()
assert np.isfinite(features.values).all()

# Remove constant features
vt = VarianceThreshold(threshold=0.0)
features = pd.DataFrame(vt.fit_transform(features))

# Use Standard Scaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Split dataset
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_scaled, labels, test_size=0.5, stratify=labels, random_state=42
)

# Only train on normal data (non-fraud)
X_train = X_train_full[y_train_full == 0]

# Autoencoder
input_dim = X_train.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(128, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='linear')(decoded)  # Linear activation here

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')

# Train
early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
autoencoder.fit(X_train, X_train, 
                epochs=100, batch_size=256, 
                shuffle=True, verbose=1, 
                callbacks=[early_stop])

# Predict reconstruction error
reconstructions = autoencoder.predict(X_test)
mse = np.mean(np.square(X_test - reconstructions), axis=1)

# Tune threshold for best F1
best_thresh, best_f1 = 0, 0
for t in np.linspace(np.percentile(mse, 50), np.percentile(mse, 99.9), 200):
    preds = (mse > t).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = t

print(f"\n🔍 Best Threshold (F1 Optimized): {best_thresh:.6f}, Best F1: {best_f1:.4f}")

# Final prediction
y_pred = (mse > best_thresh).astype(int)

# Evaluate
print("\n📊 Evaluation Metrics:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, mse))


Epoch 1/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - loss: 0.9420
Epoch 2/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.4901
Epoch 3/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.1392
Epoch 4/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.0461
Epoch 5/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0364
Epoch 6/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0154
Epoch 7/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0035
Epoch 8/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0025
Epoch 9/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0020
Epoch 10/100
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
from sklearn.feature_selection import VarianceThreshold
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

# Load and preprocess dataset
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# Feature engineering
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
df['Hour'] = df['Timestamp'].dt.hour
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')
df['Note'] = df['Note'].fillna('')
df['NoteFreq'] = df['Note'].map(df['Note'].value_counts().to_dict())

# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_df = pd.DataFrame(ohe.fit_transform(df[['Transaction Type']]), 
                             columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop irrelevant or leaked features
drop_cols = ['Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
             'Sender UPI ID', 'Receiver UPI ID', 'Note',
             'Device Type', 'Device ID', 'Transaction Type']
df.drop(columns=drop_cols, inplace=True)

# Separate label
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Remove constant features
vt = VarianceThreshold(threshold=0.0)
features = pd.DataFrame(vt.fit_transform(features), columns=features.columns[vt.get_support()])

# Scale features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(features)

# SMOTE to balance classes (only for evaluation, not for training autoencoders)
X_train_full, X_test, y_train_full, y_test = train_test_split(X_scaled, labels, test_size=0.5, random_state=42, stratify=labels)
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X_train_full, y_train_full)

# Use only genuine transactions to train each autoencoder
X_train_genuine = X_train_full[y_train_full == 0]

# Autoencoder builder
def build_autoencoder(input_dim):
    input_layer = Input(shape=(input_dim,))
    encoded = Dense(64, activation='relu')(input_layer)
    encoded = Dense(32, activation='relu')(encoded)
    encoded = Dense(16, activation='relu')(encoded)
    decoded = Dense(32, activation='relu')(encoded)
    decoded = Dense(64, activation='relu')(decoded)
    output_layer = Dense(input_dim, activation='sigmoid')(decoded)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')
    return model

# Train multiple autoencoders
n_autoencoders = 3
autoencoders = []
for i in range(n_autoencoders):
    ae = build_autoencoder(X_train_genuine.shape[1])
    early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
    ae.fit(X_train_genuine, X_train_genuine,
           epochs=50, batch_size=128, verbose=0, shuffle=True,
           callbacks=[early_stop])
    autoencoders.append(ae)

# Average reconstruction error across all autoencoders
reconstruction_errors = np.zeros(len(X_test))
for ae in autoencoders:
    recon = ae.predict(X_test, verbose=0)
    error = np.mean(np.square(X_test - recon), axis=1)
    reconstruction_errors += error
reconstruction_errors /= n_autoencoders

# Optimal threshold via F1-score
best_thresh, best_f1 = 0, 0
for t in np.linspace(min(reconstruction_errors), max(reconstruction_errors), 100):
    preds = (reconstruction_errors > t).astype(int)
    f1 = f1_score(y_test, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = t

print(f"\n🔍 Best Threshold (F1 Optimized): {best_thresh:.6f}, Best F1: {best_f1:.4f}")

# Final predictions
final_preds = (reconstruction_errors > best_thresh).astype(int)

# Evaluation
print("\n📊 Evaluation Metrics:")
print(classification_report(y_test, final_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, final_preds))
print("ROC AUC Score:", roc_auc_score(y_test, reconstruction_errors))



🔍 Best Threshold (F1 Optimized): 0.000859, Best F1: 0.5782

📊 Evaluation Metrics:
              precision    recall  f1-score   support

           0       0.77      0.98      0.86     33106
           1       0.91      0.42      0.58     16894

    accuracy                           0.79     50000
   macro avg       0.84      0.70      0.72     50000
weighted avg       0.82      0.79      0.77     50000

Confusion Matrix:
 [[32373   733]
 [ 9726  7168]]
ROC AUC Score: 0.8038116724141992


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc, classification_report, confusion_matrix
from sklearn.feature_selection import VarianceThreshold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

# Load dataset
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# Feature engineering
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
df['Hour'] = df['Timestamp'].dt.hour
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')
df['Note'] = df['Note'].fillna('')
note_counts = df['Note'].value_counts().to_dict()
df['NoteFreq'] = df['Note'].map(note_counts)

# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_ohe = ohe.fit_transform(df[['Transaction Type']])
trans_type_df = pd.DataFrame(trans_type_ohe, columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop irrelevant/leaking columns
df.drop(columns=[
    'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
    'Sender UPI ID', 'Receiver UPI ID', 'Note',
    'Device Type', 'Device ID', 'Transaction Type'
], inplace=True)

# Labels and features
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Clean and scale features
vt = VarianceThreshold(threshold=0.0)
features = pd.DataFrame(vt.fit_transform(features))
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Split into test and rest
X_rest, X_test, y_rest, y_test = train_test_split(
    X_scaled, labels, test_size=0.5, stratify=labels, random_state=42
)

# Use only genuine data for training
X_rest = X_rest[y_rest == 0]

# Function to create autoencoder
def create_autoencoder(input_dim):
    inp = Input(shape=(input_dim,))
    x = Dense(128, activation='relu')(inp)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(input_dim, activation='linear')(x)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=Adam(1e-4), loss='mse')
    return model

# Train multiple autoencoders on bootstrapped samples
n_models = 3
input_dim = X_rest.shape[1]
models = []
recon_errors = []

for i in range(n_models):
    # Bootstrap sample from normal data
    idx = np.random.choice(len(X_rest), len(X_rest), replace=True)
    X_boot = X_rest[idx]

    model = create_autoencoder(input_dim)
    early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
    model.fit(X_boot, X_boot, epochs=100, batch_size=256, verbose=0, callbacks=[early_stop])
    
    # Predict reconstruction error on test
    pred = model.predict(X_test)
    err = np.mean(np.square(X_test - pred), axis=1)
    recon_errors.append(err)
    models.append(model)

# Average reconstruction error
avg_error = np.mean(recon_errors, axis=0)

# Precision-Recall AUC optimization
precisions, recalls, thresholds = precision_recall_curve(y_test, avg_error)
pr_au_score = auc(recalls, precisions)
f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-10)
best_f1_index = np.argmax(f1_scores)
best_thresh = thresholds[best_f1_index]
best_f1 = f1_scores[best_f1_index]

# Final predictions
y_pred = (avg_error > best_thresh).astype(int)

# Results
print(f"\n🔍 Best Threshold (F1 Optimized using PR Curve): {best_thresh:.6f}")
print(f"📈 Precision-Recall AUC: {pr_au_score:.4f}, Best F1: {best_f1:.4f}")
print("\n📊 Evaluation Metrics:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, reconstruction_errors))



[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 975us/step
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 968us/step
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 944us/step

🔍 Best Threshold (F1 Optimized using PR Curve): 0.000422
📈 Precision-Recall AUC: 0.9076, Best F1: 0.8065

📊 Evaluation Metrics:
              precision    recall  f1-score   support

           0       0.90      0.91      0.90     33106
           1       0.82      0.79      0.81     16894

    accuracy                           0.87     50000
   macro avg       0.86      0.85      0.85     50000
weighted avg       0.87      0.87      0.87     50000

Confusion Matrix:
 [[30140  2966]
 [ 3474 13420]]


In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc, classification_report, confusion_matrix
from sklearn.feature_selection import VarianceThreshold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import warnings
warnings.filterwarnings("ignore")

# Load and sort data
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# === FEATURE ENGINEERING ===

# Time since last transaction (per user)
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)

# Hour of transaction
df['Hour'] = df['Timestamp'].dt.hour

# Average and Std of amount sent by user (risk profiling)
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['StdAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('std').fillna(0)

# Average amount sent from device
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')

# Note frequency
df['Note'] = df['Note'].fillna('')
note_counts = df['Note'].value_counts().to_dict()
df['NoteFreq'] = df['Note'].map(note_counts)

# === Transaction Velocity (per user in rolling 1.5 hour window) ===
# df = df.set_index('Timestamp')

df['TXCount_1_5hr'] = (
    df.groupby('Sender UPI ID')['Sender UPI ID']
      .rolling(5400)
      .count()
      .reset_index(level=0, drop=True)
      .fillna(1)
)

df = df.reset_index()




# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_ohe = ohe.fit_transform(df[['Transaction Type']])
trans_type_df = pd.DataFrame(trans_type_ohe, columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop irrelevant/leaking columns
df.drop(columns=[
    'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
    'Sender UPI ID', 'Receiver UPI ID', 'Note',
    'Device Type', 'Device ID', 'Transaction Type'
], inplace=True)

# Separate features and labels
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Clean and scale features
vt = VarianceThreshold(threshold=0.0)
features = pd.DataFrame(vt.fit_transform(features))
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Split into test and rest
X_rest, X_test, y_rest, y_test = train_test_split(
    X_scaled, labels, test_size=0.5, stratify=labels, random_state=42
)

# Use only genuine transactions for training
X_rest = X_rest[y_rest == 0]

# === AUTOENCODER WITH L1 REGULARIZATION ===
def create_autoencoder(input_dim):
    inp = Input(shape=(input_dim,))
    x = Dense(128, activation='relu', activity_regularizer=regularizers.l1(1e-5))(inp)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(input_dim, activation='linear')(x)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=Adam(1e-4), loss='mse')
    return model

# Train multiple autoencoders on bootstrapped samples
n_models = 4
input_dim = X_rest.shape[1]
models = []
recon_errors = []

for i in range(n_models):
    idx = np.random.choice(len(X_rest), len(X_rest), replace=True)
    X_boot = X_rest[idx]

    model = create_autoencoder(input_dim)
    early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
    model.fit(X_boot, X_boot, epochs=100, batch_size=256, verbose=0, callbacks=[early_stop])

    pred = model.predict(X_test)
    err = np.mean(np.square(X_test - pred), axis=1)
    recon_errors.append(err)
    models.append(model)

# Average reconstruction error
avg_error = np.mean(recon_errors, axis=0)

# Precision-Recall AUC optimization
precisions, recalls, thresholds = precision_recall_curve(y_test, avg_error)
pr_au_score = auc(recalls, precisions)
f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-10)
best_f1_index = np.argmax(f1_scores)
best_thresh = thresholds[best_f1_index]
best_f1 = f1_scores[best_f1_index]

# Final predictions
y_pred = (avg_error > best_thresh).astype(int)

# === RESULTS ===
print(f"\n🔍 Best Threshold (F1 Optimized using PR Curve): {best_thresh:.6f}")
print(f"📈 Precision-Recall AUC: {pr_au_score:.4f}, Best F1: {best_f1:.4f}")
print("\n📊 Evaluation Metrics:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, reconstruction_errors))



[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

🔍 Best Threshold (F1 Optimized using PR Curve): 0.000927
📈 Precision-Recall AUC: 0.8687, Best F1: 0.7685

📊 Evaluation Metrics:
              precision    recall  f1-score   support

           0       0.85      0.96      0.90     33106
           1       0.90      0.67      0.77     16894

    accuracy                           0.86     50000
   macro avg       0.88      0.82      0.84     50000
weighted avg       0.87      0.86      0.86     50000

Confusion Matrix:
 [[31902  1204]
 [ 5602 11292]]
ROC AUC Score: 0.8038116724141992


In [13]:
import pandas as pd
import numpy as np
import joblib
import warnings
import pickle   
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc, classification_report, confusion_matrix, roc_auc_score
from sklearn.feature_selection import VarianceThreshold

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Load dataset
df = pd.read_csv('synthetic_upi_transactions.csv', parse_dates=['Timestamp'])
df.sort_values(by='Timestamp', inplace=True)

# Feature engineering
df['TimeSinceLastTX'] = df.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
df['Hour'] = df['Timestamp'].dt.hour
df['AvgAmountSender'] = df.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
df['AvgAmountDevice'] = df.groupby('Device ID')['Amount (INR)'].transform('mean')
df['Note'] = df['Note'].fillna('')
note_counts = df['Note'].value_counts().to_dict()
df['NoteFreq'] = df['Note'].map(note_counts)

# One-hot encode Transaction Type
ohe = OneHotEncoder(sparse_output=False)
trans_type_ohe = ohe.fit_transform(df[['Transaction Type']])
trans_type_df = pd.DataFrame(trans_type_ohe, columns=ohe.get_feature_names_out(['Transaction Type']))
df = pd.concat([df.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

# Drop irrelevant/leaking columns
df.drop(columns=[
    'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
    'Sender UPI ID', 'Receiver UPI ID', 'Note',
    'Device Type', 'Device ID', 'Transaction Type'
], inplace=True)

# Labels and features
labels = df['Fraud']
features = df.drop(columns=['Fraud'])

# Feature selection and scaling
vt = VarianceThreshold(threshold=0.0)
features = pd.DataFrame(vt.fit_transform(features))
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# Train/test split
X_rest, X_test, y_rest, y_test = train_test_split(
    X_scaled, labels, test_size=0.8, stratify=labels, random_state=42
)

# Use only genuine transactions for training
X_rest = X_rest[y_rest == 0]

# Autoencoder architecture
def create_autoencoder(input_dim):
    inp = Input(shape=(input_dim,))
    x = Dense(128, activation='relu')(inp)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(input_dim, activation='linear')(x)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=Adam(1e-4), loss='mse')
    return model

# Train multiple autoencoders
n_models = 3
input_dim = X_rest.shape[1]
models = []
recon_errors = []

for i in range(n_models):
    idx = np.random.choice(len(X_rest), len(X_rest), replace=True)
    X_boot = X_rest[idx]

    model = create_autoencoder(input_dim)
    early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
    model.fit(X_boot, X_boot, epochs=100, batch_size=256, verbose=0, callbacks=[early_stop])
    
    pred = model.predict(X_test)
    err = np.mean(np.square(X_test - pred), axis=1)
    recon_errors.append(err)
    models.append(model)

# Average reconstruction error
avg_error = np.mean(recon_errors, axis=0)

# Precision-Recall AUC optimization
precisions, recalls, thresholds = precision_recall_curve(y_test, avg_error)
pr_au_score = auc(recalls, precisions)
f1_scores = 2 * (precisions * recalls) / (precisions + recalls + 1e-10)
best_f1_index = np.argmax(f1_scores)
best_thresh = thresholds[best_f1_index]
best_f1 = f1_scores[best_f1_index]

# Final predictions
y_pred = (avg_error > best_thresh).astype(int)

# Evaluation
print(f"\n🔍 Best Threshold (F1 Optimized): {best_thresh:.6f}")
print(f"📈 PR AUC: {pr_au_score:.4f}, Best F1: {best_f1:.4f}")
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, avg_error))

# ---- Save complete model ----
class FraudDetectionEnsemble:
    def __init__(self, models, scaler, vt, ohe, threshold):
        self.models = models
        self.scaler = scaler
        self.vt = vt
        self.ohe = ohe
        self.threshold = threshold

    def predict(self, X_raw_df):
        X = X_raw_df.copy()
        X['TimeSinceLastTX'] = X.groupby('Sender UPI ID')['Timestamp'].diff().dt.total_seconds().fillna(0)
        X['Hour'] = X['Timestamp'].dt.hour
        X['AvgAmountSender'] = X.groupby('Sender UPI ID')['Amount (INR)'].transform('mean')
        X['AvgAmountDevice'] = X.groupby('Device ID')['Amount (INR)'].transform('mean')
        X['Note'] = X['Note'].fillna('')
        note_counts = X['Note'].value_counts().to_dict()
        X['NoteFreq'] = X['Note'].map(note_counts)

        trans_type_ohe = self.ohe.transform(X[['Transaction Type']])
        trans_type_df = pd.DataFrame(trans_type_ohe, columns=self.ohe.get_feature_names_out(['Transaction Type']))
        X = pd.concat([X.reset_index(drop=True), trans_type_df.reset_index(drop=True)], axis=1)

        X.drop(columns=[
            'Transaction ID', 'Timestamp', 'Sender Name', 'Receiver Name',
            'Sender UPI ID', 'Receiver UPI ID', 'Note',
            'Device Type', 'Device ID', 'Transaction Type'
        ], inplace=True)

        X = pd.DataFrame(self.vt.transform(X))
        X = self.scaler.transform(X)

        errors = []
        for model in self.models:
            pred = model.predict(X)
            err = np.mean(np.square(X - pred), axis=1)
            errors.append(err)

        avg_error = np.mean(errors, axis=0)
        return (avg_error > self.threshold).astype(int), avg_error

# Save final object
ensemble_model = FraudDetectionEnsemble(models=models, scaler=scaler, vt=vt, ohe=ohe, threshold=best_thresh)
with open('final_fraud_model.pkl', 'wb') as f:
    pickle.dump(ensemble_model, f)
print("Saved ensemble model as 'final_fraud_model.pkl'")


[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 927us/step
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 972us/step
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step

🔍 Best Threshold (F1 Optimized): 0.000168
📈 PR AUC: 0.9107, Best F1: 0.8066

📊 Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90     52969
           1       0.80      0.81      0.81     27031

    accuracy                           0.87     80000
   macro avg       0.85      0.85      0.85     80000
weighted avg       0.87      0.87      0.87     80000

Confusion Matrix:
 [[47663  5306]
 [ 5176 21855]]
ROC AUC Score: 0.9375393565715758
Saved ensemble model as 'final_fraud_model.pkl'
