In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# Fraud-classification pipeline: load the CSV, undersample to a balanced
# data set, encode/scale the features, train a small dense network with
# early stopping, and report precision/recall on a held-out test set.

# ✅ Load dataset (rows the CSV parser cannot read are skipped, not fatal)
data = pd.read_csv('data.csv', on_bad_lines='skip')

# ✅ Drop incomplete rows BEFORE balancing. Dropping them after the
# undersampling step removes a different number of rows from each class
# and silently undoes the balance.
complete_rows = data.dropna()

# 'isFraud' is the binary target column (1 = fraud, 0 = non-fraud)
fraud = complete_rows[complete_rows['isFraud'] == 1]
non_fraud = complete_rows[complete_rows['isFraud'] == 0]

# Size of the smaller class. Computed from the two subsets directly rather
# than by position in value_counts(), which depends on sort order and fails
# with an IndexError when only one class is present.
nu = min(len(fraud), len(non_fraud))
if nu == 0:
    raise ValueError(
        "Need at least one complete row for each class "
        "(isFraud == 0 and isFraud == 1) to build a balanced data set."
    )

# Randomly undersample both classes to the same size
non_fraud_sampled = non_fraud.sample(n=nu, random_state=42)
fraud_sampled = fraud.sample(n=nu, random_state=42)

# Combine the two subsets and shuffle so the classes are interleaved
balanced_data = pd.concat([non_fraud_sampled, fraud_sampled])
df = balanced_data.sample(frac=1, random_state=42).reset_index(drop=True)

# ✅ Separate features and target
X = df.drop(columns=['isFraud'])
y = df['isFraud']

# ✅ Split data first, so every fitted preprocessing step below only ever
# sees training rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# ✅ Hold out part of the training data for validation. Early stopping picks
# the best epoch on this split; using the test set for that would make the
# final report optimistically biased.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, stratify=y_train, test_size=0.2, random_state=42
)

# ✅ Encode categorical features (encoder fitted on training rows only;
# categories unseen during fit are encoded as all-zeros)
cat_cols = X.select_dtypes(include=['object']).columns
ct = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
    ],
    remainder="passthrough",
)
X_train = ct.fit_transform(X_train)
X_val = ct.transform(X_val)
X_test = ct.transform(X_test)

# ✅ Apply SMOTE to the training split only. k_neighbors must be smaller
# than the minority-class size; with fewer than two minority samples there
# is nothing to interpolate between, so oversampling is skipped.
minority_class_size = y_train.value_counts().min()
k = min(5, minority_class_size - 1)

if k >= 1:
    sm = SMOTE(random_state=42, k_neighbors=k)
    X_train, y_train = sm.fit_resample(X_train, y_train)

# ✅ Normalize (statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# ✅ Build model. An explicit Input layer replaces the `input_shape=`
# argument on the first Dense layer, which newer Keras versions warn about.
l2_penalty = tf.keras.regularizers.l2(0.01)
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=l2_penalty),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.009),
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
)

# ✅ Early stopping: stop after 5 epochs without val_loss improvement and
# roll back to the best weights seen
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

# ✅ Train (validated on the held-out validation split, never the test set)
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

# ✅ Predict; adjust `threshold` to trade precision against recall
threshold = 0.5
y_prob = model.predict(X_test)
# model.predict returns shape (n, 1); flatten so the labels are 1-D
y_pred = (y_prob > threshold).astype(int).ravel()

# ✅ Classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - loss: 2.6870 - precision: 0.6667 - recall: 0.5679 - val_loss: 5.7162 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 3.0772 - precision: 0.8779 - recall: 0.8357 - val_loss: 1.5938 - val_precision: 0.7259 - val_recall: 0.9333
Epoch 3/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 1.3851 - precision: 0.9535 - recall: 0.9568 - val_loss: 6.6181 - val_precision: 0.5006 - val_recall: 1.0000
Epoch 4/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - loss: 2.8990 - precision: 0.9192 - recall: 0.9456 - val_loss: 6.1947 - val_precision: 0.5000 - val_recall: 0.0024
Epoch 5/50
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 3.5217 - precision: 0.9500 - recall: 0.9153 - val_loss: 5.2258 - val_precision: 0.5036 - val_re