In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import joblib
import os
import tensorflow as tf

# Set TensorFlow to use as many CPU cores as possible
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
physical_devices = tf.config.list_physical_devices('CPU')
if physical_devices:
    tf.config.threading.set_inter_op_parallelism_threads(len(physical_devices))
    tf.config.threading.set_intra_op_parallelism_threads(len(physical_devices))

# Load the data
data = pd.read_csv('dataset.csv')

# Preprocess the data
data = data.drop(['nameOrig', 'nameDest'], axis=1)

# Encode the 'type' column
label_encoder = LabelEncoder()
data['type'] = label_encoder.fit_transform(data['type'])

# Split the data into features and target
X = data.drop(['isFraud'], axis=1)
y = data['isFraud']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply SMOTE to the training set
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Define the ANN model
ann_model = Sequential([
    Dense(32, input_dim=X_train_resampled.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
ann_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['Precision'])

# Define early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='Precision', patience=3, mode='max', restore_best_weights=True)

# Train the model
history = ann_model.fit(X_train_resampled, y_train_resampled, validation_split=0.2, 
                        epochs=20, batch_size=32, callbacks=[early_stopping], verbose=1)

# Make predictions
y_pred_proba = ann_model.predict(X_test)
y_pred = (y_pred_proba > 0.6).astype('int32')

# Calculate metrics
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Display metrics
print("Confusion Matrix:")
print(conf_matrix)
print("\nMetrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Save the model
ann_model.save('ann_model_best.h5')

# Save the scaler if needed for future transformations
joblib.dump(scaler, 'scaler.joblib') 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 1ms/step - Precision: 0.9650 - loss: 0.0757 - val_Precision: 1.0000 - val_loss: 0.0452
Epoch 2/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m358s[0m 1ms/step - Precision: 0.9821 - loss: 0.0340 - val_Precision: 1.0000 - val_loss: 0.0287
Epoch 3/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 1ms/step - Precision: 0.9854 - loss: 0.0299 - val_Precision: 1.0000 - val_loss: 0.0183
Epoch 4/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m355s[0m 1ms/step - Precision: 0.9868 - loss: 0.0270 - val_Precision: 1.0000 - val_loss: 0.0250
Epoch 5/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 2ms/step - Precision: 0.9879 - loss: 0.0251 - val_Precision: 1.0000 - val_loss: 0.0188
Epoch 6/20
[1m254176/254176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m361s[0m 1ms/step - Precision: 0.9883 - loss: 0.0245 - val_Preci



Confusion Matrix:
[[1265319    5585]
 [     25    1595]]

Metrics:
Accuracy: 0.9955914387469313
Precision: 0.22214484679665739
Recall: 0.9845679012345679
F1-score: 0.3625


['scaler.joblib']