# Credit Card Fraud Classifier
Detecting fraudulent credit card transactions using machine learning. It starts by loading and preprocessing a dataset containing transaction details and labels indicating fraudulence.

Upon preprocessing, the neural network is built layer by layer and is then trained on the training data set. After being optimized, with performance metrics evaluated on a separate testing dataset.

## Import Libraries

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from google.colab import drive
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from xgboost import XGBClassifier

## Load Dataset & Preprocess

In [2]:
drive.mount('/content/drive')
file_path = '/content/drive/MyDrive/ML/fraud-detection/card_transdata.csv'

Mounted at /content/drive


In [3]:
# Load dataset
data = pd.read_csv(file_path)

In [4]:
# Preprocess data (e.g., handle missing values, encode categorical variables, scale features)
X = data.drop('fraud', axis=1)  # Features
y = data['fraud']  # Labels

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Sequential Neural Network

### Building / Training

In [16]:
# Standardization
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Building Neural Net
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compilation / Training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ac23a9a7310>

### Evaluating Model

In [17]:
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

Test Loss: 0.006042294204235077, Test Accuracy: 0.998045027256012


## Random Forest

### Building / Training

In [5]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

### Evaluating Model

In [6]:
y_pred = rf_classifier.predict(X_test)

# Evaluate model
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    182557
         1.0       1.00      1.00      1.00     17443

    accuracy                           1.00    200000
   macro avg       1.00      1.00      1.00    200000
weighted avg       1.00      1.00      1.00    200000

[[182557      0]
 [     2  17441]]


## Gradient Boosting

### Building / Training

In [8]:
# Train XGBoost classifier
xgb_classifier = XGBClassifier(n_estimators=100, random_state=42)
xgb_classifier.fit(X_train, y_train)

### Evaluating Model

In [9]:
y_pred = xgb_classifier.predict(X_test)

# Evaluate model
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    182557
         1.0       0.99      0.99      0.99     17443

    accuracy                           1.00    200000
   macro avg       0.99      0.99      0.99    200000
weighted avg       1.00      1.00      1.00    200000

[[182367    190]
 [   179  17264]]


## Isolation Forest

### Building / Training

In [10]:
isolation_forest = IsolationForest(random_state=42)
isolation_forest.fit(X_train)

### Evaluating Model

In [11]:
y_pred = isolation_forest.predict(X_test)
# # Convert predictions to binary labels (1 for normal, -1 for anomaly)
# y_pred_binary = np.where(y_pred == 1, 0, 1)
# Evaluate model
y_pred_binary = y_pred
print(classification_report(y_test, y_pred_binary))
print(confusion_matrix(y_test, y_pred_binary))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        -1.0       0.00      0.00      0.00         0
         0.0       0.00      0.00      0.00    182557
         1.0       0.07      0.58      0.12     17443

    accuracy                           0.05    200000
   macro avg       0.02      0.19      0.04    200000
weighted avg       0.01      0.05      0.01    200000

[[     0      0      0]
 [ 42150      0 140407]
 [  7396      0  10047]]
