In [3]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.utils import class_weight
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed


In [4]:
# Read the transactions table from the CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer='credit_card_fraud_dataset.csv')

In [5]:
# --- 1. Data Preprocessing ---

# 'TransactionID' only identifies the row, so it is removed before modelling.
df = df.drop(columns=['TransactionID'])


In [6]:
# Parse 'TransactionDate', discard rows whose date is invalid, and replace the
# raw timestamp with the calendar features derived from it.
df = (
    df
    # errors='coerce' turns unparseable dates into NaT instead of raising.
    .assign(TransactionDate=lambda frame: pd.to_datetime(frame['TransactionDate'], errors='coerce'))
    # Remove the rows whose date is NaT.
    .dropna(subset=['TransactionDate'])
    .assign(
        hour=lambda frame: frame['TransactionDate'].dt.hour,
        day=lambda frame: frame['TransactionDate'].dt.day,
        month=lambda frame: frame['TransactionDate'].dt.month,
    )
    # The timestamp itself is dropped once the features have been extracted.
    .drop(columns=['TransactionDate'])
)

In [7]:
# Column groups consumed by the preprocessing step.
# One-hot encoded columns:
categorical_features = [
    'TransactionType',
    'Location',
]
# Standard-scaled columns.
# NOTE(review): 'MerchantID' looks like an identifier rather than a quantity;
# confirm that treating it as a continuous number is intended.
numerical_features = [
    'Amount',
    'MerchantID',
    'hour',
    'day',
    'month',
]


In [8]:
# Preprocessing step: standard-scale the numerical columns and one-hot encode
# the categorical ones. handle_unknown='ignore' keeps transform() from raising
# on categories that were not present when the encoder was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    # Always return a dense NumPy array. With the default threshold (0.3) the
    # output silently becomes a SciPy sparse matrix as soon as the one-hot
    # columns make the stacked result sparse enough, and the neural-network
    # cells of this notebook expect a plain dense array.
    sparse_threshold=0,
)


In [9]:
# Separate the model inputs (every column except the label) from the
# 'IsFraud' label column.
y = df['IsFraud']
X = df.drop(columns=['IsFraud'])


In [10]:
# Hold out 20% of the rows for testing. The split is stratified on the label so
# both parts keep the same class proportions, and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [11]:
# Wrap the preprocessor in a Pipeline; it is currently the pipeline's only step.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
])

In [12]:
# Fit the preprocessing on the training split only, then apply the fitted
# transformation to both splits so nothing is learned from the test rows.
pipeline.fit(X_train)
X_train_processed = pipeline.transform(X_train)
X_test_processed = pipeline.transform(X_test)

In [13]:
# --- 2. Handling Class Imbalance ---
# Compute 'balanced' class weights so that the minority class (fraud) carries
# more weight in the training loss.
observed_classes = np.unique(y_train)
balanced_weights = class_weight.compute_class_weight('balanced', classes=observed_classes, y=y_train)

# Key the mapping by the actual class label rather than by array position, so
# each weight stays attached to its class even if the labels are not exactly
# 0..n-1. Distinct names are used for the intermediate array and the final
# dict so `class_weights` always refers to the {label: weight} mapping.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(observed_classes, balanced_weights)
}

In [14]:
# --- 3. Building and Training the Neural Network ---

# Seed the Python, NumPy and backend random generators before any weights are
# created, so weight initialisation and batch shuffling repeat from run to run.
set_random_seed(42)

# Build the neural network model: two ReLU hidden layers followed by a single
# sigmoid unit that outputs a value in [0, 1] for binary classification.
model = Sequential([
    # `shape` replaces the deprecated `input_shape` argument of InputLayer.
    # The input width is the number of columns produced by preprocessing.
    InputLayer(shape=(X_train_processed.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])



In [15]:
# Configure training: Adam optimiser, binary cross-entropy loss, and accuracy
# as the only tracked metric.
# NOTE(review): accuracy alone says little on heavily imbalanced labels;
# consider also tracking precision/recall or AUC.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [16]:
# Train the model with class weights.
# The returned History is kept in `history` so the per-epoch training and
# validation metrics (history.history) can be inspected afterwards; with
# verbose=0 they are otherwise never shown. Assigning the result also keeps
# the bare History repr out of the cell output.
# Note: validation_split takes the *last* 20% of the training arrays, before
# shuffling, so the validation part is not stratified by class.
history = model.fit(
    X_train_processed,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weights,
    verbose=0,
)



<keras.src.callbacks.history.History at 0x7c12ee398650>

In [17]:
# --- 4. Model Evaluation ---

# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(
    x=X_test_processed,
    y=y_test,
    verbose=0,
)


In [18]:
# Predict on the test split. The model's sigmoid output is thresholded at 0.5
# to obtain hard 0/1 labels (strictly greater than 0.5 maps to 1).
y_pred_prob = model.predict(X_test_processed)
y_pred = np.greater(y_pred_prob, 0.5).astype(int)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [19]:
# Report the test accuracy, then the per-class precision/recall/F1 table and
# the confusion matrix, each under its own heading.
print(f"Test Accuracy: {accuracy:.4f}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Test Accuracy: 0.5659

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.57      0.72     19800
           1       0.01      0.41      0.02       200

    accuracy                           0.57     20000
   macro avg       0.50      0.49      0.37     20000
weighted avg       0.98      0.57      0.71     20000


Confusion Matrix:
[[11236  8564]
 [  118    82]]
