In [13]:
#Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder # Importing OneHotEncoder here
from sklearn.compose import ColumnTransformer # Importing ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Step 1: START

# Step 2: Loading and observing the dataset
file_name = 'data-purchase-card-pcard-fiscal-year-2014.csv'  # Replace with the actual file name
df = pd.read_csv(file_name)

# Step 3: Data Cleaning and Preprocessing
# Assuming 'Class' column contains 0 for normal and 1 for fraud
# Check for null values and drop rows with missing values
df.dropna(inplace=True)

# Separating the fraud and normal transactions
normal = df[df['Class'] == 0]
fraud = df[df['Class'] == 1]

# Under-sampling the normal transactions to balance the dataset
normal_sampled = normal.sample(len(fraud), random_state=42)
balanced_data = pd.concat([normal_sampled, fraud])

# Splitting features and target
X = balanced_data.drop(columns=['Class'])  # Features
y = balanced_data['Class']  # Target

# Identifying numerical and categorical features
numerical_features = X.select_dtypes(include=np.number).columns.tolist()
categorical_features = X.select_dtypes(exclude=np.number).columns.tolist()

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)])

# Scaling and normalizing the features using the pipeline
X_scaled = preprocessor.fit_transform(X)


# Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
# Step 4: Training the model
# Initializing the ANN
model = Sequential()

# Adding input and hidden layers
model.add(Dense(units=16, activation='relu', input_dim=X_train.shape[1]))  # Input layer with 16 neurons
model.add(Dense(units=8, activation='relu'))  # Hidden layer with 8 neurons

# Adding the output layer
model.add(Dense(units=1, activation='sigmoid'))  # Output layer with 1 neuron for binary classification

# Compiling the ANN
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training the ANN
model.fit(X_train, y_train, batch_size=32, epochs=50, verbose=1)

# Step 5: Analyzing the model
# Save the trained model for future testing
model.save('fraud_detection_ann_model.h5')
print("Model training complete and saved as 'fraud_detection_ann_model.h5'.")

# Predictions on training data (optional)
y_pred_train = model.predict(X_train)
y_pred_train = (y_pred_train > 0.5)  # Convert probabilities to binary predictions

# Step 1: START

# Load the saved model
model = load_model('fraud_detection_ann_model.h5')

# Load the test dataset
test_file_name = 'data-purchase-card-pcard-fiscal-year-2014.csv'  # Replace with the name of the test data file created earlier
test_df = pd.read_csv(test_file_name)

# Step 2: Preprocessing the test data
# Handle NaN values in y_test (e.g., drop rows with NaN)
test_df.dropna(subset=['Class'], inplace=True)

# Extract features (X_test) and labels (y_test) from the test dataset
X_test = test_df.drop(columns=['Class'])  # Assuming 'Class' is the target column
y_test = test_df['Class']

# Scale the test features using StandardScaler (used during training)
X_test_scaled = preprocessor.transform(X_test)

# Step 3: Prediction and classification
# Predict probabilities using the loaded model
y_pred_prob = model.predict(X_test_scaled)

# Convert probabilities to binary predictions
y_pred = (y_pred_prob > 0.5).astype(int)

# Count fraud and non-fraud predictions
fraudulent_transactions = np.sum(y_pred)
non_fraudulent_transactions = len(y_pred) - fraudulent_transactions

# Step 4: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Step 5: Display results
print("Test Results:")
print(f"Fraudulent Transactions Predicted: {fraudulent_transactions}")
print(f"Non-Fraudulent Transactions Predicted: {non_fraudulent_transactions}")
print(f"Accuracy: {accuracy:}")
print(f"Precision: {precision:}")
print(f"Recall: {recall:}")

# Step 6: STOP

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4611 - loss: 0.6949
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6865 - loss: 0.6799 
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6719 - loss: 0.6708  
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6912 - loss: 0.6494 
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7255 - loss: 0.6259 
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7588 - loss: 0.5879  
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8034 - loss: 0.5501  
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9009 - loss: 0.5060 
Epoch 9/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3



Model training complete and saved as 'fraud_detection_ann_model.h5'.
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step




[1m4972/4972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step
Test Results:
Fraudulent Transactions Predicted: 79591
Non-Fraudulent Transactions Predicted: 79501
Accuracy: 0.5015399894400724
Precision: 0.00405824779183576
Recall: 0.9073033707865169
