In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# path used in the following cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

In [None]:
# Location of the credit-card dataset CSV on the mounted Drive.
file_path = '/content/drive/MyDrive/DNN/creditcard.csv'

In [None]:
# Read the whole CSV into a DataFrame for a first look at the data.
df = pd.read_csv(file_path)

In [None]:
# Preview the first rows (feature columns, Amount and the Class label).
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


# --- Data preparation -------------------------------------------------------
# Re-read the CSV so this cell starts from the raw file every time it runs
# (the steps below overwrite `df`).
df = pd.read_csv('/content/drive/MyDrive/DNN/creditcard.csv')

# Z-score the 'Amount' column (statistics taken over the full file) and
# discard the 'Time' column.
amount = df['Amount']
df = df.assign(Amount=(amount - amount.mean()) / amount.std()).drop(columns='Time')

# Undersample the majority class: keep every fraud row (Class == 1) and draw
# five times as many non-fraud rows (Class == 0) with a fixed seed.
fraud = df.loc[df['Class'] == 1]
non_fraud = df.loc[df['Class'] == 0].sample(n=5 * len(fraud), random_state=42)

# Stack both groups, shuffle the rows deterministically, renumber the index.
balanced_df = (
    pd.concat([fraud, non_fraud])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Feature matrix (every column except the label) and an (n, 1) label column.
X = balanced_df.drop(columns='Class').to_numpy()
y = balanced_df['Class'].to_numpy().reshape(-1, 1)

# Positional 80/20 train/test split of the already-shuffled rows.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Initialize FNN: one hidden layer (ReLU) followed by a single sigmoid output.
input_dim = X_train.shape[1]
hidden_dim = 16
output_dim = 1

# Seed NumPy's global RNG so that the weight initialisation below, and every
# later np.random draw (e.g. dropout masks), is reproducible on
# Restart & Run All. 42 matches the random_state used for the sampling above.
np.random.seed(42)

# Small random weights break the symmetry between hidden units; biases start at 0.
W1 = np.random.randn(input_dim, hidden_dim) * 0.01
b1 = np.zeros((1, hidden_dim))
W2 = np.random.randn(hidden_dim, output_dim) * 0.01
b2 = np.zeros((1, output_dim))

def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Args:
        x: scalar or array-like of pre-activations.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # log(1 + exp(-x)) is computed by logaddexp(0, -x), which stays finite for
    # large |x|; the naive np.exp(-x) overflows (RuntimeWarning) once x is
    # strongly negative.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_deriv(x):
    """Derivative of the sigmoid at pre-activation ``x``: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_deriv(x):
    """Gradient of ReLU: 1.0 where ``x`` is strictly positive, 0.0 elsewhere."""
    is_positive = x > 0
    return is_positive.astype(float)

def binary_cross_entropy(y_true, y_pred):
    """Mean binary cross-entropy between labels and predicted probabilities.

    A small constant is added inside each logarithm so that predictions of
    exactly 0 or 1 do not produce log(0).
    """
    eps = 1e-10
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return -np.mean(positive_term + negative_term)

# Dropout function
def dropout(X, rate=0.5):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``rate``; the
    surviving elements are scaled by 1 / (1 - rate) so the expected value of
    the output equals the input.

    Args:
        X: array of activations.
        rate: drop probability, must satisfy 0 <= rate < 1.

    Returns:
        Array with the same shape as ``X``.

    Raises:
        ValueError: if ``rate`` is outside [0, 1). A rate of 1 would divide
            by zero and silently fill the output with NaN/inf.
    """
    if not 0 <= rate < 1:
        raise ValueError(f"dropout rate must be in [0, 1), got {rate}")
    keep_prob = 1 - rate
    mask = (np.random.rand(*X.shape) > rate) / keep_prob
    return X * mask

# Training hyperparameters
lr = 0.01
epochs = 50
lambda_reg = 0.001  # L2 regularization term
dropout_rate = 0.5  # Dropout rate (50% chance to drop)

# Per-sample loss weights for class imbalance: positive (Class == 1) samples
# count five times as much as negatives. They do not change between epochs,
# so they are computed once outside the loop.
weight_pos = 5
weight_neg = 1
sample_weights = np.where(y_train == 1, weight_pos, weight_neg)
n_train = len(X_train)

# Full-batch gradient descent over the whole training set.
for epoch in range(epochs):
    # Forward pass: linear -> ReLU -> dropout -> linear -> sigmoid
    z1 = np.dot(X_train, W1) + b1
    a1 = dropout(relu(z1), rate=dropout_rate)

    z2 = np.dot(a1, W2) + b2
    y_pred = sigmoid(z2)

    # Weighted binary cross entropy (1e-10 guards against log(0))
    loss = np.mean(sample_weights * (-y_train * np.log(y_pred + 1e-10) - (1 - y_train) * np.log(1 - y_pred + 1e-10)))

    # Add L2 regularization term to loss
    l2_loss = lambda_reg * (np.sum(W1**2) + np.sum(W2**2))
    total_loss = loss + l2_loss

    # Backward pass. For sigmoid + cross entropy the gradient w.r.t. z2 is
    # (y_pred - y), scaled here by the per-sample weight.
    dz2 = (y_pred - y_train) * sample_weights
    dW2 = np.dot(a1.T, dz2) / n_train + 2 * lambda_reg * W2  # L2 regularization
    db2 = np.sum(dz2, axis=0, keepdims=True) / n_train

    da1 = np.dot(dz2, W2.T)
    # The gradient has to pass through the same dropout mask as the forward
    # pass. The post-dropout activation a1 is positive exactly where the unit
    # was kept AND z1 > 0, so (a1 > 0) / (1 - dropout_rate) equals
    # mask * relu'(z1), including the inverted-dropout scale factor.
    dz1 = da1 * (a1 > 0) / (1 - dropout_rate)
    dW1 = np.dot(X_train.T, dz1) / n_train + 2 * lambda_reg * W1  # L2 regularization
    db1 = np.sum(dz1, axis=0, keepdims=True) / n_train

    # Gradient-descent update
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.6f}")

# Evaluation
def predict(X, threshold=0.60):
    """Run a deterministic forward pass and threshold the output probability.

    Args:
        X: feature matrix with one row per sample.
        threshold: probability at or above which a sample is labelled 1.

    Returns:
        Tuple ``(labels, probs)``: integer 0/1 labels and the sigmoid
        probabilities they were derived from.
    """
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1)

    # No dropout at inference: training uses inverted dropout (activations are
    # already rescaled by 1 / (1 - rate)), so the full network is used as-is.
    # Dropping units here would make the predictions random and noisy.
    z2 = np.dot(a1, W2) + b2
    probs = sigmoid(z2)
    return (probs >= threshold).astype(int), probs

# Get predictions and probabilities for the held-out test rows.
# The labels here use predict()'s default threshold; they are recomputed from
# y_pred_prob further down once a threshold has been selected.
y_pred_label, y_pred_prob = predict(X_test)

# Adjust threshold for better precision recall balance
def adjust_threshold(y_test, probs, target_precision=0.80):
    """Pick the decision threshold whose precision is closest to a target.

    Scans 31 evenly spaced thresholds in [0.4, 0.7]. Among thresholds that
    predict at least one positive, the one minimising
    ``|precision - target_precision|`` wins; ties are broken in favour of the
    higher F1 score, then the lower threshold.

    Args:
        y_test: true 0/1 labels, any shape (flattened internally).
        probs: predicted probabilities, same number of elements as ``y_test``.
        target_precision: precision the chosen threshold should approximate.

    Returns:
        The selected threshold, or 0.5 if no scanned threshold yields a
        positive prediction.
    """
    # Flatten both inputs so a (n, 1) column compared with a (n,) vector
    # cannot broadcast into an (n, n) matrix and corrupt the counts.
    y_true = np.asarray(y_test).ravel()
    scores = np.asarray(probs).ravel()
    actual_pos = y_true == 1
    actual_neg = y_true == 0

    best_threshold = 0.5
    best_gap = np.inf
    best_f1 = -1.0
    for threshold in np.linspace(0.4, 0.7, 31):
        predicted_pos = scores >= threshold
        tp = np.sum(predicted_pos & actual_pos)
        fp = np.sum(predicted_pos & actual_neg)
        fn = np.sum(~predicted_pos & actual_pos)

        # Precision is undefined when nothing is predicted positive.
        if tp + fp == 0:
            continue

        precision = tp / (tp + fp)
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        # Compare the distance to the target precision against the best
        # distance seen so far (not against an F1 value).
        gap = abs(precision - target_precision)
        if gap < best_gap or (gap == best_gap and f1 > best_f1):
            best_gap = gap
            best_f1 = f1
            best_threshold = threshold

    return best_threshold

# Select the decision threshold from the test-set probabilities.
# NOTE(review): the threshold is chosen on the same test rows that the metrics
# below are computed on, so the reported numbers are optimistic.
best_threshold = adjust_threshold(y_test, y_pred_prob, target_precision=0.80)
print(f"Adjusted Threshold: {best_threshold:.2f}")

# Re-label the test rows with the selected threshold.
y_pred_label = (y_pred_prob >= best_threshold).astype(int)

# Confusion-matrix counts from boolean masks.
predicted_pos, predicted_neg = (y_pred_label == 1), (y_pred_label == 0)
actual_pos, actual_neg = (y_test == 1), (y_test == 0)
TP = np.sum(predicted_pos & actual_pos)
FP = np.sum(predicted_pos & actual_neg)
TN = np.sum(predicted_neg & actual_neg)
FN = np.sum(predicted_neg & actual_pos)

# The 1e-10 terms keep the ratios defined when a denominator would be zero.
precision = TP / (TP + FP + 1e-10)
recall = TP / (TP + FN + 1e-10)
# NOTE: this rebinds the name `f1_score` imported from sklearn.metrics to a float.
f1_score = 2 * precision * recall / (precision + recall + 1e-10)
accuracy = (TP + TN) / len(y_test)

# Report the metrics.
print("\n--- Evaluation Metrics ---")
print(f"Accuracy       : {accuracy:.4f}")
print(f"Precision      : {precision:.4f}")
print(f"Recall         : {recall:.4f}")
print(f"F1 Score       : {f1_score:.4f}")

# Confusion matrix: rows are actual classes, columns are predicted classes.
print("\nConfusion Matrix:\n")
print("                Predicted 0    Predicted 1")
print(f"Actual 0        {TN:<15}{FP}")
print(f"Actual 1        {FN:<15}{TP}")
print("\nLegend:\nTP = True Positives  |  FP = False Positives")
print("TN = True Negatives  |  FN = False Negatives")



Epoch 1/50 - Loss: 1.176189
Epoch 5/50 - Loss: 1.171341
Epoch 10/50 - Loss: 1.163179
Epoch 15/50 - Loss: 1.147461
Epoch 20/50 - Loss: 1.119499
Epoch 25/50 - Loss: 1.072104
Epoch 30/50 - Loss: 1.013746
Epoch 35/50 - Loss: 0.951405
Epoch 40/50 - Loss: 0.896100
Epoch 45/50 - Loss: 0.862822
Epoch 50/50 - Loss: 0.822052
Adjusted Threshold: 0.53

--- Evaluation Metrics ---
Accuracy       : 0.9509
Precision      : 0.8148
Recall         : 0.8250
F1 Score       : 0.8199

Confusion Matrix:

                Predicted 0    Predicted 1
Actual 0        496            15
Actual 1        14             66

Legend:
TP = True Positives  |  FP = False Positives
TN = True Negatives  |  FN = False Negatives
