In [18]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.preprocessing import normalize, StandardScaler
from sklearn.model_selection import train_test_split

In [19]:
# Load the transactions table (path is relative to the notebook's working directory).
raw_data = pd.read_csv("Credit_Card_Fraud_Detection_Data")
# Discard the first column (presumably an exported row index -- TODO confirm against the file).
raw_data.drop(raw_data.columns[0], axis=1, inplace=True)

# Choose the train/test rows BEFORE fitting any preprocessing, so that the scaler's
# mean/variance are estimated from training rows only and nothing about the test
# rows leaks into the features the model is trained on.
# Splitting row positions with the same test_size / random_state is intended to
# select the same rows that splitting (X, y) directly would select.
row_ids = np.arange(len(raw_data))
train_rows, test_rows = train_test_split(row_ids, test_size=0.3, random_state=42)

# Standardise columns 1..29 using statistics of the training rows, then apply that
# same transform to every row (train and test alike).
scaler = StandardScaler().fit(raw_data.iloc[train_rows, 1:30])
raw_data.iloc[:, 1:30] = scaler.transform(raw_data.iloc[:, 1:30])
data_matrix = raw_data.values

# X: feature matrix (for this analysis, we exclude the Time variable from the dataset)
X = data_matrix[:, 1:30]

# y: labels vector
y = data_matrix[:, 30]

# data normalization: each row is rescaled to unit L1 norm; this is a per-row
# operation, so it uses no statistics shared between samples.
X = normalize(X, norm="l1")

X_train, X_test = X[train_rows], X[test_rows]
y_train, y_test = y[train_rows], y[test_rows]

In [20]:
def class_weights_balanced(y):
    """Return "balanced" per-class weights for the labels in ``y``.

    Each class ``c`` gets the weight ``n / (k * count_c)``, where ``n`` is the
    number of samples and ``k`` the number of distinct classes. Frequent
    classes therefore get small weights and rare classes large ones, and the
    reciprocals of the weights add up to ``k``.

    Parameters
    ----------
    y : iterable of hashable labels

    Returns
    -------
    dict
        Maps each distinct label to its float weight; empty when ``y`` is empty.
    """
    occurrences = Counter(y)
    total_samples = float(len(y))
    total_classes = float(len(occurrences))

    weights = {}
    for label, count in occurrences.items():
        weights[label] = total_samples / (total_classes * count)
    return weights


def roc_auc(y_true, scores):
    """Area under the ROC curve, computed from the rank-sum (Mann-Whitney U) statistic.

    Tied scores receive the average of the ranks they span, so a tied
    positive/negative pair contributes 0.5 and the result does not depend on
    the order in which samples are supplied.

    Parameters
    ----------
    y_true : array-like
        Binary labels, expected to be coded 0 (negative) / 1 (positive).
        Cast to ``int8``.
    scores : array-like
        Real-valued scores, larger meaning "more positive". Cast to ``float64``.

    Returns
    -------
    numpy.float64
        The AUC, ``U / (n_pos * n_neg)``.

    Raises
    ------
    ValueError
        If ``y_true`` does not contain at least one positive and one negative sample.
    """
    y_true = np.asarray(y_true, dtype=np.int8)
    scores = np.asarray(scores, dtype=np.float64)

    # Plain Python ints: the products below then cannot overflow a fixed-width
    # numpy integer, whatever the platform's default integer size is.
    n_pos = int(y_true.sum())
    n_neg = len(y_true) - n_pos
    if n_pos == 0 or n_neg == 0:
        raise ValueError("Need at least one positive and one negative sample")

    # Average (mid) ranks, 1-based. Equal scores form one group; a group of size c
    # whose last member has rank r occupies ranks r-c+1 .. r, mean r - (c-1)/2.
    _, group_of, group_size = np.unique(scores, return_inverse=True, return_counts=True)
    last_rank = np.cumsum(group_size)
    mid_rank = last_rank - (group_size - 1) / 2.0
    ranks = mid_rank[group_of]

    # U statistic of the positives = their rank sum minus the smallest possible rank sum.
    pos_ranks_sum = ranks[y_true == 1].sum()
    return (pos_ranks_sum - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)


def train_linear_svm(X, y, epochs=10, lr0=0.01, C=1.0, random_state=31):
    """Fit a bias-free linear SVM by class-weighted stochastic sub-gradient steps.

    Every epoch visits the samples once in a freshly shuffled order. At global
    step ``t`` (counted across epochs, starting at 1) the learning rate is
    ``lr0 / (1 + 0.01 * t)``. The weight vector is always shrunk by
    ``(1 - lr)``; when the sample's margin ``y_i * <w, x_i>`` is below 1 the
    term ``lr * C * class_weight * y_i * x_i`` is added as well.

    Parameters
    ----------
    X : array-like, 2-D
        Feature matrix; cast to ``float64``.
    y : array-like
        Labels; cast to ``int8``. Label 1 is the positive class (+1), every
        other value is treated as -1. Per-class weights come from
        ``class_weights_balanced``.
    epochs : int
        Number of passes over the data.
    lr0 : float
        Initial learning rate.
    C : float
        Multiplier on the hinge-loss part of the update.
    random_state : seed accepted by ``np.random.default_rng``
        Controls the per-epoch shuffling.

    Returns
    -------
    numpy.ndarray
        Learned weight vector with one entry per feature (no intercept).
    """
    features = np.asarray(X, dtype=np.float64)
    labels = np.asarray(y, dtype=np.int8)
    sample_count, feature_count = features.shape

    generator = np.random.default_rng(random_state)
    targets = np.where(labels == 1, 1.0, -1.0)  # {other, 1} -> {-1.0, +1.0}
    class_weight = class_weights_balanced(labels)
    w = np.zeros(feature_count, dtype=np.float64)

    step = 0  # counts updates across all epochs; drives the learning-rate decay
    for _epoch in range(epochs):
        for idx in generator.permutation(sample_count):
            step += 1
            lr = lr0 / (1.0 + 0.01 * step)
            shrink = 1.0 - lr

            sample = features[idx]
            target = targets[idx]
            violates_margin = target * np.dot(w, sample) < 1.0

            if violates_margin:
                # regularisation shrink plus the weighted hinge sub-gradient step
                w = shrink * w + lr * C * class_weight[int(labels[idx])] * target * sample
            else:
                # margin satisfied: regularisation shrink only
                w *= shrink
    return w


def decision_function(X, w):
    """Return the linear scores ``X @ w`` for the samples in ``X``.

    ``X`` is cast to a ``float64`` array first; ``w`` is used as given.
    No intercept is added.
    """
    return np.asarray(X, dtype=np.float64) @ w

In [21]:
# Fit the linear SVM on the training split (seed fixed so the shuffling is repeatable).
w = train_linear_svm(X=X_train, y=y_train, random_state=31)

# Raw linear scores for the held-out rows; these are ranking scores, not hard class labels.
y_pred_svm = decision_function(X=X_test, w=w)

# Ranking quality of those scores on the test split.
roc_auc_svm = roc_auc(y_true=y_test, scores=y_pred_svm)
print("SVM ROC-AUC score: {0:.3f}".format(roc_auc_svm))

SVM ROC-AUC score: 0.985
